`search_environment_data()` searches for patterns in variable names, variable
labels, and variable content. `cnt_search_result()` summarizes the matching
results. These functions are particularly useful when some information is
expected to exist, but its source is unknown. The default `envir` is the
global environment, but other environments and named lists that include
data frames are also accepted.
To demonstrate, load all of the `pharmaversesdtm` datasets into an environment:
pharmaversesdtm_data_env <- new.env()
data(
list = data(package = "pharmaversesdtm")$results[ , "Item"],
package = "pharmaversesdtm",
envir = pharmaversesdtm_data_env
)
Then search for a pattern:
search_environment_data(
regex("XANOMELINE", ignore_case = TRUE),
envir = pharmaversesdtm_data_env
)
#>
#> ── Search results ──────────────────────────────────────────────────────────────
#>
#> ── dm ──
#>
#> ✔ Variable content: ARM and ACTARM
#>
#> ── dm_peds ──
#>
#> ✔ Variable content: ARM and ACTARM
#>
#> ── ex ──
#>
#> ✔ Variable content: EXTRT
#>
#> ── ex_ophtha ──
#>
#> ✔ Variable content: EXTRT
#>
#> ── pc ──
#>
#> ✔ Variable content: PCTEST
#>
#> ── pp ──
#>
#> ✔ Variable content: PPCAT
#>
#> ── ts ──
#>
#> ✔ Variable content: TSVAL
To immediately review the values that matched the pattern, pass the search
results to `cnt_search_result()`.
search_environment_data(
regex("XANOMELINE", ignore_case = TRUE),
envir = pharmaversesdtm_data_env
) %>%
cnt_search_result(
n_distinct_vars = "USUBJID"
)
#> $dm
#> # A tibble: 3 × 4
#> ARM ACTARM n_USUBJID n
#> <chr> <chr> <int> <int>
#> 1 Xanomeline High Dose Xanomeline High Dose 72 72
#> 2 Xanomeline High Dose Xanomeline Low Dose 12 12
#> 3 Xanomeline Low Dose Xanomeline Low Dose 84 84
#>
#> $dm_peds
#> # A tibble: 2 × 4
#> ARM ACTARM n_USUBJID n
#> <chr> <chr> <int> <int>
#> 1 Xanomeline High Dose Xanomeline High Dose 2 2
#> 2 Xanomeline Low Dose Xanomeline Low Dose 1 1
#>
#> $ex
#> # A tibble: 1 × 3
#> EXTRT n_USUBJID n
#> <chr> <int> <int>
#> 1 XANOMELINE 168 365
#>
#> $ex_ophtha
#> # A tibble: 1 × 3
#> EXTRT n_USUBJID n
#> <chr> <int> <int>
#> 1 XANOMELINE 168 365
#>
#> $pc
#> # A tibble: 1 × 3
#> PCTEST n_USUBJID n
#> <chr> <int> <int>
#> 1 XANOMELINE 254 4572
#>
#> $pp
#> # A tibble: 1 × 3
#> PPCAT n_USUBJID n
#> <chr> <int> <int>
#> 1 XANOMELINE 168 2688
#>
#> $ts
#> # A tibble: 3 × 2
#> TSVAL n
#> <chr> <int>
#> 1 "Safety and Efficacy of the Xanomeline Transdermal Therapeutic System (… 1
#> 2 "To document the safety profile of the xanomeline TTS." 1
#> 3 "Xanomeline" 1
One way to further collect and review the results is to combine them into a
single data frame with `bind_rows()`.
search_environment_data(
regex("STUDYID", ignore_case = TRUE),
envir = pharmaversesdtm_data_env
) %>%
cnt_search_result(
n_distinct_vars = "USUBJID"
) %>%
bind_rows(.id = "dataset")
#> # A tibble: 44 × 4
#> dataset STUDYID n_USUBJID n
#> <chr> <chr> <int> <int>
#> 1 ae CDISCPILOT01 225 1191
#> 2 ae_ophtha CDISCPILOT01 225 1191
#> 3 ce_vaccine ABC 2 44
#> 4 cm CDISCPILOT01 229 7510
#> 5 dm CDISCPILOT01 306 306
#> 6 dm_peds CDISCPILOT01 5 5
#> 7 dm_vaccine ABC 2 2
#> 8 ds CDISCPILOT01 306 850
#> 9 eg CDISCPILOT01 254 26717
#> 10 ex CDISCPILOT01 254 591
#> # ℹ 34 more rows
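Optional arguments can be used to include or exclude datasets and variables.
In the example below, `ignore_df_names` leaves the `ae` and `ae_ophtha`
datasets out of the summary, and `extra_vars` adds `STUDYID` to the summary
columns.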
search_environment_data(
regex("cancer", ignore_case = TRUE),
envir = pharmaversesdtm_data_env
) %>%
cnt_search_result(
n_distinct_vars = "USUBJID",
ignore_df_names = c("ae", "ae_ophtha"),
extra_vars = c("STUDYID")
)
#> $mh
#> # A tibble: 9 × 5
#> MHLLT MHDECOD STUDYID n_USUBJID n
#> <chr> <chr> <chr> <int> <int>
#> 1 BREAST CANCER BREAST CANCER CDISCP… 3 3
#> 2 CARCINOMA PROSTATE PROSTATE CANCER CDISCP… 1 1
#> 3 CARCINOMA SKIN SKIN CANCER CDISCP… 1 1
#> 4 MALIGNANT NASOPHARYNGEAL NEOPLASM NASOPHARYNGEAL CANC… CDISCP… 1 1
#> 5 ORAL CANCER STAGE UNSPECIFIED LIP AND/OR ORAL CAV… CDISCP… 1 1
#> 6 PROSTATE CANCER PROSTATE CANCER CDISCP… 3 3
#> 7 PROSTATIC CARCINOMA PROSTATE CANCER CDISCP… 1 1
#> 8 SKIN CARCINOMA SKIN CANCER CDISCP… 1 1
#> 9 THYROID CARCINOMA THYROID GLAND CANCER CDISCP… 1 1
#>
#> $smq_db
#> # A tibble: 3 × 2
#> termchar n
#> <chr> <int>
#> 1 Bile duct cancer recurrent 2
#> 2 Gallbladder cancer 2
#> 3 Gallbladder cancer recurrent 2
#>
#> $supprs_onco_imwg
#> # A tibble: 1 × 4
#> QLABEL STUDYID n_USUBJID n
#> <chr> <chr> <int> <int>
#> 1 New Anti-Cancer Therapy Date CDISCPILOT01 3 9