search_environment_data() searches for patterns in
variable names, variable labels, and variable content.
cnt_search_result() summarizes the matching results. These
functions are particularly useful when the source of some information is
expected to exist, but is unknown. The default envir is the
global environment, but other environments and named lists that include
data frames are also accepted.
To demonstrate, load all of the pharmaversesdtm datasets
into an environment.
pharmaversesdtm_data_env <- new.env()
data(
  list = data(package = "pharmaversesdtm")$results[ , "Item"], 
  package = "pharmaversesdtm", 
  envir = pharmaversesdtm_data_env
)

Then search for a pattern:
search_environment_data(
  regex("XANOMELINE", ignore_case = TRUE),
  envir = pharmaversesdtm_data_env
)
#> 
#> ── Search results ──────────────────────────────────────────────────────────────
#> 
#> ── dm ──
#> 
#> ✔ Variable content: ARM and ACTARM
#> 
#> ── dm_peds ──
#> 
#> ✔ Variable content: ARM and ACTARM
#> 
#> ── ex ──
#> 
#> ✔ Variable content: EXTRT
#> 
#> ── ex_ophtha ──
#> 
#> ✔ Variable content: EXTRT
#> 
#> ── pc ──
#> 
#> ✔ Variable content: PCTEST
#> 
#> ── pp ──
#> 
#> ✔ Variable content: PPCAT
#> 
#> ── ts ──
#> 
#> ✔ Variable content: TSVAL

To immediately review the results that matched the pattern, pass them to
cnt_search_result().
search_environment_data(
  regex("XANOMELINE", ignore_case = TRUE),
  envir = pharmaversesdtm_data_env
) %>% 
  cnt_search_result(
    n_distinct_vars = "USUBJID"
  )
#> $dm
#> # A tibble: 3 × 4
#>   ARM                  ACTARM               n_USUBJID     n
#>   <chr>                <chr>                    <int> <int>
#> 1 Xanomeline High Dose Xanomeline High Dose        72    72
#> 2 Xanomeline High Dose Xanomeline Low Dose         12    12
#> 3 Xanomeline Low Dose  Xanomeline Low Dose         84    84
#> 
#> $dm_peds
#> # A tibble: 2 × 4
#>   ARM                  ACTARM               n_USUBJID     n
#>   <chr>                <chr>                    <int> <int>
#> 1 Xanomeline High Dose Xanomeline High Dose         2     2
#> 2 Xanomeline Low Dose  Xanomeline Low Dose          1     1
#> 
#> $ex
#> # A tibble: 1 × 3
#>   EXTRT      n_USUBJID     n
#>   <chr>          <int> <int>
#> 1 XANOMELINE       168   365
#> 
#> $ex_ophtha
#> # A tibble: 1 × 3
#>   EXTRT      n_USUBJID     n
#>   <chr>          <int> <int>
#> 1 XANOMELINE       168   365
#> 
#> $pc
#> # A tibble: 1 × 3
#>   PCTEST     n_USUBJID     n
#>   <chr>          <int> <int>
#> 1 XANOMELINE       254  4572
#> 
#> $pp
#> # A tibble: 1 × 3
#>   PPCAT      n_USUBJID     n
#>   <chr>          <int> <int>
#> 1 XANOMELINE       168  2688
#> 
#> $ts
#> # A tibble: 3 × 2
#>   TSVAL                                                                        n
#>   <chr>                                                                    <int>
#> 1 "Safety and Efficacy of the Xanomeline Transdermal Therapeutic System (…     1
#> 2 "To document the safety profile of the xanomeline TTS."                      1
#> 3 "Xanomeline"                                                                 1

One way to further collect and review the results is to combine them with
bind_rows().
search_environment_data(
  regex("STUDYID", ignore_case = TRUE),
  envir = pharmaversesdtm_data_env
) %>% 
  cnt_search_result(
    n_distinct_vars = "USUBJID"
  ) %>% 
  bind_rows(.id = "dataset")
#> # A tibble: 44 × 4
#>    dataset    STUDYID      n_USUBJID     n
#>    <chr>      <chr>            <int> <int>
#>  1 ae         CDISCPILOT01       225  1191
#>  2 ae_ophtha  CDISCPILOT01       225  1191
#>  3 ce_vaccine ABC                  2    44
#>  4 cm         CDISCPILOT01       229  7510
#>  5 dm         CDISCPILOT01       306   306
#>  6 dm_peds    CDISCPILOT01         5     5
#>  7 dm_vaccine ABC                  2     2
#>  8 ds         CDISCPILOT01       306   850
#>  9 eg         CDISCPILOT01       254 26717
#> 10 ex         CDISCPILOT01       254   591
#> # ℹ 34 more rows

Optional arguments can be used to include/exclude datasets/variables.
search_environment_data(
  regex("cancer", ignore_case = TRUE),
  envir = pharmaversesdtm_data_env
) %>% 
  cnt_search_result(
    n_distinct_vars = "USUBJID",
    ignore_df_names = c("ae", "ae_ophtha"),
    extra_vars = c("STUDYID")
  )
#> $mh
#> # A tibble: 9 × 5
#>   MHLLT                             MHDECOD              STUDYID n_USUBJID     n
#>   <chr>                             <chr>                <chr>       <int> <int>
#> 1 BREAST CANCER                     BREAST CANCER        CDISCP…         3     3
#> 2 CARCINOMA PROSTATE                PROSTATE CANCER      CDISCP…         1     1
#> 3 CARCINOMA SKIN                    SKIN CANCER          CDISCP…         1     1
#> 4 MALIGNANT NASOPHARYNGEAL NEOPLASM NASOPHARYNGEAL CANC… CDISCP…         1     1
#> 5 ORAL CANCER STAGE UNSPECIFIED     LIP AND/OR ORAL CAV… CDISCP…         1     1
#> 6 PROSTATE CANCER                   PROSTATE CANCER      CDISCP…         3     3
#> 7 PROSTATIC CARCINOMA               PROSTATE CANCER      CDISCP…         1     1
#> 8 SKIN CARCINOMA                    SKIN CANCER          CDISCP…         1     1
#> 9 THYROID CARCINOMA                 THYROID GLAND CANCER CDISCP…         1     1
#> 
#> $smq_db
#> # A tibble: 3 × 2
#>   termchar                         n
#>   <chr>                        <int>
#> 1 Bile duct cancer recurrent       2
#> 2 Gallbladder cancer               2
#> 3 Gallbladder cancer recurrent     2
#> 
#> $supprs_onco_imwg
#> # A tibble: 1 × 4
#>   QLABEL                       STUDYID      n_USUBJID     n
#>   <chr>                        <chr>            <int> <int>
#> 1 New Anti-Cancer Therapy Date CDISCPILOT01         3     9