Installation
# Install 'remotes' only when it is not already usable. requireNamespace()
# looks up this one package directly, whereas installed.packages() builds a
# matrix describing every installed package just to test membership.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
#> Installing package into '/home/runner/work/_temp/Library'
#> (as 'lib' is unspecified)
# Install pharmOncoX from its GitHub repository.
remotes::install_github(repo = "sigven/pharmOncoX")
#> Using github PAT from envvar GITHUB_PAT. Use `gitcreds::gitcreds_set()` and unset GITHUB_PAT in .Renviron (or elsewhere) if you want to use the more secure git credential store instead.
#> Downloading GitHub repo sigven/pharmOncoX@HEAD
#> cpp11 (NA -> 0.5.0) [CRAN]
#> Installing 1 packages: cpp11
#> Installing package into '/home/runner/work/_temp/Library'
#> (as 'lib' is unspecified)
#> ── R CMD build ─────────────────────────────────────────────────────────────────
#> * checking for file ‘/tmp/RtmpOW419Q/remotes181c1ae9b67c/sigven-pharmOncoX-809bb47/DESCRIPTION’ ... OK
#> * preparing ‘pharmOncoX’:
#> * checking DESCRIPTION meta-information ... OK
#> * checking for LF line-endings in source and make files and shell scripts
#> * checking for empty or unneeded directories
#> Omitted ‘LazyData’ from DESCRIPTION
#> * building ‘pharmOncoX_1.8.2.tar.gz’
#> Installing package into '/home/runner/work/_temp/Library'
#> (as 'lib' is unspecified)
library(pharmOncoX)
cache_dir <- tempdir()
Cancer drug classifications
Targeted agents
Plotted below are key statistics with respect to drug classification numbers for targeted and chemotherapy agents found in pharmOncoX. Existing drug classifications have been retrieved from ATC, and these have been extended with manual addition/curation, and also through the establishment of multiple novel levels in the ATC tree, particularly for targeted therapies. Note that only drugs that are indicated for cancer conditions (as harvested from the Open Targets platform) are considered in the numbers plotted below.
p_targeted_classifications
Retrieval of drugs - examples
Get BRAF-targeted drugs, list records per indication
# BRAF-targeted agents from both targeted-therapy categories; the result
# holds the matching records in `drugs$records`.
drugs <- get_drugs(
  drug_target = "BRAF",
  treatment_category = c(
    "targeted_therapy_classified",
    "targeted_therapy_unclassified"
  ),
  cache_dir = cache_dir
)
#> INFO [2024-11-18 14:00:38] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:00:38] Object 'drug_map_name' sucessfully loaded
#> INFO [2024-11-18 14:00:38] Retrieved n = 32288 records
#> INFO [2024-11-18 14:00:38] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:00:38] Object 'drug_map_alias' sucessfully loaded
#> INFO [2024-11-18 14:00:38] Retrieved n = 557017 records
#> INFO [2024-11-18 14:00:38] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:00:38] Object 'drug_map_basic' sucessfully loaded
#> INFO [2024-11-18 14:00:38] Retrieved n = 32434 records
#> INFO [2024-11-18 14:00:38] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:00:38] Object 'drug_map_target' sucessfully loaded
#> INFO [2024-11-18 14:00:38] Retrieved n = 40255 records
#> INFO [2024-11-18 14:00:38] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:00:38] Object 'drug_map_indication' sucessfully loaded
#> INFO [2024-11-18 14:00:38] Retrieved n = 67821 records
#> INFO [2024-11-18 14:00:41] Record set satisfying user-defined criteria: n = 311
#> INFO [2024-11-18 14:00:41] Collapsing record set - providing output on a 'per_drug_target_indication' resolution
#> INFO [2024-11-18 14:00:41] Final record set: n = 274 records
## Number of drug records
nrow(drugs$records)
#> [1] 274
## Column names of drug records
colnames(drugs$records)
#> [1] "drug_id" "drug_name"
#> [3] "drug_type" "molecule_chembl_id"
#> [5] "drug_action_type" "drug_alias"
#> [7] "nci_concept_definition" "opentargets"
#> [9] "drug_cancer_relevance" "inhibition_moa"
#> [11] "is_salt" "is_adc"
#> [13] "drug_blackbox_warning" "nci_t"
#> [15] "target_symbol" "target_entrezgene"
#> [17] "target_genename" "target_ensembl_gene_id"
#> [19] "target_type" "drug_max_phase_indication"
#> [21] "drug_approved_indication" "drug_frac_cancer_indications"
#> [23] "drug_approved_noncancer" "drug_n_indications"
#> [25] "drug_year_first_approval" "drug_max_ct_phase"
#> [27] "disease_efo_id" "disease_efo_label"
#> [29] "primary_site" "drug_clinical_id"
#> [31] "drug_clinical_source" "atc_code_level1"
#> [33] "atc_level1" "atc_code_level2"
#> [35] "atc_level2" "atc_code_level3"
#> [37] "atc_level3" "atc_treatment_category"
Get RAS-targeted drugs, list per drug target
# RAS inhibitors among the classified targeted therapies, listed per drug
# target. The logical argument is spelled TRUE because `T` is an ordinary
# variable that can be reassigned, while TRUE is a reserved word.
drugs <- get_drugs(
  cache_dir = cache_dir,
  treatment_category = c("targeted_therapy_classified"),
  drug_action_inhibition = TRUE,
  output_resolution = "drug2target"
)$records |>
  # Rows whose atc_level3 is NA are dropped by filter() as well.
  dplyr::filter(atc_level3 == "RAS inhibitors") |>
  # Remove columns that are not shown in the table.
  dplyr::select(
    -c(
      "drug_alias",
      "disease_main_group",
      "drug_clinical_id"
    )
  ) |>
  # Turn the "|" separators in the indication string into ", ".
  dplyr::mutate(
    disease_indication = stringr::str_replace_all(
      disease_indication, "\\|", ", "
    )
  ) |>
  # Put the identifying columns first, keep all remaining columns after them.
  dplyr::select(
    drug_id, drug_name, drug_type, molecule_chembl_id,
    drug_action_type, target_symbol, dplyr::everything()
  )

# Interactive table with CSV/Excel export buttons.
dt_drugtable_ras_inhibitors <- DT::datatable(
  drugs,
  escape = FALSE,
  extensions = c("Buttons", "Responsive"),
  width = "100%",
  options = list(
    buttons = c("csv", "excel"),
    dom = "Bfrtip"
  )
)
Get MEK inhibitors, list per drug only
# MEK inhibitors among the classified targeted therapies, listed per drug.
# TRUE is written out in full because `T` is a reassignable variable rather
# than a reserved word.
drugs <- get_drugs(
  cache_dir = cache_dir,
  treatment_category = c("targeted_therapy_classified"),
  drug_action_inhibition = TRUE,
  drug_source_opentargets = TRUE,
  output_resolution = "drug"
)$records |>
  # Rows whose atc_level3 is NA are dropped by filter() as well.
  dplyr::filter(atc_level3 == "MEK inhibitors") |>
  # Remove columns that are not shown in the table.
  dplyr::select(
    -c(
      "drug_alias",
      "disease_main_group",
      "drug_clinical_id"
    )
  ) |>
  # Turn the "|" separators in the indication string into ", ".
  dplyr::mutate(
    disease_indication = stringr::str_replace_all(
      disease_indication, "\\|", ", "
    )
  )

# Interactive table with CSV/Excel export buttons.
dt_drugtable_mek_inhibitors <- DT::datatable(
  drugs,
  escape = FALSE,
  extensions = c("Buttons", "Responsive"),
  width = "100%",
  options = list(
    buttons = c("csv", "excel"),
    dom = "Bfrtip"
  )
)
Get immune checkpoint inhibitors, list per drug target
# Classified targeted therapies, listed per drug target. FALSE/TRUE are
# written out in full because `F` and `T` are reassignable variables rather
# than reserved words.
drugs <- get_drugs(
  cache_dir = cache_dir,
  treatment_category = c("targeted_therapy_classified"),
  drug_source_opentargets = FALSE,
  drug_classified_cancer = TRUE,
  output_resolution = "drug2target"
)

# Keep the two immune checkpoint inhibitor classes. `%in%` never yields NA,
# so no separate is.na() guard is needed.
drugs$records <- drugs$records |>
  dplyr::filter(
    atc_level3 %in% c(
      "PD-1/PDL-1 inhibitors",
      "Other immune checkpoint inhibitors"
    )
  ) |>
  # Remove columns that are not shown in the table.
  dplyr::select(
    -c(
      "drug_alias",
      "disease_main_group",
      "drug_clinical_id"
    )
  ) |>
  # Turn the "|" separators in the indication string into ", ".
  dplyr::mutate(
    disease_indication = stringr::str_replace_all(
      disease_indication, "\\|", ", "
    )
  ) |>
  # Put the drug and target columns first, keep the rest after them.
  dplyr::select(
    drug_id,
    drug_name,
    drug_type,
    target_symbol,
    target_genename,
    dplyr::everything()
  )

# Interactive table with CSV/Excel export buttons.
dt_drugtable_ici <- DT::datatable(
  drugs$records,
  escape = FALSE,
  extensions = c("Buttons", "Responsive"),
  width = "100%",
  options = list(
    buttons = c("csv", "excel"),
    dom = "Bfrtip"
  )
)
Get immune checkpoint inhibitors indicated for lung cancer conditions, list per drug-target entry
# Classified targeted therapies with "Lung" as main indication, listed per
# drug-target entry. TRUE is written out in full because `T` is a
# reassignable variable rather than a reserved word.
drugs <- get_drugs(
  cache_dir = cache_dir,
  output_resolution = "drug2target",
  treatment_category = c("targeted_therapy_classified"),
  drug_source_opentargets = TRUE,
  drug_indication_main = "Lung"
)

# Keep the two immune checkpoint inhibitor classes. `%in%` never yields NA,
# so no separate is.na() guard is needed.
drugs$records <- drugs$records |>
  dplyr::filter(
    atc_level3 %in% c(
      "PD-1/PDL-1 inhibitors",
      "Other immune checkpoint inhibitors"
    )
  ) |>
  # Remove columns that are not shown in the table.
  dplyr::select(
    -c(
      "drug_alias",
      "disease_main_group",
      "drug_clinical_id"
    )
  ) |>
  # Turn the "|" separators in the indication string into ", ".
  dplyr::mutate(
    disease_indication = stringr::str_replace_all(
      disease_indication, "\\|", ", "
    )
  ) |>
  # Put the drug and target columns first, keep the rest after them.
  dplyr::select(
    drug_id,
    drug_name,
    drug_type,
    target_symbol,
    target_genename,
    dplyr::everything()
  )

# Interactive table with CSV/Excel export buttons.
dt_drugtable_ici_lung <- DT::datatable(
  drugs$records,
  escape = FALSE,
  extensions = c("Buttons", "Responsive"),
  width = "100%",
  options = list(
    buttons = c("csv", "excel"),
    dom = "Bfrtip"
  )
)
Get antimetabolite drugs
# Classified chemotherapy agents at per-drug resolution; the antimetabolite
# subset is selected from `drugs$records` afterwards.
drugs <- get_drugs(
  output_resolution = "drug",
  treatment_category = "chemo_therapy_classified",
  cache_dir = cache_dir
)
#> INFO [2024-11-18 14:00:59] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:00:59] Object 'drug_map_name' sucessfully loaded
#> INFO [2024-11-18 14:00:59] Retrieved n = 32288 records
#> INFO [2024-11-18 14:00:59] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:00:59] Object 'drug_map_alias' sucessfully loaded
#> INFO [2024-11-18 14:00:59] Retrieved n = 557017 records
#> INFO [2024-11-18 14:01:00] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:00] Object 'drug_map_basic' sucessfully loaded
#> INFO [2024-11-18 14:01:00] Retrieved n = 32434 records
#> INFO [2024-11-18 14:01:00] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:00] Object 'drug_map_target' sucessfully loaded
#> INFO [2024-11-18 14:01:00] Retrieved n = 40255 records
#> INFO [2024-11-18 14:01:00] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:00] Object 'drug_map_indication' sucessfully loaded
#> INFO [2024-11-18 14:01:00] Retrieved n = 67821 records
#> INFO [2024-11-18 14:01:02] Record set satisfying user-defined criteria: n = 17896
#> INFO [2024-11-18 14:01:02] Collapsing record set - providing output on a 'per_drug' resolution
#> INFO [2024-11-18 14:01:02] Final record set: n = 190 records
# Restrict to records whose ATC level-2 label mentions antimetabolites, then
# tidy the columns for display.
antimetabolite_records <- dplyr::filter(
  drugs$records,
  !is.na(atc_level2) &
    stringr::str_detect(atc_level2, "ANTIMETABOLITES")
)
antimetabolite_records <- dplyr::select(
  antimetabolite_records,
  -c("drug_alias", "disease_main_group", "drug_clinical_id")
)
# Turn the "|" separators in the indication string into ", ".
drugs$records <- dplyr::mutate(
  antimetabolite_records,
  disease_indication = stringr::str_replace_all(
    disease_indication, "\\|", ", "
  )
)

# Interactive table with CSV/Excel export buttons.
dt_drugtable_metabolites <- drugs$records |>
  DT::datatable(
    escape = FALSE,
    extensions = c("Buttons", "Responsive"),
    width = "100%",
    options = list(buttons = c("csv", "excel"), dom = "Bfrtip")
  )
Get taxanes
# Classified chemotherapy agents at per-drug resolution; the taxane subset is
# selected from `drugs$records` afterwards.
drugs <- get_drugs(
  output_resolution = "drug",
  treatment_category = "chemo_therapy_classified",
  cache_dir = cache_dir
)
#> INFO [2024-11-18 14:01:03] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:03] Object 'drug_map_name' sucessfully loaded
#> INFO [2024-11-18 14:01:03] Retrieved n = 32288 records
#> INFO [2024-11-18 14:01:03] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:03] Object 'drug_map_alias' sucessfully loaded
#> INFO [2024-11-18 14:01:03] Retrieved n = 557017 records
#> INFO [2024-11-18 14:01:03] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:03] Object 'drug_map_basic' sucessfully loaded
#> INFO [2024-11-18 14:01:03] Retrieved n = 32434 records
#> INFO [2024-11-18 14:01:03] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:03] Object 'drug_map_target' sucessfully loaded
#> INFO [2024-11-18 14:01:03] Retrieved n = 40255 records
#> INFO [2024-11-18 14:01:03] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:03] Object 'drug_map_indication' sucessfully loaded
#> INFO [2024-11-18 14:01:03] Retrieved n = 67821 records
#> INFO [2024-11-18 14:01:06] Record set satisfying user-defined criteria: n = 17896
#> INFO [2024-11-18 14:01:06] Collapsing record set - providing output on a 'per_drug' resolution
#> INFO [2024-11-18 14:01:06] Final record set: n = 190 records
# Restrict to records whose ATC level-3 label mentions taxanes (filter() also
# drops rows where the label is NA), then tidy the columns for display.
taxane_records <- dplyr::filter(
  drugs$records,
  stringr::str_detect(atc_level3, "Taxanes")
)
taxane_records <- dplyr::select(
  taxane_records,
  -c("drug_alias", "disease_main_group", "drug_clinical_id")
)
# Turn the "|" separators in the indication string into ", ".
drugs$records <- dplyr::mutate(
  taxane_records,
  disease_indication = stringr::str_replace_all(
    disease_indication, "\\|", ", "
  )
)

# Interactive table with CSV/Excel export buttons.
dt_drugtable_taxanes <- drugs$records |>
  DT::datatable(
    escape = FALSE,
    extensions = c("Buttons", "Responsive"),
    width = "100%",
    options = list(buttons = c("csv", "excel"), dom = "Bfrtip")
  )
Get platinum compounds
# Classified chemotherapy agents at per-drug resolution; the platinum
# compound subset is selected from `drugs$records` afterwards.
drugs <- get_drugs(
  output_resolution = "drug",
  treatment_category = "chemo_therapy_classified",
  cache_dir = cache_dir
)
#> INFO [2024-11-18 14:01:06] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:06] Object 'drug_map_name' sucessfully loaded
#> INFO [2024-11-18 14:01:06] Retrieved n = 32288 records
#> INFO [2024-11-18 14:01:07] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:07] Object 'drug_map_alias' sucessfully loaded
#> INFO [2024-11-18 14:01:07] Retrieved n = 557017 records
#> INFO [2024-11-18 14:01:07] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:07] Object 'drug_map_basic' sucessfully loaded
#> INFO [2024-11-18 14:01:07] Retrieved n = 32434 records
#> INFO [2024-11-18 14:01:07] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:07] Object 'drug_map_target' sucessfully loaded
#> INFO [2024-11-18 14:01:07] Retrieved n = 40255 records
#> INFO [2024-11-18 14:01:07] Reading from cache_dir = '/tmp/RtmpOW419Q', argument force_download = F
#> INFO [2024-11-18 14:01:07] Object 'drug_map_indication' sucessfully loaded
#> INFO [2024-11-18 14:01:07] Retrieved n = 67821 records
#> INFO [2024-11-18 14:01:09] Record set satisfying user-defined criteria: n = 17896
#> INFO [2024-11-18 14:01:09] Collapsing record set - providing output on a 'per_drug' resolution
#> INFO [2024-11-18 14:01:10] Final record set: n = 190 records
# Restrict to records whose ATC level-3 label mentions platinum compounds
# (filter() also drops rows where the label is NA), then tidy the columns.
platin_records <- dplyr::filter(
  drugs$records,
  stringr::str_detect(atc_level3, "Platinum compounds")
)
platin_records <- dplyr::select(
  platin_records,
  -c("drug_alias", "disease_main_group", "drug_clinical_id")
)
# Turn the "|" separators in the indication string into ", ".
platin_records <- dplyr::mutate(
  platin_records,
  disease_indication = stringr::str_replace_all(
    disease_indication, "\\|", ", "
  )
)
# Put the identifying columns first, keep all remaining columns after them.
drugs$records <- dplyr::select(
  platin_records,
  drug_id, drug_name, drug_type, molecule_chembl_id,
  drug_action_type, opentargets, dplyr::everything()
)

# Interactive table with CSV/Excel export buttons.
dt_drugtable_platins <- drugs$records |>
  DT::datatable(
    escape = FALSE,
    extensions = c("Buttons", "Responsive"),
    width = "100%",
    options = list(buttons = c("csv", "excel"), dom = "Bfrtip")
  )
Retrieval of biomarkers
Reported associations between BRCA1/2 alterations and drug sensitivity
- Get evidence from CIViC and CGI on cancer drug sensitivity associated with BRCA1/2 alterations, whether somatic (tumor) or inherited (germline)
# Load the biomarker dataset via the cache directory; the per-source tables
# are read from `biomarkers$data` further down.
biomarkers <- get_biomarkers(cache_dir = cache_dir)
#> INFO [2024-11-18 14:01:10] Downloading remote dataset from Google Drive to cache_dir
#> INFO [2024-11-18 14:01:14] Reading from cache_dir = ' (/tmp/RtmpOW419Q'), argument force_download = F
#> INFO [2024-11-18 14:01:14] Object 'biomarkers' sucessfully loaded
#> INFO [2024-11-18 14:01:14] md5 checksum is valid: 3da82383d54e07b7ea398d956916aaa6
# Predictive evidence for BRCA1 and BRCA2 variants, collected separately for
# each biomarker source ("civic", "cgi") and then stacked into one table.
# NOTE: despite the `brca1_` prefix, these objects hold BRCA1 *and* BRCA2
# records; the names are kept because later code refers to them.
#
# Built with lapply() over a named vector so the result is a list named by
# source, without a top-level loop variable called `source` masking
# base::source().
brca1_biomarkers <- lapply(
  stats::setNames(c("civic", "cgi"), c("civic", "cgi")),
  function(src) {
    biomarkers$data[[src]]$variant |>
      # `%in%` never yields NA, so rows with a missing symbol are excluded
      # without a separate is.na() guard.
      dplyr::filter(symbol %in% c("BRCA1", "BRCA2")) |>
      # One row per variant, with all of its aliases collapsed to one string.
      dplyr::group_by(
        variant_id, variant_name_primary, variant_consequence
      ) |>
      dplyr::summarise(
        variant_alias = paste(variant_alias, collapse = ", "),
        .groups = "drop"
      ) |>
      # Attach the clinical evidence items recorded for each variant.
      dplyr::inner_join(
        biomarkers$data[[src]]$clinical,
        by = "variant_id"
      ) |>
      dplyr::select(
        variant_id, variant_name_primary, therapeutic_context,
        evidence_type, evidence_level,
        biomarker_source, biomarker_source_datestamp,
        molecular_profile_name, evidence_id, variant_origin,
        primary_site, source_id,
        evidence_url,
        evidence_description,
        clinical_significance
      ) |>
      dplyr::distinct() |>
      dplyr::rename(
        literature_id = source_id,
        variant_name = variant_name_primary
      ) |>
      dplyr::filter(evidence_type == "Predictive") |>
      # Put the most informative columns first, keep the rest after them.
      dplyr::select(
        variant_name,
        primary_site,
        therapeutic_context,
        molecular_profile_name,
        evidence_level,
        dplyr::everything()
      )
  }
)

# Stack both sources and order the rows by evidence level.
brca1_biomarkers_all <-
  dplyr::bind_rows(
    brca1_biomarkers[["civic"]],
    brca1_biomarkers[["cgi"]]
  ) |>
  dplyr::arrange(evidence_level)
# Interactive table of the combined BRCA1/2 biomarker evidence, with
# CSV/Excel export buttons.
dt_brca1_biomarkers <- brca1_biomarkers_all |>
  DT::datatable(
    escape = FALSE,
    extensions = c("Buttons", "Responsive"),
    width = "100%",
    options = list(buttons = c("csv", "excel"), dom = "Bfrtip")
  )
Session Info
# Session details are printed because they are useful for reproducibility;
# set eval = FALSE if you don't want this info to appear.
sessionInfo()
#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.5 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] pharmOncoX_1.8.2
#>
#> loaded via a namespace (and not attached):
#> [1] sass_0.4.9 utf8_1.2.4 generics_0.1.3 stringi_1.8.4
#> [5] digest_0.6.37 magrittr_2.0.3 evaluate_1.0.1 grid_4.4.2
#> [9] fastmap_1.2.0 jsonlite_1.8.9 processx_3.8.4 pkgbuild_1.4.5
#> [13] googledrive_2.1.1 ps_1.8.1 httr_1.4.7 purrr_1.0.2
#> [17] fansi_1.0.6 crosstalk_1.2.1 scales_1.3.0 textshaping_0.4.0
#> [21] jquerylib_0.1.4 cli_3.6.3 rlang_1.1.4 crayon_1.5.3
#> [25] munsell_0.5.1 remotes_2.5.0 withr_3.0.2 cachem_1.1.0
#> [29] yaml_2.3.10 tools_4.4.2 gargle_1.5.2 dplyr_1.1.4
#> [33] colorspace_2.1-1 ggplot2_3.5.1 DT_0.33 curl_6.0.1
#> [37] assertthat_0.2.1 vctrs_0.6.5 R6_2.5.1 lifecycle_1.0.4
#> [41] stringr_1.5.1 fs_1.6.5 htmlwidgets_1.6.4 ragg_1.3.3
#> [45] pkgconfig_2.0.3 desc_1.4.3 callr_3.7.6 gtable_0.3.6
#> [49] pkgdown_2.1.1 pillar_1.9.0 bslib_0.8.0 glue_1.8.0
#> [53] lgr_0.4.4 systemfonts_1.1.0 xfun_0.49 tibble_3.2.1
#> [57] tidyselect_1.2.1 knitr_1.49 farver_2.1.2 htmltools_0.5.8.1
#> [61] rmarkdown_2.29 compiler_4.4.2
References
Freshour, Sharon L, Susanna Kiwala, Kelsy C Cotto, Adam C Coffman,
Joshua F McMichael, Jonathan J Song, Malachi Griffith, Obi L Griffith,
and Alex H Wagner. 2021. “Integration of the
Drug-Gene Interaction Database (DGIdb 4.0)
with Open Crowdsource Efforts.” Nucleic Acids Res. 49
(D1): D1144–51. http://dx.doi.org/10.1093/nar/gkaa1084.
Griffith, Malachi, Nicholas C Spies, Kilannin Krysiak, Joshua F
McMichael, Adam C Coffman, Arpad M Danos, Benjamin J Ainscough, et al.
2017. “CIViC Is a Community Knowledgebase for Expert
Crowdsourcing the Clinical Interpretation of Variants in Cancer.”
Nat. Genet. 49 (2): 170–74. http://dx.doi.org/10.1038/ng.3774.
Kim, Sunghwan, Jie Chen, Tiejun Cheng, Asta Gindulyte, Jia He, Siqian
He, Qingliang Li, et al. 2021. “PubChem in 2021: New
Data Content and Improved Web Interfaces.” Nucleic Acids
Res. 49 (D1): D1388–95. http://dx.doi.org/10.1093/nar/gkaa971.
Nakken, Sigve. 2023. phenOncoX: A Phenotype Ontology Map for
Cancer. https://github.com/sigven/phenOncoX.
Ochoa, David, Andrew Hercules, Miguel Carmona, Daniel Suveges, Asier
Gonzalez-Uriarte, Cinzia Malangone, Alfredo Miranda, et al. 2021.
“Open Targets Platform: Supporting Systematic Drug-Target
Identification and Prioritisation.” Nucleic Acids Res.
49 (D1): D1302–10. http://dx.doi.org/10.1093/nar/gkaa1027.
Sioutos, Nicholas, Sherri de Coronado, Margaret W Haber, Frank W Hartel,
Wen-Ling Shaiu, and Lawrence W Wright. 2007. “NCI
Thesaurus: A Semantic Model Integrating Cancer-Related Clinical and
Molecular Information.” J. Biomed. Inform. 40 (1):
30–43. http://dx.doi.org/10.1016/j.jbi.2006.02.013.
Tamborero, David, Carlota Rubio-Perez, Jordi Deu-Pons, Michael P
Schroeder, Ana Vivancos, Ana Rovira, Ignasi Tusquets, et al. 2018.
“Cancer Genome Interpreter Annotates the Biological and Clinical
Relevance of Tumor Alterations.” Genome Med. 10 (1): 25.
http://dx.doi.org/10.1186/s13073-018-0531-8.