Installation
## Install 'remotes' only when it is not already available.
## requireNamespace() is the documented way to test for a package;
## installed.packages() is slow and not intended for this check.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
## Install phenOncoX from its GitHub repository
remotes::install_github("sigven/phenOncoX")
Get OncoTree terms
This shows how to retrieve cancer phenotype terms as defined in OncoTree (maximum tree depth = 2).
## Fetch the OncoTree terms, limited to a maximum tree depth of 2;
## the session's temporary directory serves as the cache directory
download_dir <- tempdir()
oncotree <- phenOncoX::get_tree(
  max_tree_depth = 2,
  cache_dir = download_dir
)
#> INFO [2024-11-19 14:20:05] Downloading remote dataset from Google Drive to cache_dir
#> INFO [2024-11-19 14:20:06] Reading from cache_dir = '/tmp/RtmpxmppUp', argument force_download = FALSE
#> INFO [2024-11-19 14:20:06] Object 'oncotree_core' sucessfully loaded
#> INFO [2024-11-19 14:20:06] md5 checksum is valid: ed3bfdab9736bd772d4e24e119f629a2
#> INFO [2024-11-19 14:20:06] Retrieved 806 records
#> INFO [2024-11-19 14:20:06] Limiting OncoTree terms to those with a maximum tree depth of = '2'
## Count the OncoTree records returned by get_tree()
nrow(oncotree[["records"]])
#> [1] 251
## Print the metadata element describing the underlying resources
oncotree[["metadata"]]
#> source source_description
#> 1 OncoTree A cancer classification system for precision oncology
#> source_url
#> 1 http://oncotree.mskcc.org/#/home
#> source_citation source_version
#> 1 Kundra et al., JCO Clin Cancer Inform, 2021; 33625877 2021_11_02
#> source_abbreviation source_license
#> 1 oncotree CC BY 4.0
#> source_license_url
#> 1 https://creativecommons.org/licenses/by/4.0/
Get all (OncoTree-expanded) cancer phenotype terms
## Retrieve the complete set of cancer phenotype terms (no site filter)
oncoterms <- phenOncoX::get_terms(cache_dir = download_dir)
#> INFO [2024-11-19 14:20:06] Downloading remote dataset from Google Drive to cache_dir
#> INFO [2024-11-19 14:20:10] Reading from cache_dir = '/tmp/RtmpxmppUp', argument force_download = FALSE
#> INFO [2024-11-19 14:20:10] Object 'oncotree_expanded' sucessfully loaded
#> INFO [2024-11-19 14:20:10] md5 checksum is valid: 912046e9270261a8ea1d4d61f934d2fa
#> INFO [2024-11-19 14:20:10] Retrieved 25381 records
## Total number of phenotype term records retrieved
nrow(oncoterms[["records"]])
#> [1] 25381
Number of (OncoTree-expanded) cancer phenotype terms per primary tumor type/tissue
## Retrieve all cancer phenotype terms, using the same cache directory
oncoterms <- phenOncoX::get_terms(cache_dir = download_dir)
#> INFO [2024-11-19 14:20:10] Reading from cache_dir = '/tmp/RtmpxmppUp', argument force_download = FALSE
#> INFO [2024-11-19 14:20:10] Object 'oncotree_expanded' sucessfully loaded
#> INFO [2024-11-19 14:20:10] Retrieved n = 25381 records
## Tally the terms per primary tumor site, most frequent site first;
## records lacking a primary site are left out
oncoterms$records |>
  dplyr::filter(!is.na(primary_site)) |>
  dplyr::count(primary_site, name = "num_terms", sort = TRUE) |>
  as.data.frame()
#> primary_site num_terms
#> 1 Lymphoid 4038
#> 2 Soft Tissue 2828
#> 3 CNS/Brain 2296
#> 4 Head and Neck 2273
#> 5 Skin 1817
#> 6 Myeloid 1778
#> 7 Lung 1086
#> 8 Colon/Rectum 966
#> 9 Ovary/Fallopian Tube 851
#> 10 Breast 781
#> 11 Esophagus/Stomach 719
#> 12 Bone 551
#> 13 Bladder/Urinary Tract 482
#> 14 Uterus 451
#> 15 Biliary Tract 416
#> 16 Liver 415
#> 17 Kidney 396
#> 18 Pancreas 343
#> 19 Peripheral Nervous System 292
#> 20 Cervix 287
#> 21 Eye 274
#> 22 Vulva/Vagina 265
#> 23 Thyroid 250
#> 24 Testis 239
#> 25 Prostate 207
#> 26 Other/Unknown 176
#> 27 Pleura 124
#> 28 Peritoneum 99
#> 29 Thymus 87
#> 30 Adrenal Gland 70
#> 31 Penis 61
#> 32 Ampulla of Vater 46
Get all (OncoTree-expanded) cancer phenotype terms relevant for prostate cancer
## Retrieve the phenotype terms for the site "Prostate"
oncoterms_prostate <- phenOncoX::get_terms(
  site = "Prostate",
  cache_dir = download_dir
)
## Build an interactive table of the prostate terms: primary_site, cui and
## cui_name come first, followed by every remaining column; CSV and Excel
## export buttons are enabled
prostate_terms_table <- oncoterms_prostate$records |>
  dplyr::select(primary_site, cui, cui_name, dplyr::everything()) |>
  DT::datatable(
    escape = FALSE,
    extensions = c("Buttons", "Responsive"),
    width = "100%",
    options = list(
      buttons = c("csv", "excel"),
      dom = "Bfrtip"
    )
  )
Session Info
# Print the R version, platform and package versions of this session
# (useful for reproducibility); set eval = FALSE to leave this out
utils::sessionInfo()
#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.5 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> loaded via a namespace (and not attached):
#> [1] jsonlite_1.8.9 dplyr_1.1.4 compiler_4.4.2 crayon_1.5.3
#> [5] tidyselect_1.2.1 phenOncoX_0.8.1 jquerylib_0.1.4 systemfonts_1.1.0
#> [9] textshaping_0.4.0 yaml_2.3.10 fastmap_1.2.0 R6_2.5.1
#> [13] generics_0.1.3 curl_6.0.1 knitr_1.49 htmlwidgets_1.6.4
#> [17] tibble_3.2.1 desc_1.4.3 bslib_0.8.0 pillar_1.9.0
#> [21] rlang_1.1.4 DT_0.33 utf8_1.2.4 cachem_1.1.0
#> [25] lgr_0.4.4 xfun_0.49 fs_1.6.5 sass_0.4.9
#> [29] cli_3.6.3 withr_3.0.2 pkgdown_2.1.1 magrittr_2.0.3
#> [33] crosstalk_1.2.1 digest_0.6.37 lifecycle_1.0.4 vctrs_0.6.5
#> [37] evaluate_1.0.1 gargle_1.5.2 glue_1.8.0 ragg_1.3.3
#> [41] googledrive_2.1.1 fansi_1.0.6 httr_1.4.7 rmarkdown_2.29
#> [45] purrr_1.0.2 tools_4.4.2 pkgconfig_2.0.3 htmltools_0.5.8.1
References
Huang, Kuan-Lin, R Jay Mashl, Yige Wu, Deborah I Ritter, Jiayin Wang,
Clara Oh, Marta Paczkowska, et al. 2018. “Pathogenic Germline
Variants in 10,389 Adult Cancers.” Cell 173 (2):
355–370.e14. http://dx.doi.org/10.1016/j.cell.2018.03.039.
Kundra, Ritika, Hongxin Zhang, Robert Sheridan, Sahussapont Joseph
Sirintrapun, Avery Wang, Angelica Ochoa, Manda Wilson, et al. 2021.
“OncoTree: A Cancer Classification System for
Precision Oncology.” JCO Clin Cancer Inform 5
(February): 221–30. http://dx.doi.org/10.1200/CCI.20.00108.
Louden, Diana Nelson. 2020. “MedGen: NCBI’s Portal to Information on Medical Conditions
with a Genetic Component.” Med. Ref. Serv. Q. 39 (2):
183–91. https://doi.org/10.1080/02763869.2020.1726152.
Malone, James, Ele Holloway, Tomasz Adamusiak, Misha Kapushesky, Jie
Zheng, Nikolay Kolesnikov, Anna Zhukova, Alvis Brazma, and Helen
Parkinson. 2010. “Modeling Sample Variables with an Experimental
Factor Ontology.” Bioinformatics 26 (8): 1112–18. http://dx.doi.org/10.1093/bioinformatics/btq099.
Schriml, Lynn Marie, Cesar Arze, Suvarna Nadendla, Yu-Wei Wayne Chang,
Mark Mazaitis, Victor Felix, Gang Feng, and Warren Alden Kibbe. 2012.
“Disease Ontology: A Backbone for Disease Semantic
Integration.” Nucleic Acids Res. 40 (Database issue):
D940–6. http://dx.doi.org/10.1093/nar/gkr972.