Installation
## Install the 'remotes' helper package only when it is not already
## available, then use it to install phenOncoX from GitHub.
## requireNamespace() checks availability without attaching the package.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
remotes::install_github("sigven/phenOncoX")
Get OncoTree terms
This shows how to retrieve cancer phenotype terms as defined in OncoTree (max tree depth = 2).
## Fetch the OncoTree data into a temporary cache directory,
## limited to a maximum tree depth of 2
download_dir <- tempdir()
oncotree <- phenOncoX::get_tree(
  cache_dir = download_dir,
  max_tree_depth = 2
)
#> Error in get(paste0(generic, ".", class), envir = get_method_env()) :
#> object 'type_sum.accel' not found
#> INFO [2024-12-19 09:27:49] Downloading remote dataset from Google Drive to cache_dir
#> INFO [2024-12-19 09:27:51] Reading from cache_dir = '/tmp/RtmpBwOioR', argument force_download = FALSE
#> INFO [2024-12-19 09:27:51] Object 'oncotree_core' sucessfully loaded
#> INFO [2024-12-19 09:27:51] md5 checksum is valid: 7b9a4bb7aba44bdd5019fc69fa28394a
#> INFO [2024-12-19 09:27:51] Retrieved 804 records
#> INFO [2024-12-19 09:27:51] Limiting OncoTree terms to those with a maximum tree depth of = '2'
## Number of OncoTree records kept after limiting the tree depth
nrow(oncotree$records)
#> [1] 250
## Show metadata (source, citation, version, license) for the
## underlying resource
oncotree$metadata
#> source source_description
#> 1 OncoTree A cancer classification system for precision oncology
#> source_url
#> 1 http://oncotree.mskcc.org/#/home
#> source_citation source_version
#> 1 Kundra et al., JCO Clin Cancer Inform, 2021; 33625877 2021_11_02
#> source_abbreviation source_license
#> 1 oncotree CC BY 4.0
#> source_license_url
#> 1 https://creativecommons.org/licenses/by/4.0/
Get all (OncoTree-expanded) cancer phenotype terms
## Retrieve the complete set of cancer phenotype terms
oncoterms <- phenOncoX::get_terms(cache_dir = download_dir)
#> INFO [2024-12-19 09:27:51] Downloading remote dataset from Google Drive to cache_dir
#> INFO [2024-12-19 09:27:54] Reading from cache_dir = '/tmp/RtmpBwOioR', argument force_download = FALSE
#> INFO [2024-12-19 09:27:54] Object 'oncotree_expanded' sucessfully loaded
#> INFO [2024-12-19 09:27:54] md5 checksum is valid: 90e6f108f080302b1a1ef7a4866dca57
#> INFO [2024-12-19 09:27:54] Retrieved 25481 records
## Number of cancer phenotype term records retrieved
nrow(oncoterms$records)
#> [1] 25481
Number of (OncoTree-expanded) cancer phenotype terms per primary tumor type/tissue
## Load all cancer phenotype terms from the cache directory
oncoterms <- phenOncoX::get_terms(cache_dir = download_dir)
#> INFO [2024-12-19 09:27:54] Reading from cache_dir = '/tmp/RtmpBwOioR', argument force_download = FALSE
#> INFO [2024-12-19 09:27:54] Object 'oncotree_expanded' sucessfully loaded
#> INFO [2024-12-19 09:27:54] Retrieved n = 25481 records
## Number of terms per primary site (terms without a site are dropped),
## listed from the most to the least populated site
oncoterms$records |>
  dplyr::filter(!is.na(primary_site)) |>
  dplyr::group_by(primary_site) |>
  dplyr::summarise(
    num_terms = dplyr::n(),
    .groups = "drop"
  ) |>
  dplyr::arrange(dplyr::desc(num_terms)) |>
  as.data.frame()
#> primary_site num_terms
#> 1 Lymphoid 4048
#> 2 Soft Tissue 2856
#> 3 CNS/Brain 2307
#> 4 Head and Neck 2282
#> 5 Skin 1817
#> 6 Myeloid 1800
#> 7 Lung 1097
#> 8 Colon/Rectum 973
#> 9 Ovary/Fallopian Tube 856
#> 10 Breast 758
#> 11 Esophagus/Stomach 725
#> 12 Bone 551
#> 13 Bladder/Urinary Tract 488
#> 14 Uterus 451
#> 15 Biliary Tract 416
#> 16 Liver 415
#> 17 Kidney 399
#> 18 Pancreas 345
#> 19 Peripheral Nervous System 292
#> 20 Cervix 287
#> 21 Eye 280
#> 22 Vulva/Vagina 265
#> 23 Thyroid 249
#> 24 Testis 239
#> 25 Prostate 208
#> 26 Other/Unknown 173
#> 27 Pleura 124
#> 28 Peritoneum 99
#> 29 Thymus 87
#> 30 Adrenal Gland 70
#> 31 Penis 61
#> 32 Ampulla of Vater 46
Get all (OncoTree-expanded) cancer phenotype terms relevant for prostate cancer
## Restrict the phenotype terms to the prostate primary site
oncoterms_prostate <- phenOncoX::get_terms(
  site = "Prostate",
  cache_dir = download_dir
)
## Build an interactive table of the prostate terms: key columns first,
## with CSV/Excel export buttons
prostate_terms_table <- oncoterms_prostate$records |>
  dplyr::select(primary_site, cui, cui_name, dplyr::everything()) |>
  DT::datatable(
    escape = FALSE,
    extensions = c("Buttons", "Responsive"),
    width = "100%",
    options = list(
      buttons = c("csv", "excel"),
      dom = "Bfrtip"
    )
  )
Session Info
# Print the R session details, which are useful for reproducibility;
# set eval = FALSE if you don't want this info to appear
sessionInfo()
#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> loaded via a namespace (and not attached):
#> [1] jsonlite_1.8.9 dplyr_1.1.4 compiler_4.4.2 crayon_1.5.3
#> [5] tidyselect_1.2.1 phenOncoX_0.8.2 jquerylib_0.1.4 systemfonts_1.1.0
#> [9] textshaping_0.4.1 yaml_2.3.10 fastmap_1.2.0 R6_2.5.1
#> [13] generics_0.1.3 curl_6.0.1 knitr_1.49 htmlwidgets_1.6.4
#> [17] tibble_3.2.1 desc_1.4.3 bslib_0.8.0 pillar_1.10.0
#> [21] rlang_1.1.4 DT_0.33 cachem_1.1.0 lgr_0.4.4
#> [25] xfun_0.49 fs_1.6.5 sass_0.4.9 cli_3.6.3
#> [29] pkgdown_2.1.1 withr_3.0.2 magrittr_2.0.3 crosstalk_1.2.1
#> [33] digest_0.6.37 lifecycle_1.0.4 vctrs_0.6.5 evaluate_1.0.1
#> [37] gargle_1.5.2 glue_1.8.0 ragg_1.3.3 googledrive_2.1.1
#> [41] rmarkdown_2.29 purrr_1.0.2 httr_1.4.7 tools_4.4.2
#> [45] pkgconfig_2.0.3 htmltools_0.5.8.1
References
Huang, Kuan-Lin, R Jay Mashl, Yige Wu, Deborah I Ritter, Jiayin Wang,
Clara Oh, Marta Paczkowska, et al. 2018. “Pathogenic Germline
Variants in 10,389 Adult Cancers.” Cell 173 (2):
355–370.e14. http://dx.doi.org/10.1016/j.cell.2018.03.039.
Kundra, Ritika, Hongxin Zhang, Robert Sheridan, Sahussapont Joseph
Sirintrapun, Avery Wang, Angelica Ochoa, Manda Wilson, et al. 2021.
“OncoTree: A Cancer Classification System for
Precision Oncology.” JCO Clin Cancer Inform 5
(February): 221–30. http://dx.doi.org/10.1200/CCI.20.00108.
Louden, Diana Nelson. 2020. “MedGen: NCBI’s Portal to Information on Medical Conditions
with a Genetic Component.” Med. Ref. Serv. Q. 39 (2):
183–91. https://doi.org/10.1080/02763869.2020.1726152.
Malone, James, Ele Holloway, Tomasz Adamusiak, Misha Kapushesky, Jie
Zheng, Nikolay Kolesnikov, Anna Zhukova, Alvis Brazma, and Helen
Parkinson. 2010. “Modeling Sample Variables with an Experimental
Factor Ontology.” Bioinformatics 26 (8): 1112–18. http://dx.doi.org/10.1093/bioinformatics/btq099.
Schriml, Lynn Marie, Cesar Arze, Suvarna Nadendla, Yu-Wei Wayne Chang,
Mark Mazaitis, Victor Felix, Gang Feng, and Warren Alden Kibbe. 2012.
“Disease Ontology: A Backbone for Disease Semantic
Integration.” Nucleic Acids Res. 40 (Database issue):
D940–6. http://dx.doi.org/10.1093/nar/gkr972.