Installation
# Install phenOncoX from GitHub. The 'remotes' helper package is installed
# first when it is not already available in the current library paths.
# requireNamespace() checks for a single package directly, instead of
# scanning the full matrix returned by installed.packages().
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
remotes::install_github("sigven/phenOncoX")
Get OncoTree terms
This shows how to retrieve cancer phenotype terms, as defined in OncoTree (max tree depth = 2)
## Fetch the OncoTree data, using the session's temporary directory as cache
download_dir <- tempdir()
oncotree <- phenOncoX::get_tree(
  max_tree_depth = 2,
  cache_dir = download_dir
)

## How many records were retrieved?
nrow(oncotree$records)
#> [1] 249

## Metadata describing the underlying resources
oncotree$metadata
#> source source_description
#> 1 OncoTree A cancer classification system for precision oncology
#> source_url
#> 1 http://oncotree.mskcc.org/#/home
#> source_citation source_version
#> 1 Kundra et al., JCO Clin Cancer Inform, 2021; 33625877 2025_04_08
#> source_abbreviation source_license
#> 1 oncotree CC BY 4.0
#> source_license_url
#> 1 https://creativecommons.org/licenses/by/4.0/
Get all (OncoTree-expanded) cancer phenotype terms
## Retrieve the complete set of cancer phenotype terms
oncoterms <- phenOncoX::get_terms(cache_dir = download_dir)

## Total number of term records
nrow(oncoterms$records)
#> [1] 14982
Number of (OncoTree-expanded) cancer phenotype terms per primary tumor type/tissue
## Retrieve the complete set of cancer phenotype terms
oncoterms <- phenOncoX::get_terms(cache_dir = download_dir)

## Count terms per primary site (records without a primary site are
## dropped), most frequent site first, shown as a plain data frame
oncoterms$records |>
  dplyr::filter(!is.na(primary_site)) |>
  dplyr::group_by(primary_site) |>
  dplyr::summarise(num_terms = dplyr::n(), .groups = "drop") |>
  dplyr::arrange(dplyr::desc(num_terms)) |>
  as.data.frame()
#> primary_site num_terms
#> 1 Lymphoid 2467
#> 2 Soft Tissue 2061
#> 3 Head and Neck 1467
#> 4 Skin 1306
#> 5 Myeloid 1110
#> 6 CNS/Brain 981
#> 7 Lung 776
#> 8 Breast 616
#> 9 Ovary/Fallopian Tube 535
#> 10 Colon/Rectum 530
#> 11 Esophagus/Stomach 477
#> 12 Bladder/Urinary Tract 318
#> 13 Kidney 314
#> 14 Pancreas 289
#> 15 Prostate 190
#> 16 Biliary Tract 168
#> 17 Bone 156
#> 18 Peripheral Nervous System 135
#> 19 Thyroid 124
#> 20 Vulva/Vagina 101
#> 21 Cervix 82
#> 22 Testis 77
#> 23 Uterus 68
#> 24 Other/Unknown 67
#> 25 Liver 44
#> 26 Eye 42
#> 27 Thymus 33
#> 28 Adrenal Gland 28
#> 29 Pleura 26
#> 30 Penis 22
#> 31 Ampulla of Vater 15
#> 32 Peritoneum 12
Get all (OncoTree-expanded) cancer phenotype terms relevant for prostate cancer
## Retrieve the phenotype terms for the "Prostate" site
oncoterms_prostate <- phenOncoX::get_terms(
  site = "Prostate",
  cache_dir = download_dir
)

## Build an interactive table: site and CUI columns first, then the rest
prostate_terms_table <- oncoterms_prostate$records |>
  dplyr::select(primary_site, cui, cui_name, dplyr::everything()) |>
  DT::datatable(
    escape = FALSE,
    extensions = c("Buttons", "Responsive"),
    width = "100%",
    options = list(
      buttons = c("csv", "excel"),
      dom = "Bfrtip"
    )
  )
Session Info
# Record the R session details for reproducibility; set eval = FALSE on this
# chunk if this information should not appear in the rendered document
utils::sessionInfo()
#> R version 4.5.1 (2025-06-13)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> loaded via a namespace (and not attached):
#> [1] jsonlite_2.0.0 dplyr_1.1.4 compiler_4.5.1 crayon_1.5.3
#> [5] tidyselect_1.2.1 phenOncoX_1.0.2 jquerylib_0.1.4 systemfonts_1.2.3
#> [9] textshaping_1.0.3 yaml_2.3.10 fastmap_1.2.0 R6_2.6.1
#> [13] generics_0.1.4 curl_7.0.0 knitr_1.50 htmlwidgets_1.6.4
#> [17] tibble_3.3.0 desc_1.4.3 bslib_0.9.0 pillar_1.11.1
#> [21] rlang_1.1.6 DT_0.34.0 cachem_1.1.0 lgr_0.5.0
#> [25] xfun_0.53 fs_1.6.6 sass_0.4.10 cli_3.6.5
#> [29] pkgdown_2.1.3 withr_3.0.2 magrittr_2.0.4 crosstalk_1.2.2
#> [33] digest_0.6.37 lifecycle_1.0.4 vctrs_0.6.5 evaluate_1.0.5
#> [37] gargle_1.6.0 glue_1.8.0 ragg_1.5.0 googledrive_2.1.2
#> [41] rmarkdown_2.29 purrr_1.1.0 httr_1.4.7 tools_4.5.1
#> [45] pkgconfig_2.0.3 htmltools_0.5.8.1
References
Huang, Kuan-Lin, R Jay Mashl, Yige Wu, Deborah I Ritter, Jiayin Wang,
Clara Oh, Marta Paczkowska, et al. 2018. “Pathogenic Germline
Variants in 10,389 Adult Cancers.” Cell 173 (2):
355–370.e14. http://dx.doi.org/10.1016/j.cell.2018.03.039.
Kundra, Ritika, Hongxin Zhang, Robert Sheridan, Sahussapont Joseph
Sirintrapun, Avery Wang, Angelica Ochoa, Manda Wilson, et al. 2021.
“OncoTree: A Cancer Classification System for
Precision Oncology.” JCO Clin Cancer Inform 5
(February): 221–30. http://dx.doi.org/10.1200/CCI.20.00108.
Louden, Diana Nelson. 2020. “MedGen: NCBI’s Portal to Information on Medical Conditions
with a Genetic Component.” Med. Ref. Serv. Q. 39 (2):
183–91. https://doi.org/10.1080/02763869.2020.1726152.
Malone, James, Ele Holloway, Tomasz Adamusiak, Misha Kapushesky, Jie
Zheng, Nikolay Kolesnikov, Anna Zhukova, Alvis Brazma, and Helen
Parkinson. 2010. “Modeling Sample Variables with an Experimental
Factor Ontology.” Bioinformatics 26 (8): 1112–18. http://dx.doi.org/10.1093/bioinformatics/btq099.
Schriml, Lynn Marie, Cesar Arze, Suvarna Nadendla, Yu-Wei Wayne Chang,
Mark Mazaitis, Victor Felix, Gang Feng, and Warren Alden Kibbe. 2012.
“Disease Ontology: A Backbone for Disease Semantic
Integration.” Nucleic Acids Res. 40 (Database issue):
D940–6. http://dx.doi.org/10.1093/nar/gkr972.