## Install 'remotes' only when it is not already available.
## requireNamespace() looks the package up directly; matching against the
## whole installed.packages() matrix is slow and also compares the name with
## unrelated fields (Depends, Imports, Suggests, ...).
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
remotes::install_github('sigven/phenOncoX')
Installation
Core OncoTree terms
This shows how to retrieve cancer phenotype terms, as defined in OncoTree (max tree depth = 2).
## Load the OncoTree data (tree depth limited to 2), using the session's
## temporary directory as the cache directory
download_dir <- tempdir()
oncotree <- phenOncoX::get_tree(
  max_tree_depth = 2,
  cache_dir = download_dir
)
## Number of records
nrow(oncotree$records)
[1] 265
## Show metadata for underlying resources
oncotree$metadata
 source source_description
1 OncoTree A cancer classification system for precision oncology
source_url
1 http://oncotree.mskcc.org/#/home
source_citation source_version
1 Kundra et al., JCO Clin Cancer Inform, 2021; 33625877 2025_10_03
source_abbreviation source_license
1 oncotree CC BY 4.0
source_license_url
1 https://creativecommons.org/licenses/by/4.0/
Cancer phenotype terms
## Retrieve the full set of cancer phenotype terms (OncoTree-expanded)
oncoterms <- phenOncoX::get_terms(cache_dir = download_dir)
## Number of records
nrow(oncoterms$records)
[1] 27236
Term statistics per primary tumor type/tissue
## Retrieve all cancer phenotype terms
oncoterms <- phenOncoX::get_terms(cache_dir = download_dir)
## Number of terms per primary site, most frequent site first.
## Records without a primary site are excluded; the result is converted to a
## plain data.frame so that all rows are printed.
oncoterms$records |>
  dplyr::filter(!is.na(primary_site)) |>
  dplyr::count(primary_site, name = "num_terms", sort = TRUE) |>
  as.data.frame()
 primary_site num_terms
1 Lymphoid 4306
2 Soft Tissue 2996
3 Head and Neck 2462
4 CNS/Brain 2432
5 Myeloid 1927
6 Skin 1880
7 Lung 1186
8 Colon/Rectum 1110
9 Ovary/Fallopian Tube 927
10 Breast 815
11 Esophagus/Stomach 763
12 Bone 515
13 Bladder/Urinary Tract 495
14 Uterus 481
15 Liver 470
16 Peripheral Nervous System 467
17 Kidney 458
18 Pancreas 370
19 Biliary Tract 357
20 Eye 296
21 Cervix 286
22 Vulva/Vagina 265
23 Thyroid 255
24 Testis 242
25 Prostate 225
26 Other/Unknown 172
27 Pleura 135
28 Peritoneum 109
29 Adrenal Gland 102
30 Thymus 95
31 Penis 63
32 Ampulla of Vater 47
Terms relevant for prostate cancer
## Retrieve the phenotype terms for site = "Prostate"
oncoterms_prostate <- phenOncoX::get_terms(
  site = "Prostate",
  cache_dir = download_dir
)
# ## Make as datatable
# prostate_terms_table <- DT::datatable(
# dplyr::select(
# oncoterms_prostate$records,
# primary_site, cui, cui_name,
# dplyr::everything()),
# escape = FALSE,
# extensions = c("Buttons", "Responsive"),
# width = "100%",
# options = list(
# buttons = c("csv", "excel"),
# dom = "Bfrtip"))
## Build an interactive table of the prostate terms, with primary_site, cui
## and cui_name moved to the front and all remaining columns kept after them
prostate_terms_table <- oncoterms_prostate$records |>
  dplyr::select(primary_site, cui, cui_name, dplyr::everything()) |>
  reactable::reactable(
    filterable = TRUE,
    sortable = TRUE,
    striped = TRUE,
    compact = TRUE,
    searchable = TRUE,
    defaultPageSize = 10
    # downloadable = TRUE
  )
Session Info
R version 4.6.0 (2026-04-24)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 24.04.4 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: UTC
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
loaded via a namespace (and not attached):
[1] jsonlite_2.0.0 dplyr_1.2.1 compiler_4.6.0 crayon_1.5.3
[5] tidyselect_1.2.1 phenOncoX_1.2.2 yaml_2.3.12 fastmap_1.2.0
[9] R6_2.6.1 generics_0.1.4 curl_7.1.0 knitr_1.51
[13] htmlwidgets_1.6.4 tibble_3.3.1 reactable_0.4.5 pillar_1.11.1
[17] rlang_1.2.0 lgr_0.5.2 reactR_0.6.1 xfun_0.58
[21] fs_2.1.0 otel_0.2.0 cli_3.6.6 withr_3.0.2
[25] magrittr_2.0.5 crosstalk_1.2.2 digest_0.6.39 lifecycle_1.0.5
[29] vctrs_0.7.3 evaluate_1.0.5 gargle_1.6.1 glue_1.8.1
[33] googledrive_2.1.2 rmarkdown_2.31 purrr_1.2.2 httr_1.4.8
[37] tools_4.6.0 pkgconfig_2.0.3 htmltools_0.5.9
References
Huang, Kuan-Lin, R Jay Mashl, Yige Wu, et al. 2018. “Pathogenic Germline Variants in 10,389 Adult Cancers.” Cell 173 (2): 355–370.e14. http://dx.doi.org/10.1016/j.cell.2018.03.039.
Kundra, Ritika, Hongxin Zhang, Robert Sheridan, et al. 2021. “OncoTree: A Cancer Classification System for Precision Oncology.” JCO Clin Cancer Inform 5 (February): 221–30. http://dx.doi.org/10.1200/CCI.20.00108.
Louden, Diana Nelson. 2020. “MedGen: NCBI’s Portal to Information on Medical Conditions with a Genetic Component.” Med. Ref. Serv. Q. 39 (2): 183–91. https://doi.org/10.1080/02763869.2020.1726152.
Malone, James, Ele Holloway, Tomasz Adamusiak, et al. 2010. “Modeling Sample Variables with an Experimental Factor Ontology.” Bioinformatics 26 (8): 1112–18. http://dx.doi.org/10.1093/bioinformatics/btq099.
Schriml, Lynn Marie, Cesar Arze, Suvarna Nadendla, et al. 2012. “Disease Ontology: A Backbone for Disease Semantic Integration.” Nucleic Acids Res. 40 (Database issue): D940–6. http://dx.doi.org/10.1093/nar/gkr972.
