Skip to contents



Installation


## Install the 'remotes' helper package only when it is not already available.
## requireNamespace() checks for the package directly instead of scanning the
## full installed.packages() matrix.
if (!requireNamespace("remotes", quietly = TRUE)) {
  install.packages("remotes")
}
## Install phenOncoX from its GitHub repository
remotes::install_github("sigven/phenOncoX")



Get OncoTree terms

This shows how to retrieve cancer phenotype terms as defined in OncoTree (max tree depth = 2).


## Use the temporary directory of this R session as the download cache
download_dir <- tempdir()

## Retrieve OncoTree terms, limited to a maximum tree depth of 2
oncotree <- phenOncoX::get_tree(
  max_tree_depth = 2,
  cache_dir = download_dir
)
#> INFO  [2024-11-19 14:20:05] Downloading remote dataset from Google Drive to cache_dir
#> INFO  [2024-11-19 14:20:06] Reading from cache_dir = '/tmp/RtmpxmppUp', argument force_download = FALSE
#> INFO  [2024-11-19 14:20:06] Object 'oncotree_core' sucessfully loaded
#> INFO  [2024-11-19 14:20:06] md5 checksum is valid: ed3bfdab9736bd772d4e24e119f629a2
#> INFO  [2024-11-19 14:20:06] Retrieved 806 records
#> INFO  [2024-11-19 14:20:06] Limiting OncoTree terms to those with a maximum tree depth of = '2'

## Count the OncoTree records that were retrieved
nrow(oncotree[["records"]])
#> [1] 251

## Print the metadata describing the underlying source resources
oncotree[["metadata"]]
#>     source                                    source_description
#> 1 OncoTree A cancer classification system for precision oncology
#>                         source_url
#> 1 http://oncotree.mskcc.org/#/home
#>                                         source_citation source_version
#> 1 Kundra et al., JCO Clin Cancer Inform, 2021; 33625877     2021_11_02
#>   source_abbreviation source_license
#> 1            oncotree      CC BY 4.0
#>                             source_license_url
#> 1 https://creativecommons.org/licenses/by/4.0/



Get all (OncoTree-expanded) cancer phenotype terms


## Retrieve the full set of (OncoTree-expanded) cancer phenotype terms
oncoterms <- phenOncoX::get_terms(cache_dir = download_dir)
#> INFO  [2024-11-19 14:20:06] Downloading remote dataset from Google Drive to cache_dir
#> INFO  [2024-11-19 14:20:10] Reading from cache_dir = '/tmp/RtmpxmppUp', argument force_download = FALSE
#> INFO  [2024-11-19 14:20:10] Object 'oncotree_expanded' sucessfully loaded
#> INFO  [2024-11-19 14:20:10] md5 checksum is valid: 912046e9270261a8ea1d4d61f934d2fa
#> INFO  [2024-11-19 14:20:10] Retrieved 25381 records

## Count the cancer phenotype term records that were retrieved
nrow(oncoterms[["records"]])
#> [1] 25381



Number of (OncoTree-expanded) cancer phenotype terms per primary tumor type/tissue


## Load all cancer phenotype terms, using the same cache directory as before
oncoterms <- phenOncoX::get_terms(cache_dir = download_dir)
#> INFO  [2024-11-19 14:20:10] Reading from cache_dir = '/tmp/RtmpxmppUp', argument force_download = FALSE
#> INFO  [2024-11-19 14:20:10] Object 'oncotree_expanded' sucessfully loaded
#> INFO  [2024-11-19 14:20:10] Retrieved n = 25381 records

## Number of terms per primary site, most frequent first.
## Records without a primary site are left out of the tally.
oncoterms$records |>
  dplyr::filter(!is.na(primary_site)) |>
  dplyr::count(primary_site, name = "num_terms", sort = TRUE) |>
  as.data.frame()
#>                 primary_site num_terms
#> 1                   Lymphoid      4038
#> 2                Soft Tissue      2828
#> 3                  CNS/Brain      2296
#> 4              Head and Neck      2273
#> 5                       Skin      1817
#> 6                    Myeloid      1778
#> 7                       Lung      1086
#> 8               Colon/Rectum       966
#> 9       Ovary/Fallopian Tube       851
#> 10                    Breast       781
#> 11         Esophagus/Stomach       719
#> 12                      Bone       551
#> 13     Bladder/Urinary Tract       482
#> 14                    Uterus       451
#> 15             Biliary Tract       416
#> 16                     Liver       415
#> 17                    Kidney       396
#> 18                  Pancreas       343
#> 19 Peripheral Nervous System       292
#> 20                    Cervix       287
#> 21                       Eye       274
#> 22              Vulva/Vagina       265
#> 23                   Thyroid       250
#> 24                    Testis       239
#> 25                  Prostate       207
#> 26             Other/Unknown       176
#> 27                    Pleura       124
#> 28                Peritoneum        99
#> 29                    Thymus        87
#> 30             Adrenal Gland        70
#> 31                     Penis        61
#> 32          Ampulla of Vater        46

Get all (OncoTree-expanded) cancer phenotype terms relevant for prostate cancer


## Retrieve the cancer phenotype terms for the "Prostate" site only
oncoterms_prostate <- phenOncoX::get_terms(
  site = "Prostate",
  cache_dir = download_dir
)

## Build an interactive table of the prostate terms: put the site and CUI
## columns first, then add CSV/Excel export buttons and a responsive layout
prostate_terms_table <- oncoterms_prostate$records |>
  dplyr::select(primary_site, cui, cui_name, dplyr::everything()) |>
  DT::datatable(
    escape = FALSE,
    extensions = c("Buttons", "Responsive"),
    width = "100%",
    options = list(
      buttons = c("csv", "excel"),
      dom = "Bfrtip"
    )
  )




Session Info

# Print the R session details (useful for reproducibility); set eval = FALSE
# to keep this information out of the rendered output
utils::sessionInfo()
#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 22.04.5 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0
#> 
#> locale:
#>  [1] LC_CTYPE=C.UTF-8       LC_NUMERIC=C           LC_TIME=C.UTF-8       
#>  [4] LC_COLLATE=C.UTF-8     LC_MONETARY=C.UTF-8    LC_MESSAGES=C.UTF-8   
#>  [7] LC_PAPER=C.UTF-8       LC_NAME=C              LC_ADDRESS=C          
#> [10] LC_TELEPHONE=C         LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C   
#> 
#> time zone: UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> loaded via a namespace (and not attached):
#>  [1] jsonlite_1.8.9    dplyr_1.1.4       compiler_4.4.2    crayon_1.5.3     
#>  [5] tidyselect_1.2.1  phenOncoX_0.8.1   jquerylib_0.1.4   systemfonts_1.1.0
#>  [9] textshaping_0.4.0 yaml_2.3.10       fastmap_1.2.0     R6_2.5.1         
#> [13] generics_0.1.3    curl_6.0.1        knitr_1.49        htmlwidgets_1.6.4
#> [17] tibble_3.2.1      desc_1.4.3        bslib_0.8.0       pillar_1.9.0     
#> [21] rlang_1.1.4       DT_0.33           utf8_1.2.4        cachem_1.1.0     
#> [25] lgr_0.4.4         xfun_0.49         fs_1.6.5          sass_0.4.9       
#> [29] cli_3.6.3         withr_3.0.2       pkgdown_2.1.1     magrittr_2.0.3   
#> [33] crosstalk_1.2.1   digest_0.6.37     lifecycle_1.0.4   vctrs_0.6.5      
#> [37] evaluate_1.0.1    gargle_1.5.2      glue_1.8.0        ragg_1.3.3       
#> [41] googledrive_2.1.1 fansi_1.0.6       httr_1.4.7        rmarkdown_2.29   
#> [45] purrr_1.0.2       tools_4.4.2       pkgconfig_2.0.3   htmltools_0.5.8.1



References

Huang, Kuan-Lin, R Jay Mashl, Yige Wu, Deborah I Ritter, Jiayin Wang, Clara Oh, Marta Paczkowska, et al. 2018. “Pathogenic Germline Variants in 10,389 Adult Cancers.” Cell 173 (2): 355–370.e14. http://dx.doi.org/10.1016/j.cell.2018.03.039.
Kundra, Ritika, Hongxin Zhang, Robert Sheridan, Sahussapont Joseph Sirintrapun, Avery Wang, Angelica Ochoa, Manda Wilson, et al. 2021. “OncoTree: A Cancer Classification System for Precision Oncology.” JCO Clin Cancer Inform 5 (February): 221–30. http://dx.doi.org/10.1200/CCI.20.00108.
Louden, Diana Nelson. 2020. “MedGen: NCBI’s Portal to Information on Medical Conditions with a Genetic Component.” Med. Ref. Serv. Q. 39 (2): 183–91. https://doi.org/10.1080/02763869.2020.1726152.
Malone, James, Ele Holloway, Tomasz Adamusiak, Misha Kapushesky, Jie Zheng, Nikolay Kolesnikov, Anna Zhukova, Alvis Brazma, and Helen Parkinson. 2010. “Modeling Sample Variables with an Experimental Factor Ontology.” Bioinformatics 26 (8): 1112–18. http://dx.doi.org/10.1093/bioinformatics/btq099.
Schriml, Lynn Marie, Cesar Arze, Suvarna Nadendla, Yu-Wei Wayne Chang, Mark Mazaitis, Victor Felix, Gang Feng, and Warren Alden Kibbe. 2012. “Disease Ontology: A Backbone for Disease Semantic Integration.” Nucleic Acids Res. 40 (Database issue): D940–6. http://dx.doi.org/10.1093/nar/gkr972.