
Function that makes input data for an UpSet plot (filtering/intersection results) for the somatic-germline classification procedure
Source: R/germline.R
plot_filtering_stats_germline.Rd
Parameters of the helper `make_upset_plot_data`: `calls` – unfiltered calls (germline + somatic); `config` – config.
Usage
plot_filtering_stats_germline(
report = NULL,
plot_margin_top = 50,
plot_margin_bottom = 20,
plot_margin_left = 20,
plot_margin_right = 20,
font_family = "Helvetica",
font_size = 15,
pie_line_width = 3,
opacity_filtered_categories = 0.4,
hole_size_pie = 0.4
)
Arguments
- report
list object with PCGR report content
- plot_margin_top
top margin of the plot
- plot_margin_bottom
bottom margin of the plot
- plot_margin_left
left margin of the plot
- plot_margin_right
right margin of the plot
- font_family
font family for plot text
- font_size
font size for plot text
- pie_line_width
line width for pie chart segments
- opacity_filtered_categories
opacity for filtered categories
- hole_size_pie
size of the hole in the pie chart
Details
Returns: UpSet data.
export make_upset_plot_data <- function(calls, config) columns <- c() if (config["tumor_only"]["exclude_pon"] == TRUE) columns <- c(columns, "STATUS_PON")if (config["tumor_only"]["exclude_likely_hom_germline"] == TRUE) columns <- c(columns, "STATUS_GERMLINE_HOM")if (config["tumor_only"]["exclude_likely_het_germline"] == TRUE) columns <- c(columns, "STATUS_GERMLINE_HET")if (config["tumor_only"]["exclude_dbsnp_nonsomatic"] == TRUE) columns <- c(columns, "STATUS_DBSNP")assertable::assert_colnames( calls, c("VAR_ID", "gnomADe_AF_ABOVE_TOLERATED", "STATUS_CLINVAR_GERMLINE"), only_colnames = F, quiet = T) df <- dplyr::select(calls, .data$VAR_ID, .data$gnomADe_AF_ABOVE_TOLERATED, .data$STATUS_CLINVAR_GERMLINE) for (c in columns) if (c %in% colnames(calls)) df, c <- calls, cfor (v in colnames(df)) if (v != "VAR_ID") df, v <- as.integer(df, v)df <- df |> dplyr::rename( gnomAD = .data$gnomADe_AF_ABOVE_TOLERATED, ClinVar = .data$STATUS_CLINVAR_GERMLINE) if ("STATUS_PON" %in% colnames(df)) df <- dplyr::rename(df, Panel_Of_Normals = .data$STATUS_PON)if ("STATUS_GERMLINE_HOM" %in% colnames(df)) df <- dplyr::rename(df, HomAF = .data$STATUS_GERMLINE_HOM)if ("STATUS_GERMLINE_HET" %in% colnames(df)) df <- dplyr::rename(df, HetAF = .data$STATUS_GERMLINE_HET)if ("STATUS_DBSNP" %in% colnames(df)) df <- dplyr::rename(df, dbSNP = .data$STATUS_DBSNP)return(df)
#' #' Function that makes an upset calls for germline-filtered variants #' classification procedure #' #' param upset_data unfiltered calls (germline + somatic) #' #' return p #' #' export upset_plot_tumor_only <- function(upset_data) isets <- c() all_negative_filters <- c() for (c in colnames(upset_data)) if (c != "VAR_ID") if (length(unique(upset_data, c == 1)) == 1) if (unique(upset_data, c == 1) == T) isets <- c(isets, c) else all_negative_filters <- c(all_negative_filters, c)else isets <- c(isets, c)for (m in all_negative_filters) upset_data, m <- NULLp <- UpSetR::upset(upset_data, sets = isets, sets.bar.color = "#56B4E9", order.by = "freq", nintersects = 20, text.scale = 1.5, show.numbers = T, point.size = 6, color.pal = "Blues", empty.intersections = "on") return(p) Function that generates variant filtering statistics (i.e. removal of likely non-somatic/germline events) for callsets coming from tumor-only sequencing
#' param callset list object with unfiltered calls (callset$variant_unfiltered) #' param settings list object with PCGR run configuration settings #' #' export tumor_only_vfilter_stats <- function(callset = NULL, settings = NULL)
## raw, unfiltered set of (PASSED) variant calls that
## should be subject to filtering
vcalls <- callset$variant_unfiltered
to_settings <- settings$conf$somatic_snv$tumor_only## initiate report object with tumor-only variant filter stats
to_stats <-
pcgrr::init_tumor_only_content()to_stats[['vfilter']][['unfiltered_n']] <-
NROW(vcalls)## Assign statistics to successive filtering levels for
## different evidence criteria
## excluded germline calls found in gnomAD
to_stats[["vfilter"]][["gnomad_n_remain"]] <-
NROW(vcalls) -
NROW(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_GNOMAD", ])
# pcgrr::log4r_info(paste0("Excluding coinciding germline variants in ",
# "gnomAD populations"))
# pcgrr::log4r_info(paste0("Total sample calls remaining: ",
# to_stats$vfilter[["gnomad_n_remain"]]))## excluded germline calls found in ClinVar
to_stats$vfilter[["clinvar_n_remain"]] <-
to_stats$vfilter[["gnomad_n_remain"]] -
NROW(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_CLINVAR", ])
# pcgrr::log4r_info(paste0("Excluding coinciding germline variants in ClinVar"))
# pcgrr::log4r_info(paste0("Total sample calls remaining: ",
# to_stats$vfilter[["clinvar_n_remain"]]))## excluded germline calls found in panel of normals (if provided)
to_stats$vfilter[["pon_n_remain"]] <-
to_stats$vfilter[["clinvar_n_remain"]]
if (as.logical(to_settings[["exclude_pon"]]) == TRUE) {
to_stats$vfilter[["pon_n_remain"]] <-
to_stats$vfilter[["pon_n_remain"]] -
NROW(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_PON", ])
# pcgrr::log4r_info(
# paste0("Excluding putative germline variants found in calls ",
# "from panel-of-normals (PON)"))
# pcgrr::log4r_info(
# paste0("Total sample calls remaining: ",
# to_stats$vfilter[["pon_n_remain"]]))
}## excluded germline calls found with 100% allelic fraction
## (likely homozygous germline variants)
to_stats$vfilter[["hom_n_remain"]] <-
to_stats$vfilter[["pon_n_remain"]]
if (as.logical(to_settings[["exclude_likely_hom_germline"]]) == TRUE) {
to_stats$vfilter[["hom_n_remain"]] <-
to_stats$vfilter[["hom_n_remain"]] -
NROW(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_HOMOZYGOUS", ])
# pcgrr::log4r_info(
# paste0("Excluding likely homozygous germline variants found ",
# "as variants with 100% allelic fraction"))
# pcgrr::log4r_info(paste0("Total sample calls remaining: ",
# to_stats$vfilter[["hom_n_remain"]]))
}## excluded germline calls found as likely heterozygous germline variants
to_stats$vfilter[["het_n_remain"]] <-
to_stats$vfilter[["hom_n_remain"]]
if (as.logical(to_settings[["exclude_likely_het_germline"]]) == TRUE) {
to_stats$vfilter[["het_n_remain"]] <-
to_stats$vfilter[["het_n_remain"]] -
NROW(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_HETEROZYGOUS", ])
# pcgrr::log4r_info(paste0(
# "Excluding likely heterozygous germline variants found as variants ",
# "with 40-60% allelic fraction and recorded in gnomAD + dbSNP"))
# pcgrr::log4r_info(paste0("Total sample calls remaining: ",
# to_stats$vfilter[["het_n_remain"]]))
}## excluded calls with dbSNP germline status (if set in config)
to_stats$vfilter[["dbsnp_n_remain"]] <-
to_stats$vfilter[["het_n_remain"]]
if (as.logical(to_settings[["exclude_dbsnp_nonsomatic"]]) == TRUE) { #pcgrr::log4r_info(
# paste0("Excluding non-somatically associated dbSNP variants ",
# "(dbSNP - not recorded as somatic in ClinVar",
# "and not registered in COSMIC or found in TCGA")) to_stats$vfilter[["dbsnp_n_remain"]] <-
to_stats$vfilter[["dbsnp_n_remain"]] -
NROW(vcalls[vcalls$SOMATIC_CLASSIFICATION == "GERMLINE_DBSNP", ])
#pcgrr::log4r_info(paste0("Total sample calls remaining: ",
# to_stats$vfilter[["dbsnp_n_remain"]]))
}## excluded non-exonic calls (if set in config)
to_stats$vfilter[["nonexonic_n_remain"]] <-
to_stats$vfilter[["dbsnp_n_remain"]]
if (as.logical(to_settings[["exclude_nonexonic"]]) == TRUE) { #pcgrr::log4r_info(
# paste0("Excluding non-somatically associated dbSNP variants ",
# "(dbSNP - not recorded as somatic in ClinVar",
# "and not registered in COSMIC or found in TCGA")) to_stats$vfilter[["nonexonic_n_remain"]] <-
to_stats$vfilter[["nonexonic_n_remain"]] -
NROW(vcalls[vcalls$EXONIC_STATUS == "nonexonic", ])
#pcgrr::log4r_info(paste0("Total sample calls remaining: ",
# to_stats$vfilter[["dbsnp_n_remain"]]))
}to_stats[["eval"]] <- TRUEfor (db_filter in c("gnomad", "dbsnp", "pon",
"clinvar", "hom", "het", "nonexonic")) {
if (to_stats[["vfilter"]][[paste0(db_filter, "_n_remain")]] > 0 &
to_stats[["vfilter"]][["unfiltered_n"]] > 0) {
to_stats[["vfilter"]][[paste0(db_filter, "_frac_remain")]] <-
round((as.numeric(to_stats[["vfilter"]][[paste0(db_filter,
"_n_remain")]]) /
to_stats[["vfilter"]][["unfiltered_n"]]) * 100, digits = 2)
}
}
return(to_stats)