Skip to contents

Parameters of the helper shown under Details: `calls` (unfiltered calls, germline + somatic) and `config` (config).

Usage

plot_filtering_stats_germline(
  report = NULL,
  plot_margin_top = 50,
  plot_margin_bottom = 20,
  plot_margin_left = 20,
  plot_margin_right = 20,
  font_family = "Helvetica",
  font_size = 15,
  pie_line_width = 3,
  opacity_filtered_categories = 0.4,
  hole_size_pie = 0.4
)

Arguments

report

list object with PCGR report content

plot_margin_top

top margin of the plot

plot_margin_bottom

bottom margin of the plot

plot_margin_left

left margin of the plot

plot_margin_right

right margin of the plot

font_family

font family for plot text

font_size

font size for plot text

pie_line_width

line width for pie chart segments

opacity_filtered_categories

opacity for filtered categories

hole_size_pie

size of the hole in the pie chart

Value

`filtering_stats`: a list with a data frame and a plotly pie chart.

Details

return upset data

#' Build the data frame used for the UpSet plot of germline filters
#'
#' Keeps the variant identifier plus one 0/1 indicator column per
#' germline filter, and renames the indicator columns to short display
#' names.
#'
#' @param calls unfiltered calls (germline + somatic). Must contain the
#'   columns `VAR_ID`, `gnomADe_AF_ABOVE_TOLERATED` and
#'   `STATUS_CLINVAR_GERMLINE`.
#' @param config config. The `exclude_*` entries of `config$tumor_only`
#'   decide which optional filter columns are carried over.
#'
#' @return upset data: `VAR_ID` plus integer columns `gnomAD`, `ClinVar`
#'   and, when enabled and present in `calls`, `Panel_Of_Normals`,
#'   `HomAF`, `HetAF` and `dbSNP`.
#'
#' @export
make_upset_plot_data <- function(calls, config) {
  # Optional filter columns, keyed by the config switch that enables them.
  optional_columns <- c(
    exclude_pon = "STATUS_PON",
    exclude_likely_hom_germline = "STATUS_GERMLINE_HOM",
    exclude_likely_het_germline = "STATUS_GERMLINE_HET",
    exclude_dbsnp_nonsomatic = "STATUS_DBSNP"
  )
  # `[[` (not `[`) is required to reach the flag itself; as.logical()
  # mirrors how tumor_only_vfilter_stats() reads the same switches, and
  # isTRUE() treats a missing switch as "off" instead of erroring.
  enabled <- vapply(
    names(optional_columns),
    function(option) {
      isTRUE(as.logical(config[["tumor_only"]][[option]]))
    },
    logical(1)
  )
  columns <- unname(optional_columns[enabled])

  required_columns <- c(
    "VAR_ID",
    "gnomADe_AF_ABOVE_TOLERATED",
    "STATUS_CLINVAR_GERMLINE"
  )
  assertable::assert_colnames(
    calls,
    required_columns,
    only_colnames = FALSE,
    quiet = TRUE
  )

  # any_of() silently skips enabled filters whose column is absent.
  df <- dplyr::select(
    calls,
    dplyr::all_of(required_columns),
    dplyr::any_of(columns)
  )

  # `[[` extracts the column vector for both data.frames and tibbles.
  for (v in setdiff(colnames(df), "VAR_ID")) {
    df[[v]] <- as.integer(df[[v]])
  }

  # Named any_of() renames only the columns that are actually present.
  display_names <- c(
    gnomAD = "gnomADe_AF_ABOVE_TOLERATED",
    ClinVar = "STATUS_CLINVAR_GERMLINE",
    Panel_Of_Normals = "STATUS_PON",
    HomAF = "STATUS_GERMLINE_HOM",
    HetAF = "STATUS_GERMLINE_HET",
    dbSNP = "STATUS_DBSNP"
  )
  dplyr::rename(df, dplyr::any_of(display_names))
}

#' Function that makes an upset plot for the germline-filtered variants
#' classification procedure
#'
#' @param upset_data data frame with a `VAR_ID` column and one 0/1
#'   indicator column per filter
#'
#' @return p the object returned by `UpSetR::upset()`
#'
#' @export
upset_plot_tumor_only <- function(upset_data) {
  filter_columns <- setdiff(colnames(upset_data), "VAR_ID")

  # A filter is "all negative" when it has values and none of them is 1.
  # na.rm = TRUE keeps an NA entry from aborting the scalar test.
  is_all_negative <- vapply(
    filter_columns,
    function(column) {
      hits <- upset_data[[column]] == 1
      length(hits) > 0 && !any(hits, na.rm = TRUE)
    },
    logical(1)
  )

  isets <- filter_columns[!is_all_negative]
  if (length(isets) == 0) {
    # The original accumulated into c(), i.e. NULL when nothing is kept.
    isets <- NULL
  }

  # Drop filters that never fire so they are not offered as sets.
  kept_columns <- setdiff(
    colnames(upset_data),
    filter_columns[is_all_negative]
  )
  upset_data <- upset_data[kept_columns]

  p <- UpSetR::upset(
    upset_data,
    sets = isets,
    sets.bar.color = "#56B4E9",
    order.by = "freq",
    nintersects = 20,
    text.scale = 1.5,
    show.numbers = TRUE,
    point.size = 6,
    color.pal = "Blues",
    empty.intersections = "on"
  )
  p
}

# Function that generates variant filtering statistics (i.e. removal of
# likely non-somatic/germline events) for callsets coming from tumor-only
# sequencing

#' Variant filtering statistics for tumor-only callsets
#'
#' Counts how many calls remain after each successive germline /
#' non-somatic filter and stores the counts, and their percentage of the
#' unfiltered callset, in the tumor-only report object.
#'
#' @param callset list object with unfiltered calls
#'   (callset$variant_unfiltered)
#' @param settings list object with PCGR run configuration settings
#'
#' @return the list created by `pcgrr::init_tumor_only_content()` with
#'   `eval` set to `TRUE` and the `vfilter` entries `unfiltered_n`,
#'   `<filter>_n_remain` and (when both counts are positive)
#'   `<filter>_frac_remain` filled in
#'
#' @export
tumor_only_vfilter_stats <- function(callset = NULL, settings = NULL) {
  ## raw, unfiltered set of (PASSED) variant calls that
  ## should be subject to filtering
  vcalls <- callset$variant_unfiltered
  to_settings <- settings$conf$somatic_snv$tumor_only

  ## initiate report object with tumor-only variant filter stats
  to_stats <- pcgrr::init_tumor_only_content()
  to_stats[["vfilter"]][["unfiltered_n"]] <- NROW(vcalls)

  # Number of calls whose `column` equals `value`. sum(..., na.rm = TRUE)
  # ignores NA entries, whereas row-subsetting with an NA condition would
  # add an all-NA row per NA and inflate the count.
  n_with_value <- function(column, value) {
    sum(vcalls[[column]] == value, na.rm = TRUE)
  }

  # A filter is active only when its setting parses to a single TRUE;
  # a missing setting counts as "off" instead of aborting the `if`.
  is_enabled <- function(option) {
    isTRUE(as.logical(to_settings[[option]]))
  }

  ## Assign statistics to successive filtering levels for
  ## different evidence criteria. `option = NA` marks filters that
  ## are always applied (gnomAD, ClinVar).
  germline_steps <- data.frame(
    stat = c("gnomad", "clinvar", "pon", "hom", "het", "dbsnp"),
    label = c(
      "GERMLINE_GNOMAD",
      "GERMLINE_CLINVAR",
      "GERMLINE_PON",
      "GERMLINE_HOMOZYGOUS",
      "GERMLINE_HETEROZYGOUS",
      "GERMLINE_DBSNP"
    ),
    option = c(
      NA,
      NA,
      "exclude_pon",
      "exclude_likely_hom_germline",
      "exclude_likely_het_germline",
      "exclude_dbsnp_nonsomatic"
    ),
    stringsAsFactors = FALSE
  )

  n_remain <- to_stats[["vfilter"]][["unfiltered_n"]]
  for (i in seq_len(nrow(germline_steps))) {
    option <- germline_steps$option[i]
    if (is.na(option) || is_enabled(option)) {
      n_remain <- n_remain -
        n_with_value("SOMATIC_CLASSIFICATION", germline_steps$label[i])
    }
    # Every level is recorded, so a disabled filter simply carries the
    # previous level's count forward.
    stat_name <- paste0(germline_steps$stat[i], "_n_remain")
    to_stats[["vfilter"]][[stat_name]] <- n_remain
  }

  ## excluded non-exonic calls (if set in config)
  # NOTE(review): this subtracts every non-exonic call, including any
  # already removed by a germline filter above - confirm that
  # EXONIC_STATUS and SOMATIC_CLASSIFICATION cannot overlap this way.
  if (is_enabled("exclude_nonexonic")) {
    n_remain <- n_remain - n_with_value("EXONIC_STATUS", "nonexonic")
  }
  to_stats[["vfilter"]][["nonexonic_n_remain"]] <- n_remain

  to_stats[["eval"]] <- TRUE

  # Percentages are only written when both counts are positive; otherwise
  # the value set by init_tumor_only_content() is left untouched.
  n_unfiltered <- to_stats[["vfilter"]][["unfiltered_n"]]
  for (db_filter in c("gnomad", "dbsnp", "pon",
                      "clinvar", "hom", "het", "nonexonic")) {
    n_left <- to_stats[["vfilter"]][[paste0(db_filter, "_n_remain")]]
    if (n_left > 0 && n_unfiltered > 0) {
      to_stats[["vfilter"]][[paste0(db_filter, "_frac_remain")]] <-
        round((as.numeric(n_left) / n_unfiltered) * 100, digits = 2)
    }
  }

  to_stats
}
Function that generates a pie chart for germline filtering statistics for callsets coming from tumor-only sequencing