Setup
proda_col_map <- c(
"name" = "protein_id",
"diff" = "log_fc",
"t_statistic" = "t_statistic",
"pval" = "p_value",
"adj_pval" = "p_value_adjusted"
)
ionstar_ground_truth <- list(
id_column = "protein_id",
positive = list(label = "ECOLI", pattern = "ECOLI"),
negative = list(label = "HUMAN", pattern = "HUMAN")
)
ionstar_contrasts <- function(fit) {
contr <- list()
contr[["dilution_(9/7.5)_1.2"]] <- data.frame(
contrast = "dilution_(9/7.5)_1.2",
proDA::test_diff(fit, contrast = "dilution.e - dilution.d"))
contr[["dilution_(7.5/6)_1.25"]] <- data.frame(
contrast = "dilution_(7.5/6)_1.25",
proDA::test_diff(fit, contrast = "dilution.d - dilution.c"))
contr[["dilution_(6/4.5)_1.3(3)"]] <- data.frame(
contrast = "dilution_(6/4.5)_1.3(3)",
proDA::test_diff(fit, contrast = "dilution.c - dilution.b"))
contr[["dilution_(4.5/3)_1.5"]] <- data.frame(
contrast = "dilution_(4.5/3)_1.5",
proDA::test_diff(fit, contrast = "dilution.b - dilution.a"))
dplyr::bind_rows(contr)
}
datadir <- file.path(find.package("prolfquadata"), "quantdata")
inputMQfile <- file.path(datadir, "MAXQuant_IonStar2018_PXD003881.zip")
inputAnnotation <- file.path(datadir, "annotation_Ionstar2018_PXD003881.xlsx")
Dataset 1: IonStar / MaxQuant LFQ intensities
mqdata <- prolfquapp::tidyMQ_ProteinGroups(inputMQfile)
mqdata <- mqdata |> dplyr::filter(!grepl("^REV__|^CON__", proteinID))
annotation <- readxl::read_xlsx(inputAnnotation)
res <- dplyr::inner_join(mqdata, annotation, by = "raw.file")
res <- dplyr::filter(res, nr.peptides > 1)
config <- prolfqua::AnalysisConfiguration$new()
config$factors[["dilution."]] <- "sample"
config$fileName <- "raw.file"
config$workIntensity <- "mq.protein.lfq.intensity"
config$hierarchy[["proteinID"]] <- "proteinID"
config <- prolfqua::AnalysisConfiguration$new(config)
lfqdata <- prolfqua::setup_analysis(res, config)
lfqPROT <- prolfqua::LFQData$new(lfqdata, config)
lfqPROT$remove_small_intensities()
lfqhum <- lfqPROT$get_subset(dplyr::filter(lfqPROT$data, grepl("HUMAN", proteinID)))
tr <- lfqPROT$get_Transformer()
tr$log2()
tr$robscale_subset(lfqhum$get_Transformer()$log2()$lfq)
lfqtrans <- tr$lfq
fit <- proDA::proDA(se, design = ~ dilution. - 1, data_is_log_transformed = TRUE)
bb <- ionstar_contrasts(fit)
write_contrast_results(
bb,
path = "../inst/Benchresults/proDA_IonStar_MQ_LFQ",
metadata = list(
dataset = "IonStar_MQ_LFQ",
method = "proDA",
method_description = "proDA on MaxQuant LFQ protein intensities",
input_file = "MAXQuant_IonStar2018_PXD003881.zip/proteinGroups.txt",
software_version = paste0("proDA ", packageVersion("proDA")),
date = as.character(Sys.Date()),
ground_truth = ionstar_ground_truth
),
col_map = proda_col_map
)
mqdata2 <- list()
mqdata2$data <- prolfquapp::tidyMQ_Peptides(inputMQfile)
mqdata2$config <- prolfqua::create_config_MQ_peptide()
annotation <- readxl::read_xlsx(inputAnnotation)
res2 <- dplyr::inner_join(mqdata2$data, annotation, by = "raw.file")
mqdata2$config$table$factors[["dilution."]] <- "sample"
mqdata2$config$table$factors[["run_Id"]] <- "run_ID"
mqdata2$config$table$factorDepth <- 1
mqdata2$data <- prolfqua::setup_analysis(res2, mqdata2$config)
lfqdata2 <- prolfqua::LFQData$new(mqdata2$data, mqdata2$config)
lfqdata2$data <- lfqdata2$data |> dplyr::filter(!grepl("^REV__|^CON__", protein_Id))
lfqdata2$filter_proteins_by_peptide_count()
lfqdata2$remove_small_intensities()
tr2 <- lfqdata2$get_Transformer()
subset_h2 <- lfqdata2$get_copy()
subset_h2$data <- subset_h2$data |> dplyr::filter(grepl("HUMAN", protein_Id))
subset_h2 <- subset_h2$get_Transformer()$log2()$lfq
lfqNorm2 <- tr2$log2()$robscale_subset(lfqsubset = subset_h2)$lfq
agg2 <- lfqNorm2$get_Aggregator()
agg2$medpolish()
lfqtrans2 <- agg2$lfq_agg
se2 <- prolfqua::LFQDataToSummarizedExperiment(lfqtrans2)
fit2 <- proDA::proDA(se2, design = ~ dilution. - 1, data_is_log_transformed = TRUE)
bb2 <- ionstar_contrasts(fit2)
write_contrast_results(
bb2,
path = "../inst/Benchresults/proDA_IonStar_MQ_medpolish",
metadata = list(
dataset = "IonStar_MQ_medpolish",
method = "proDA",
method_description = "proDA on MaxQuant peptides with median polish aggregation",
input_file = "MAXQuant_IonStar2018_PXD003881.zip/peptides.txt",
software_version = paste0("proDA ", packageVersion("proDA")),
date = as.character(Sys.Date()),
ground_truth = ionstar_ground_truth
),
col_map = proda_col_map
)
inputFragfile <- file.path(datadir, "MSFragger_IonStar2018_PXD003881.zip")
annotation <- readxl::read_xlsx(inputAnnotation)
msstats <- read.csv(
unz(inputFragfile, "IonstarWithMSFragger/MSstats.csv"),
header = TRUE, sep = ",", stringsAsFactors = FALSE)
msstats <- tibble::tibble(msstats)
msstats$Run <- tolower(msstats$Run)
merged3 <- dplyr::inner_join(annotation, msstats, by = c("raw.file" = "Run"))
summedPep <- merged3 |>
dplyr::group_by(raw.file, run_ID, sample, ProteinName, PeptideSequence) |>
dplyr::summarize(nprecursor = dplyr::n(),
pepIntensity = sum(Intensity, na.rm = TRUE),
.groups = "drop")
atable3 <- prolfqua::AnalysisConfiguration$new()
atable3$fileName <- "raw.file"
atable3$hierarchy[["protein_Id"]] <- c("ProteinName")
atable3$hierarchy[["peptide_Id"]] <- c("PeptideSequence")
atable3$hierarchyDepth <- 1
atable3$set_response("pepIntensity")
atable3$factors[["dilution."]] <- "sample"
atable3$factors[["run"]] <- "run_ID"
atable3$factorDepth <- 1
config3 <- prolfqua::AnalysisConfiguration$new(atable3)
adata3 <- prolfqua::setup_analysis(summedPep, config3)
lfqdata3 <- prolfqua::LFQData$new(adata3, config3)
lfqdata3$filter_proteins_by_peptide_count()
lfqdata3$remove_small_intensities()
subset_h3 <- lfqdata3$get_copy()$get_Transformer()$log2()$lfq
subset_h3$data <- subset_h3$data |> dplyr::filter(grepl("HUMAN", protein_Id))
tr3 <- lfqdata3$get_Transformer()
lfqNorm3 <- tr3$log2()$robscale_subset(lfqsubset = subset_h3)$lfq
agg3 <- lfqNorm3$get_Aggregator()
agg3$medpolish()
lfqtrans3 <- agg3$lfq_agg
se3 <- prolfqua::LFQDataToSummarizedExperiment(lfqtrans3)
fit3 <- proDA::proDA(se3, design = ~ dilution. - 1, data_is_log_transformed = TRUE)
bb3 <- ionstar_contrasts(fit3)
write_contrast_results(
bb3,
path = "../inst/Benchresults/proDA_IonStar_FragPipe_MSstatsCSV",
metadata = list(
dataset = "IonStar_FragPipe_MSstatsCSV",
method = "proDA",
method_description = "proDA on FragPipe MSstats.csv with median polish aggregation",
input_file = "MSFragger_IonStar2018_PXD003881.zip/IonstarWithMSFragger/MSstats.csv",
software_version = paste0("proDA ", packageVersion("proDA")),
date = as.character(Sys.Date()),
ground_truth = ionstar_ground_truth
),
col_map = proda_col_map
)
Dataset 4: IonStar / FragPipe combined_protein.tsv
atable4 <- prolfqua::AnalysisConfiguration$new()
atable4$fileName <- "raw.file"
atable4$hierarchy[["protein_Id"]] <- c("protein")
atable4$hierarchyDepth <- 1
atable4$set_response("razor.intensity")
atable4$factors[["dilution."]] <- "sample"
atable4$factors[["run"]] <- "run_ID"
atable4$factorDepth <- 1
config4 <- prolfqua::AnalysisConfiguration$new(atable4)
adata4 <- prolfqua::setup_analysis(merged4, config4)
lfqdata4 <- prolfqua::LFQData$new(adata4, config4)
lfqdata4$remove_small_intensities()
subset_h4 <- lfqdata4$get_copy()$get_Transformer()$log2()$lfq
subset_h4$data <- subset_h4$data |> dplyr::filter(grepl("HUMAN", protein_Id))
tr4 <- lfqdata4$get_Transformer()
lfqNorm4 <- tr4$log2()$robscale_subset(lfqsubset = subset_h4)$lfq
se4 <- prolfqua::LFQDataToSummarizedExperiment(lfqNorm4)
fit4 <- proDA::proDA(se4, design = ~ dilution. - 1, data_is_log_transformed = TRUE)
bb4 <- ionstar_contrasts(fit4)
write_contrast_results(
bb4,
path = "../inst/Benchresults/proDA_IonStar_FragPipe_combined_protein",
metadata = list(
dataset = "IonStar_FragPipe_combined_protein",
method = "proDA",
method_description = "proDA on FragPipe combined_protein.tsv razor intensities",
input_file = "MSFragger_IonStar2018_PXD003881.zip/IonstarWithMSFragger/combined_protein.tsv",
software_version = paste0("proDA ", packageVersion("proDA")),
date = as.character(Sys.Date()),
ground_truth = ionstar_ground_truth
),
col_map = proda_col_map
)
tmp <- msdata::quant(full.names = TRUE)
xx <- read.csv(tmp, sep = "\t")
peptides <- prolfquapp::tidyMQ_Peptides(xx)
annotation5 <- data.frame(raw.file = unique(peptides$raw.file))
annotation5 <- annotation5 |>
dplyr::mutate(group = gsub("^6", "", raw.file)) |>
dplyr::mutate(group = gsub("_[0-9]$", "", group))
peptides <- dplyr::inner_join(annotation5, peptides)
config5 <- prolfqua::create_config_MQ_peptide()
config5$table$factors["group."] <- "group"
peptides <- prolfqua::setup_analysis(peptides, config5)
lfqpeptides <- prolfqua::LFQData$new(peptides, config5)
lfqpeptides$filter_proteins_by_peptide_count()
lfqpeptides$remove_small_intensities()
tr5 <- lfqpeptides$get_Transformer()
tr5 <- tr5$intensity_matrix(vsn::justvsn)
lfqtransformed5 <- tr5$lfq
agg5 <- lfqtransformed5$get_Aggregator()
agg5$medpolish()
lfqProt5 <- agg5$lfq_agg
write_contrast_results(
bb5,
path = "../inst/Benchresults/proDA_CPTAC_MQ",
metadata = list(
dataset = "CPTAC_MQ",
method = "proDA",
method_description = "proDA on CPTAC MaxQuant peptides with vsn + median polish",
input_file = "msdata::quant()/peptides.txt",
software_version = paste0("proDA ", packageVersion("proDA")),
date = as.character(Sys.Date()),
ground_truth = list(
id_column = "protein_id",
positive = list(label = "UPS", pattern = "_UPS"),
negative = list(label = "YEAST", pattern = "YEAST")
)
),
col_map = proda_col_map
)