Please download and install the prolfquadata package from github

conflicted::conflict_prefer("filter", "dplyr")

Load data

We start by loading the IonStar dataset and the annotation from the prolfquadata package. The method inner_join adds the annotation to the data.

datadir <- file.path(find.package("prolfquadata") , "quantdata")
inputFragfile <-  file.path(datadir, "MSFragger_IonStar2018_PXD003881.zip")
inputAnnotation <- file.path(datadir, "annotation_Ionstar2018_PXD003881.xlsx")
annotation <- readxl::read_xlsx(inputAnnotation)
## Error in get(paste0(generic, ".", class), envir = get_method_env()) : 
##   object 'type_sum.accel' not found
protein <- tibble::as_tibble(read.csv(unz(inputFragfile,"IonstarWithMSFragger/combined_protein.tsv"),
                                      header = TRUE, sep = "\t", stringsAsFactors = FALSE))

undebug( prolfquapp::tidy_FragPipe_combined_protein)
protein <- prolfquapp::tidy_FragPipe_combined_protein_deprec(protein)
protein <- protein |> dplyr::filter(unique.stripped.peptides > 1)
merged <- dplyr::inner_join(annotation, protein)

Create prolfqua configuration

atable <- prolfqua::AnalysisTableAnnotation$new()
atable$fileName = "raw.file"
atable$hierarchy[["protein_Id"]] <- c("protein")

atable$hierarchyDepth <- 1
atable$set_response("razor.intensity")
atable$factors[["dilution."]] = "sample"
atable$factors[["run"]] = "run_ID"
atable$factorDepth <- 1


config <- prolfqua::AnalysisConfiguration$new(atable)

adata <- prolfqua::setup_analysis(merged, config)
lfqdata <- prolfqua::LFQData$new(adata, config)
lfqdata$remove_small_intensities()

Normalize data using human proteins

pl <- lfqdata$get_Plotter()
pl$intensity_distribution_density()

subset_h <- lfqdata$get_copy()$get_Transformer()$log2()$lfq
subset_h$data <- subset_h$data |> dplyr::filter(grepl("HUMAN", protein_Id))
tr <- lfqdata$get_Transformer()
lfqdataNormalized <- tr$log2()$robscale_subset(lfqsubset = subset_h)$lfq
pl <- lfqdataNormalized$get_Plotter()
pl$intensity_distribution_density()

hm <- pl$NA_heatmap()
hm

Model data using prolfqua and specify contrasts

Contrasts <- c(
  "dilution_(9/7.5)_1.2" =   "dilution.e - dilution.d",
  "dilution_(7.5/6)_1.25" =   "dilution.d - dilution.c",
  "dilution_(6/4.5)_1.3(3)" =   "dilution.c - dilution.b",
  "dilution_(4.5/3)_1.5" =   "dilution.b - dilution.a"
)


lmmodel <- "~ dilution."
lmmodel <- paste0(lfqdataNormalized$config$table$get_response() , lmmodel)

modelFunction <- prolfqua::strategy_lm( lmmodel, model_name = "Model")

mod <- prolfqua::build_model(lfqdataNormalized$data, modelFunction)
contr <- prolfqua::Contrasts$new(mod, Contrasts)
contrimp <- prolfqua::ContrastsMissing$new(lfqdataNormalized, Contrasts)

merged <- prolfqua::merge_contrasts_results(contr, contrimp)
mergedmod <- prolfqua::ContrastsModerated$new(merged$merged)

cp <- mergedmod$get_Plotter()
cp$volcano()
## $FDR

Benchmark data

ttd <- prolfqua::ionstar_bench_preprocess(mergedmod$get_contrasts())
benchmark_prolfqua <- prolfqua::make_benchmark(ttd$data,
                                             model_description = "MSFragger med. polish and lm. density",
                                             model_name = "MSFragger_prot_med_lm",
                                             FDRvsFDP = list(list(score = "FDR", desc = FALSE))
)


knitr::kable(benchmark_prolfqua$pAUC_summaries()$ftable$content)
contrast what AUC pAUC_10 pAUC_20
all diff 94.03414 72.03297 80.81840
all scaled.p.value 95.12016 81.86899 85.01266
all statistic 95.01501 80.86869 84.54175
dilution_(4.5/3)_1.5 diff 91.74353 71.35642 74.94692
dilution_(4.5/3)_1.5 scaled.p.value 91.97449 74.07253 75.69785
dilution_(4.5/3)_1.5 statistic 91.88659 73.16519 75.26106
dilution_(6/4.5)_1.3(3) diff 94.51047 77.29426 83.11132
dilution_(6/4.5)_1.3(3) scaled.p.value 95.25739 82.86520 85.39251
dilution_(6/4.5)_1.3(3) statistic 95.15824 82.03018 84.98705
dilution_(7.5/6)_1.25 diff 93.58321 64.35484 79.10116
dilution_(7.5/6)_1.25 scaled.p.value 95.02455 82.23264 86.55908
dilution_(7.5/6)_1.25 statistic 94.93966 81.16409 86.10408
dilution_(9/7.5)_1.2 diff 95.95465 74.05003 85.26557
dilution_(9/7.5)_1.2 scaled.p.value 97.96069 89.12013 92.60967
dilution_(9/7.5)_1.2 statistic 97.80569 87.74253 91.93801
prolfqua::table_facade(benchmark_prolfqua$smc$summary, "Nr of estimated contrasts")
Nr of estimated contrasts
nr_missing protein_Id
0 3836
benchmark_prolfqua$plot_score_distribution()

benchmark_prolfqua$plot_ROC(0.05)

benchmark_prolfqua$plot_FDRvsFDP()

Model data using proDA and specify contrasts

se <- prolfqua::LFQDataToSummarizedExperiment(lfqdataNormalized)
fit <- proDA::proDA(se, design = ~ dilution. - 1, data_is_log_transformed = TRUE)

contr <- list()
contr[["dilution_(9/7.5)_1.2"]] <- data.frame(
  contrast = "dilution_(9/7.5)_1.2",
  proDA::test_diff(fit, contrast = "dilution.e - dilution.d"))
contr[["dilution_(7.5/6)_1.25"]] <- data.frame(
  contrast = "dilution_(7.5/6)_1.25",
  proDA::test_diff(fit, contrast = "dilution.d - dilution.c"))
contr[["dilution_(6/4.5)_1.3(3)"]] <- data.frame(
  contrast = "dilution_(6/4.5)_1.3(3)", 
  proDA::test_diff(fit, contrast = "dilution.c - dilution.b"))
contr[["dilution_(4.5/3)_1.5"]] <- data.frame(
  contrast = "dilution_(4.5/3)_1.5", 
  proDA::test_diff(fit, contrast = "dilution.b - dilution.a" ))

bb <- dplyr::bind_rows(contr)

Benchmark data

ttd <- prolfqua::ionstar_bench_preprocess( bb , idcol = "name" )

benchmark_proDA <- prolfqua::make_benchmark(ttd$data,
                                            contrast = "contrast",
                                            toscale = c("pval"),
                                            fcestimate = "diff",
                                            benchmark = list(
                                              list(score = "diff", desc = TRUE),
                                              list(score = "t_statistic", desc = TRUE),
                                              list(score = "scaled.pval", desc = TRUE)
                                            ),  
                                            model_description = "proDA_medpolishInt",
                                            model_name = "proDA_medpolishInt",
                                            FDRvsFDP = list(list(score = "adj_pval", desc = FALSE))
                                            , hierarchy = c("name"), summarizeNA = "t_statistic"
)

sumarry <- benchmark_proDA$smc$summary
prolfqua::table_facade(sumarry, caption = "nr of proteins with 0, 1, 2, 3 missing contrasts.")
nr of proteins with 0, 1, 2, 3 missing contrasts.
nr_missing name
0 3836
knitr::kable(benchmark_proDA$pAUC_summaries()$ftable$content)
contrast what AUC pAUC_10 pAUC_20
all diff 91.99592 61.86335 76.45600
all scaled.pval 94.25411 80.02409 83.62640
all t_statistic 94.25411 80.02409 83.62640
dilution_(4.5/3)_1.5 diff 89.74745 69.12772 76.47584
dilution_(4.5/3)_1.5 scaled.pval 91.77920 74.78168 77.29185
dilution_(4.5/3)_1.5 t_statistic 91.77920 74.78168 77.29185
dilution_(6/4.5)_1.3(3) diff 92.40180 68.82831 79.75247
dilution_(6/4.5)_1.3(3) scaled.pval 94.09976 81.69756 84.47232
dilution_(6/4.5)_1.3(3) t_statistic 94.09976 81.69756 84.47232
dilution_(7.5/6)_1.25 diff 91.19929 48.04371 70.53692
dilution_(7.5/6)_1.25 scaled.pval 93.94111 79.59794 84.24361
dilution_(7.5/6)_1.25 t_statistic 93.94111 79.59794 84.24361
dilution_(9/7.5)_1.2 diff 94.43666 61.61623 79.02591
dilution_(9/7.5)_1.2 scaled.pval 97.21442 85.21924 89.29099
dilution_(9/7.5)_1.2 t_statistic 97.21442 85.21924 89.29099
prolfqua::table_facade(benchmark_prolfqua$smc$summary, "Nr of estimated contrasts")
Nr of estimated contrasts
nr_missing protein_Id
0 3836
benchmark_proDA$plot_score_distribution()

benchmark_proDA$plot_ROC(0.05)

benchmark_proDA$plot_FDRvsFDP()

Compare prolfqua and proda

Direct comparison with msqrob2 is impossible since, to fit the dropout model, the peptide intensities are required, while here, we are starting the analysis from the combined_proteins.tsv file.

bdir <- file.path("../inst/Benchresults/",format( Sys.Date(), "%Y%m%d"))
if (!dir.exists(bdir)) {dir.create(bdir)}

saveRDS(list(benchmark_proDA = benchmark_proDA, benchmark_prolfqua = benchmark_prolfqua)
  ,file.path("../inst/Benchresults/",format( Sys.Date(), "%Y%m%d"),"FragPipev14_comb_prot.RDS"))

proda <- benchmark_proDA$pAUC_summaries()$ftable$content
proda$package <- "proda"
prolfqua <- benchmark_prolfqua$pAUC_summaries()$ftable$content
prolfqua$package <- "prolfqua"

tmp <- dplyr::bind_rows(proda, prolfqua)
tmp$what |> unique()
## [1] "diff"           "scaled.pval"    "t_statistic"    "scaled.p.value"
## [5] "statistic"
tmp$what[tmp$what == "statistic"] <- "t_statistic"
tmp$what[tmp$what == "scaled.pval"] <- "scaled.p.value"

tmp |> ggplot2::ggplot(ggplot2::aes(x = what, y = pAUC_10, group = package, color = NULL, fill = package)) +
  ggplot2::geom_bar(stat = "identity",  position = ggplot2::position_dodge()) +
  ggplot2::facet_wrap(~ contrast) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1))