Please download and install the prolfquadata package from github

conflicted::conflict_prefer("filter", "dplyr")

Data preprocessing

Load data

datadir <- file.path(find.package("prolfquadata") , "quantdata")
inputFragfile <-  file.path(datadir, "MSFragger_IonStar2018_PXD003881.zip")
inputAnnotation <- file.path(datadir, "annotation_Ionstar2018_PXD003881.xlsx")
annotation <- readxl::read_xlsx(inputAnnotation)
## Error in get(paste0(generic, ".", class), envir = get_method_env()) : 
##   object 'type_sum.accel' not found
unzip(inputFragfile, list = TRUE)
##                                                       Name    Length
## 1                IonstarWithMSFragger/combined_peptide.tsv  10202897
## 2                IonstarWithMSFragger/combined_protein.tsv   5288537
## 3                      IonstarWithMSFragger/fragger.params      8423
## 4 IonstarWithMSFragger/fragpipe_2021-02-18_20-02-51.config      7509
## 5         IonstarWithMSFragger/log_2021-02-18_23-40-52.txt    585337
## 6                         IonstarWithMSFragger/mbr_ion.tsv  52755282
## 7                         IonstarWithMSFragger/MSstats.csv 147944334
## 8                     IonstarWithMSFragger/reprint.int.tsv    861960
## 9                     IonstarWithMSFragger/reprint.spc.tsv    276563
##                  Date
## 1 2021-02-18 23:40:00
## 2 2021-02-18 23:40:00
## 3 2021-02-18 20:02:00
## 4 2021-02-18 20:02:00
## 5 2021-02-18 23:41:00
## 6 2021-02-18 23:39:00
## 7 2021-02-18 23:40:00
## 8 2021-02-18 23:40:00
## 9 2021-02-18 22:47:00
msstats <- read.csv(
    unz(inputFragfile,"IonstarWithMSFragger/MSstats.csv"),
    header = TRUE,
    sep = ",",
    stringsAsFactors = FALSE)



msstats <- tibble::tibble(msstats)
msstats$Run <- tolower(msstats$Run)

merged <- dplyr::inner_join(annotation, msstats, by = c("raw.file" = "Run"))

summedPep <- merged |> dplyr::group_by(raw.file, run_ID, sample, ProteinName, PeptideSequence) |> dplyr::summarize(nprecursor = dplyr::n(),pepIntensity = sum(Intensity, na.rm = TRUE), .groups = "drop")

Create prolfqua configuration

atable <- prolfqua::AnalysisTableAnnotation$new()
atable$fileName = "raw.file"
atable$hierarchy[["protein_Id"]] <- c("ProteinName")
atable$hierarchy[["peptide_Id"]] <- c("PeptideSequence")

atable$hierarchyDepth <- 1
atable$set_response("pepIntensity")
atable$factors[["dilution."]] = "sample"
atable$factors[["run"]] = "run_ID"
atable$factorDepth <- 1

config <- prolfqua::AnalysisConfiguration$new(atable)

adata <- prolfqua::setup_analysis(summedPep, config)
lfqdata <- prolfqua::LFQData$new(adata, config)
lfqdata$hierarchy_counts()
## # A tibble: 1 × 3
##   isotopeLabel protein_Id peptide_Id
##   <chr>             <int>      <int>
## 1 light              5122      31107
lfqdata$filter_proteins_by_peptide_count()
lfqdata$hierarchy_counts()
## # A tibble: 1 × 3
##   isotopeLabel protein_Id peptide_Id
##   <chr>             <int>      <int>
## 1 light              3904      29889
lfqdata$remove_small_intensities()
lfqdata$hierarchy_counts()
## # A tibble: 1 × 3
##   isotopeLabel protein_Id peptide_Id
##   <chr>             <int>      <int>
## 1 light              3904      29889
knitr::kable(lfqdata$hierarchy_counts(), caption = "number of proteins and peptides.")
number of proteins and peptides.
isotopeLabel protein_Id peptide_Id
light 3904 29889

Normalize data using human proteins

pl <- lfqdata$get_Plotter()
pl$intensity_distribution_density()

subset_h <- lfqdata$get_copy()$get_Transformer()$log2()$lfq
subset_h$data <- subset_h$data |> dplyr::filter(grepl("HUMAN", protein_Id))
tr <- lfqdata$get_Transformer()
lfqdataPeptideNorm <- tr$log2()$robscale_subset(lfqsubset = subset_h)$lfq
pl <- lfqdataPeptideNorm$get_Plotter()
pl$intensity_distribution_density()

hm <- pl$NA_heatmap()
hm

Aggregate proteins

agg <- lfqdataPeptideNorm$get_Aggregator()
agg$medpolish()
lfqdataNormalized <- agg$lfq_agg
lfqdataNormalized$hierarchy_counts()
## # A tibble: 1 × 2
##   isotopeLabel protein_Id
##   <chr>             <int>
## 1 light              3904

DEA using prolfqua

Contrasts <- c(
  "dilution_(9/7.5)_1.2" =   "dilution.e - dilution.d",
  "dilution_(7.5/6)_1.25" =   "dilution.d - dilution.c",
  "dilution_(6/4.5)_1.3(3)" =   "dilution.c - dilution.b",
  "dilution_(4.5/3)_1.5" =   "dilution.b - dilution.a"
)


lmmodel <- "~ dilution."
lmmodel <- paste0(lfqdataNormalized$config$table$get_response() , lmmodel)

modelFunction <- prolfqua::strategy_lm( lmmodel, model_name = "Model")


mod <- prolfqua::build_model(lfqdataNormalized$data, modelFunction)
contr <- prolfqua::Contrasts$new(mod, Contrasts)
contrimp <- prolfqua::ContrastsMissing$new(lfqdataNormalized, Contrasts)

merged <- prolfqua::merge_contrasts_results(contr, contrimp)
mergedmod <- prolfqua::ContrastsModerated$new(merged$merged)

cp <- mergedmod$get_Plotter()
cp$volcano()$FDR

Benchmarking

ttd <- prolfqua::ionstar_bench_preprocess(mergedmod$get_contrasts())
benchmark_prolfqua <- prolfqua::make_benchmark(ttd$data,
                                             model_description = "prolfqua_merged",
                                             model_name = "prolfqua_merged",
                                             FDRvsFDP = list(list(score = "FDR", desc = FALSE))
)


knitr::kable(benchmark_prolfqua$pAUC_summaries()$ftable$content)
contrast what AUC pAUC_10 pAUC_20
all diff 95.08941 75.62771 85.05479
all scaled.p.value 96.04553 83.17176 87.64209
all statistic 96.03922 83.09677 87.62712
dilution_(4.5/3)_1.5 diff 93.72267 81.39620 85.65552
dilution_(4.5/3)_1.5 scaled.p.value 93.58459 83.35680 85.19206
dilution_(4.5/3)_1.5 statistic 93.58722 83.28201 85.20315
dilution_(6/4.5)_1.3(3) diff 95.03489 82.50829 88.05018
dilution_(6/4.5)_1.3(3) scaled.p.value 96.92545 88.24154 90.65322
dilution_(6/4.5)_1.3(3) statistic 96.90698 88.16417 90.61789
dilution_(7.5/6)_1.25 diff 94.31033 63.86013 79.61097
dilution_(7.5/6)_1.25 scaled.p.value 94.87680 78.15999 83.92652
dilution_(7.5/6)_1.25 statistic 94.86947 77.91034 83.86978
dilution_(9/7.5)_1.2 diff 96.50924 74.39250 86.26315
dilution_(9/7.5)_1.2 scaled.p.value 98.23989 88.83473 93.14924
dilution_(9/7.5)_1.2 statistic 98.22705 88.70681 93.08966
prolfqua::table_facade(benchmark_prolfqua$smc$summary, "Nr of estimated contrasts")
Nr of estimated contrasts
nr_missing protein_Id
0 3899
benchmark_prolfqua$plot_score_distribution()

benchmark_prolfqua$plot_ROC(0.05)

benchmark_prolfqua$plot_FDRvsFDP()

DEA using proDA

se <- prolfqua::LFQDataToSummarizedExperiment(lfqdataNormalized)
fit <- proDA::proDA(se, design = ~ dilution. - 1, data_is_log_transformed = TRUE)

contr <- list()
contr[["dilution_(9/7.5)_1.2"]] <- data.frame(
  contrast = "dilution_(9/7.5)_1.2",
  proDA::test_diff(fit, contrast = "dilution.e - dilution.d"))
contr[["dilution_(7.5/6)_1.25"]] <- data.frame(
  contrast = "dilution_(7.5/6)_1.25",
  proDA::test_diff(fit, contrast = "dilution.d - dilution.c"))
contr[["dilution_(6/4.5)_1.3(3)"]] <- data.frame(
  contrast = "dilution_(6/4.5)_1.3(3)", 
  proDA::test_diff(fit, contrast = "dilution.c - dilution.b"))
contr[["dilution_(4.5/3)_1.5"]] <- data.frame(
  contrast = "dilution_(4.5/3)_1.5", 
  proDA::test_diff(fit, contrast = "dilution.b - dilution.a" ))

bb <- dplyr::bind_rows(contr)

Benchmarking

ttd <- prolfqua::ionstar_bench_preprocess( bb , idcol = "name" )

benchmark_proDA <- prolfqua::make_benchmark(ttd$data,
                                            contrast = "contrast",
                                            toscale = c("pval"),
                                            fcestimate = "diff",
                                            benchmark = list(
                                              list(score = "diff", desc = TRUE),
                                              list(score = "t_statistic", desc = TRUE),
                                              list(score = "scaled.pval", desc = TRUE)
                                            ),  
                                            model_description = "proDA_medpolishInt",
                                            model_name = "proDA",
                                            FDRvsFDP = list(list(score = "adj_pval", desc = FALSE))
                                            , hierarchy = c("name"), summarizeNA = "t_statistic"
)

sumarry <- benchmark_proDA$smc$summary
prolfqua::table_facade(sumarry, caption = "nr of proteins with 0, 1, 2, 3 missing contrasts.")
nr of proteins with 0, 1, 2, 3 missing contrasts.
nr_missing name
0 3899
knitr::kable(benchmark_proDA$pAUC_summaries()$ftable$content)
contrast what AUC pAUC_10 pAUC_20
all diff 94.93411 75.93204 85.38220
all scaled.pval 95.09318 80.01212 84.66020
all t_statistic 95.09318 80.01212 84.66020
dilution_(4.5/3)_1.5 diff 94.63617 83.29955 87.92716
dilution_(4.5/3)_1.5 scaled.pval 92.90655 81.24404 83.66242
dilution_(4.5/3)_1.5 t_statistic 92.90655 81.24404 83.66242
dilution_(6/4.5)_1.3(3) diff 94.98945 85.11985 89.62882
dilution_(6/4.5)_1.3(3) scaled.pval 96.47262 85.71922 88.54897
dilution_(6/4.5)_1.3(3) t_statistic 96.47262 85.71922 88.54897
dilution_(7.5/6)_1.25 diff 93.61285 61.76129 77.63309
dilution_(7.5/6)_1.25 scaled.pval 93.28098 74.51681 80.27637
dilution_(7.5/6)_1.25 t_statistic 93.28098 74.51681 80.27637
dilution_(9/7.5)_1.2 diff 96.27883 73.69306 85.71381
dilution_(9/7.5)_1.2 scaled.pval 97.49176 84.89702 89.57879
dilution_(9/7.5)_1.2 t_statistic 97.49176 84.89702 89.57879
prolfqua::table_facade(benchmark_proDA$smc$summary, "Nr of estimated contrasts")
Nr of estimated contrasts
nr_missing name
0 3899
benchmark_proDA$plot_score_distribution()

benchmark_proDA$plot_ROC(0.05)

benchmark_proDA$plot_FDRvsFDP()

DEA using msqrob2

To use proDA, we need to create an SummarizedExperiment. We use the to_wide function of prolfqua to get the data in in the SummarizedExperiment compatible format.

Defining Contrasts and computing group comparisons

As usual, two steps are required, first fit the models, then compute the contrasts.

prlm <- msqrobHurdle(pe,
                     i = "protein",
                     formula = ~dilution.,
                     overwrite = TRUE)

Since msqrob does not report average abundances, we are computing them for each contrast.

st <- lfqdataNormalized$get_Stats()
protAbundanceIngroup <- st$stats()
protAbundanceIngroup <- protAbundanceIngroup |>
  tidyr::pivot_wider(
    id_cols = protein_Id,
    names_from = dilution.,
    names_prefix = "abd.",
    values_from = meanAbundance)
L <- makeContrast(c("dilution.e-dilution.d=0",
                    "dilution.d-dilution.c=0",
                    "dilution.c-dilution.b=0",
                    "dilution.b=0"),
                  parameterNames = c("dilution.e",
                                     "dilution.d",
                                     "dilution.c",
                                     "dilution.b"))
prlm <- hypothesisTestHurdle(prlm, i = "protein", L, overwrite = TRUE)
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.e.d =  mean( c(abd.e,abd.d), na.rm = TRUE)  )
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.d.c =  mean( c(abd.d,abd.c), na.rm = TRUE)  )
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.c.b =  mean( c(abd.c,abd.b), na.rm = TRUE)  )
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.b.a =  mean( c(abd.b,abd.a), na.rm = TRUE)  )


xx <- rowData(prlm[["protein"]])
hurdle <- xx[grepl("hurdle_",names(xx))]

res <- list()
for (i in names(hurdle)) { 
  hurdle[[i]]$contrast <- i
  res[[i]] <- prolfqua::matrix_to_tibble(hurdle[[i]], preserve_row_names = "name")
}

hurdle <- dplyr::bind_rows(res)

Now we need to merge the results of both models msqrobHurdleIntensity and msqrobHurdleCount. To find out which models were not estimated by msqrobHurdleIntensity we check for NA’s and use the anti_join to select those from the msqrobHurdleCount models.

logFC <- hurdle |> dplyr::select("name","contrast", starts_with("logFC"))
logFC <- dplyr::filter(logFC ,!is.na(logFCt))

logFC$modelName <- "msqrobHurdleIntensity"
names(logFC) <- c("name","contrast","logFC","se","df","t","pval","modelName")

logOR <- hurdle |> dplyr::select("name","contrast", starts_with("logOR"))
logOR$modelName <- "msqrobHurdleCount"
names(logOR) <- c("name","contrast","logFC","se","df","t","pval","modelName")


ddd <- dplyr::anti_join(logOR , logFC, by = c("name", "contrast"))
all <- dplyr::bind_rows(ddd , logFC) |> dplyr::arrange(contrast, name)
all <- prolfqua::adjust_p_values(all, column = "pval", group_by_col = "contrast")
all$contrast |> unique()
## [1] "hurdle_dilution.b"              "hurdle_dilution.c - dilution.b"
## [3] "hurdle_dilution.d - dilution.c" "hurdle_dilution.e - dilution.d"
protAbundanceIngroup <- protAbundanceIngroup |> 
  dplyr::select(-starts_with("abd")) |> 
  tidyr::pivot_longer(starts_with("avgAbd"), names_to = "contrast" ,values_to = "avgAbd")

protAbundanceIngroup <- protAbundanceIngroup |> 
  dplyr::mutate(contrast = 
           dplyr::case_when(contrast == "avgAbd.e.d" ~ "dilution_(9/7.5)_1.2",
                     contrast == "avgAbd.d.c" ~ "dilution_(7.5/6)_1.25",
                     contrast == "avgAbd.c.b" ~ "dilution_(6/4.5)_1.3(3)",
                     contrast == "avgAbd.b.a" ~ "dilution_(4.5/3)_1.5",
                     TRUE ~ "something wrong"))
all <- all |> dplyr::mutate(contrast = dplyr::case_when(
  contrast == "hurdle_dilution.e - dilution.d" ~ "dilution_(9/7.5)_1.2",
  contrast == "hurdle_dilution.d - dilution.c" ~ "dilution_(7.5/6)_1.25",
  contrast == "hurdle_dilution.c - dilution.b" ~ "dilution_(6/4.5)_1.3(3)",
  contrast == "hurdle_dilution.b" ~ "dilution_(4.5/3)_1.5",
  TRUE ~ "something wrong"))
stopifnot(sum(all$contrast == "something wrong") == 0 )
stopifnot(sum(all$contrast == "something wrong") == 0 )


bb <- dplyr::inner_join(all, protAbundanceIngroup, by = c("name" = "protein_Id", "contrast" = "contrast"))

Benchmarking

Here we use proflqua benchmark functions to generate some summaries.

ttd <- prolfqua::ionstar_bench_preprocess( bb , idcol = "name" )
benchmark_msqrob <- prolfqua::make_benchmark(ttd$data,
                                             contrast = "contrast",
                                             toscale = c("pval"),
                                             fcestimate = "logFC",
                                             benchmark = list(
                                               list(score = "logFC", desc = TRUE),
                                               list(score = "t", desc = TRUE),
                                               list(score = "scaled.pval", desc = TRUE)
                                             ),  
                                             model_description = "msqrob_QFeature",
                                             model_name = "msqrob2",
                                             FDRvsFDP = list(list(score = "FDR", desc = FALSE))
                                             , hierarchy = c("name"), summarizeNA = "t"
)

sumarry <- benchmark_msqrob$smc$summary
prolfqua::table_facade(sumarry, caption = "nr of proteins with 0, 1, 2, 3 missing contrasts.")
nr of proteins with 0, 1, 2, 3 missing contrasts.
nr_missing name
0 3899
res <- benchmark_msqrob$pAUC_summaries()
knitr::kable(res$ftable$content,caption = res$ftable$caption)
AUC, and pAUC at 0.1 and 0.2 FPR for (NC) msqrob_QFeature
contrast what AUC pAUC_10 pAUC_20
all logFC 95.28429 77.26238 86.27762
all scaled.pval 96.11219 83.36280 87.89590
all t 96.08561 83.14210 87.76231
dilution_(4.5/3)_1.5 logFC 95.95577 85.38291 90.42446
dilution_(4.5/3)_1.5 scaled.pval 95.75265 85.54075 88.85923
dilution_(4.5/3)_1.5 t 95.71773 85.23075 88.68102
dilution_(6/4.5)_1.3(3) logFC 95.77607 85.56448 90.53485
dilution_(6/4.5)_1.3(3) scaled.pval 97.05394 88.87187 91.97448
dilution_(6/4.5)_1.3(3) t 97.03145 88.70474 91.89117
dilution_(7.5/6)_1.25 logFC 93.49705 65.08673 79.62722
dilution_(7.5/6)_1.25 scaled.pval 94.18917 78.51848 83.36609
dilution_(7.5/6)_1.25 t 94.15678 78.33434 83.18942
dilution_(9/7.5)_1.2 logFC 95.72366 73.87153 84.78715
dilution_(9/7.5)_1.2 scaled.pval 97.60333 86.18294 90.60964
dilution_(9/7.5)_1.2 t 97.58486 85.98590 90.50751
res$barp
ROC curves

ROC curves

#res$ftable
benchmark_msqrob$plot_ROC(xlim = 0.2)
plot ROC curves

plot ROC curves

benchmark_msqrob$plot_FDRvsFDP()
plot FDR vs FDP

plot FDR vs FDP

benchmark_msqrob$plot_precision_recall()

DEA using MSstats

annotation <- readxl::read_xlsx(inputAnnotation)

msstats2 <- read.csv(
    unz(inputFragfile,"IonstarWithMSFragger/MSstats.csv"),
    header = TRUE,
    sep = ",",
    stringsAsFactors = FALSE)

msstats2$BioReplicate <- NULL
msstats2$Run <- tolower(msstats2$Run)
msstats2$Condition <- NULL


#msstats$BioReplicate <- NULL
annotation2 <- annotation
annotation2$BioReplicate <- annotation2$run_ID
annotation2$run_ID <- NULL
annotation2$date <- NULL
annotation2$Condition <- annotation2$sample
annotation2$sample <- NULL


msstats2 <- dplyr::inner_join(msstats2 , annotation2, by = c(Run = "raw.file"))
pin <- unique(lfqdataNormalized$data$protein_Id)

# keep only the same proteins which are analysed also with msqrob2, proDA and prolfqua
msstats2 <- dplyr::filter(msstats2, ProteinName %in% pin)
msstats2$ProteinName |> unique() |> length()
## [1] 3904
msstatscolumns <- c("ProteinName", "PeptideSequence",  "PrecursorCharge" , "FragmentIon" ,     "ProductCharge"   , "IsotopeLabelType", "Condition","BioReplicate","Run",    
"Fraction" ,"Intensity" )
invisible(utils::capture.output(
    QuantData <- MSstats::dataProcess(msstats2, use_log_file = FALSE, verbose = FALSE)
    ))
# based on multiple comparisons  (T1 vs T3; T1 vs T7; T1 vs T9)
comparison1 <- matrix(c(0,0,0,-1,1),nrow = 1)
comparison2 <- matrix(c(0,0,-1,1,0),nrow = 1)
comparison3 <- matrix(c(0,-1,1,0,0),nrow = 1)
comparison4 <- matrix(c(-1,1,0,0,0),nrow = 1)
comparison <- rbind(comparison1,comparison2, comparison3, comparison4)
row.names(comparison) <- c("dilution_(9/7.5)_1.2","dilution_(7.5/6)_1.25","dilution_(6/4.5)_1.3(3)","dilution_(4.5/3)_1.5")
colnames(comparison) <- c("a","b","c","d","e")
invisible(utils::capture.output(
    testResultMultiComparisons <- MSstats::groupComparison(
    contrast.matrix = comparison,
    data = QuantData,
    verbose = FALSE,
    use_log_file = FALSE)
    ))
bb <- testResultMultiComparisons$ComparisonResult

Benchmarking

library(prolfqua)
ttd <- prolfqua::ionstar_bench_preprocess(bb, idcol = "Protein")
benchmark_msstats <- prolfqua::make_benchmark(
    ttd$data,
    contrast = "Label",
    toscale = c("pvalue"),
    fcestimate = "log2FC",
    benchmark = list(
        list(score = "log2FC", desc = TRUE),
        list(score = "Tvalue", desc = TRUE),
        list(score = "scaled.pvalue", desc = TRUE)
    ),  
    model_description = "MSStats",
    model_name = "MSStats",
    FDRvsFDP = list(list(score = "adj.pvalue", desc = FALSE))
    , hierarchy = c("Protein"), summarizeNA = "Tvalue"
)
sum(benchmark_msstats$smc$summary$Protein)
## [1] 3897
sumarry <- benchmark_msstats$smc$summary
prolfqua::table_facade(sumarry, caption = "nr of proteins with 0, 1, 2, 3 missing contrasts.")
nr of proteins with 0, 1, 2, 3 missing contrasts.
nr_missing Protein
0 3798
1 61
2 26
3 6
4 6

Comparing DEA results from prolfqua, proda, msqrob2, and MSstats

Direct comparison with msqrob2 is impossible since, to fit the dropout model, the peptide intensities are required, while here, we are starting the analysis from the combined_proteins.tsv file.

allB <- list(benchmark_proDA = benchmark_proDA,
             benchmark_prolfqua = benchmark_prolfqua,
             benchmark_msqrob = benchmark_msqrob,
             benchmark_msstats = benchmark_msstats)

bdir <- file.path("../inst/Benchresults/",format( Sys.Date(), "%Y%m%d"))
if (!dir.exists(bdir)) {dir.create(bdir)}

saveRDS(allB
  ,file.path("../inst/Benchresults/",format( Sys.Date(), "%Y%m%d"),"FragPipev14_comb_MSStats.RDS"))

proda <- allB$benchmark_proDA$pAUC_summaries()$ftable$content
proda$package <- "proda"
prolfqua <- allB$benchmark_prolfqua$pAUC_summaries()$ftable$content
prolfqua$package <- "prolfqua"
msqrob2 <- allB$benchmark_msqrob$pAUC_summaries()$ftable$content
msqrob2$package <- "msqrob2"
tmp <- dplyr::bind_rows(proda, prolfqua, msqrob2)
bmsstats <- allB$benchmark_msstats$pAUC_summaries()$ftable$content
bmsstats$package <- "MSstats"
bmsstats$contrast <- bmsstats$Label
bmsstats$Label <- NULL
tmp <- dplyr::bind_rows(list(proda, prolfqua, msqrob2, bmsstats))


tmp$what[tmp$what == "statistic"] <- "t_statistic"
tmp$what[tmp$what == "scaled.pval"] <- "scaled.p.value"
tmp$what[tmp$what == "scaled.pvalue"] <- "scaled.p.value"

tmp$what[tmp$what == "logFC"] <- "diff"
tmp$what[tmp$what == "log2FC"] <- "diff"
tmp$what[tmp$what == "t"] <- "t_statistic"
tmp$what[tmp$what == "Tvalue"] <- "t_statistic"
tmp$what |> unique()
## [1] "diff"           "scaled.p.value" "t_statistic"
tmp |> ggplot2::ggplot(ggplot2::aes(x = what, y = pAUC_10, group = package, color = NULL, fill = package)) +
  ggplot2::geom_bar(stat = "identity",  position = ggplot2::position_dodge()) +
  ggplot2::facet_wrap(~ contrast) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1))

all <- tmp |> dplyr::filter(contrast == "all")

knitr::kable(tmp |> dplyr::filter(contrast == "all"), caption = "Table")
Table
contrast what AUC pAUC_10 pAUC_20 package
all diff 94.93411 75.93204 85.38220 proda
all scaled.p.value 95.09318 80.01212 84.66020 proda
all t_statistic 95.09318 80.01212 84.66020 proda
all diff 95.08941 75.62771 85.05479 prolfqua
all scaled.p.value 96.04553 83.17176 87.64209 prolfqua
all t_statistic 96.03922 83.09677 87.62712 prolfqua
all diff 95.28429 77.26238 86.27762 msqrob2
all scaled.p.value 96.11219 83.36280 87.89590 msqrob2
all t_statistic 96.08561 83.14210 87.76231 msqrob2
all t_statistic 96.57121 81.46841 87.43125 MSstats
all diff 95.41737 72.92764 84.40111 MSstats
all scaled.p.value 96.57664 81.52092 87.43752 MSstats
ggplot2::ggplot(all, ggplot2::aes(x = package, y = pAUC_10)) +
  ggplot2::geom_bar(stat = "identity") +
  ggplot2::facet_wrap(~what)  + 
  ggplot2::theme_minimal() + 
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = -90, vjust = 0.5)) +
  ggplot2::xlab("")

a1 <- allB$benchmark_proDA$smc$summary
names(a1)[2] <- "protein_Id"
a1$name <- allB$benchmark_proDA$model_name

a2 <- allB$benchmark_prolfqua$smc$summary
names(a2)[2] <- "protein_Id"
a2$name <- allB$benchmark_prolfqua$model_name

a3 <- allB$benchmark_msqrob$smc$summary
names(a3)[2] <- "protein_Id"
a3$name <- allB$benchmark_msqrob$model_name

a4 <- allB$benchmark_msstats$smc$summary
names(a4)[2] <- "protein_Id"
a4$name <- allB$benchmark_msstats$model_name
dd <- dplyr::bind_rows(list(a1,a2,a3,a4))
dd <- dd |> dplyr::mutate(nrcontrasts = protein_Id * (4 - nr_missing))
dds <- dd |> dplyr::group_by(name) |> dplyr::summarize(nrcontrasts = sum(nrcontrasts))
dds$percent <- dds$nrcontrasts/max(dds$nrcontrasts) * 100

nrgg <- dds |> ggplot2::ggplot(ggplot2::aes(x = name, y = nrcontrasts )) + 
  ggplot2::geom_bar(stat = "identity", fill = "white", colour = "black") + 
  ggplot2::coord_cartesian(ylim = c(min(dds$nrcontrasts) - 200, max(dds$nrcontrasts) + 10)) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = -90, vjust = 0.5)) +
  ggplot2::geom_text(ggplot2::aes(label = paste0(round(nrcontrasts, digits = 1), paste0("  (",round(percent, digits = 1),"%)"))),
            vjust = 0, hjust = -0.2, angle = -90) #+ 
nrgg
Number of comparisons for each method

Number of comparisons for each method