vignettes/BenchmarkFragPipeMSStats.Rmd
      BenchmarkFragPipeMSStats.RmdPlease download and install the prolfquadata package
from github
conflicted::conflict_prefer("filter", "dplyr")
datadir <- file.path(find.package("prolfquadata") , "quantdata")
inputFragfile <-  file.path(datadir, "MSFragger_IonStar2018_PXD003881.zip")
inputAnnotation <- file.path(datadir, "annotation_Ionstar2018_PXD003881.xlsx")
annotation <- readxl::read_xlsx(inputAnnotation)## Error in get(paste0(generic, ".", class), envir = get_method_env()) : 
##   object 'type_sum.accel' not found
unzip(inputFragfile, list = TRUE)##                                                       Name    Length
## 1                IonstarWithMSFragger/combined_peptide.tsv  10202897
## 2                IonstarWithMSFragger/combined_protein.tsv   5288537
## 3                      IonstarWithMSFragger/fragger.params      8423
## 4 IonstarWithMSFragger/fragpipe_2021-02-18_20-02-51.config      7509
## 5         IonstarWithMSFragger/log_2021-02-18_23-40-52.txt    585337
## 6                         IonstarWithMSFragger/mbr_ion.tsv  52755282
## 7                         IonstarWithMSFragger/MSstats.csv 147944334
## 8                     IonstarWithMSFragger/reprint.int.tsv    861960
## 9                     IonstarWithMSFragger/reprint.spc.tsv    276563
##                  Date
## 1 2021-02-18 23:40:00
## 2 2021-02-18 23:40:00
## 3 2021-02-18 20:02:00
## 4 2021-02-18 20:02:00
## 5 2021-02-18 23:41:00
## 6 2021-02-18 23:39:00
## 7 2021-02-18 23:40:00
## 8 2021-02-18 23:40:00
## 9 2021-02-18 22:47:00
msstats <- read.csv(
    unz(inputFragfile,"IonstarWithMSFragger/MSstats.csv"),
    header = TRUE,
    sep = ",",
    stringsAsFactors = FALSE)
msstats <- tibble::tibble(msstats)
msstats$Run <- tolower(msstats$Run)
merged <- dplyr::inner_join(annotation, msstats, by = c("raw.file" = "Run"))
summedPep <- merged |> dplyr::group_by(raw.file, run_ID, sample, ProteinName, PeptideSequence) |> dplyr::summarize(nprecursor = dplyr::n(),pepIntensity = sum(Intensity, na.rm = TRUE), .groups = "drop")
atable <- prolfqua::AnalysisTableAnnotation$new()
atable$fileName = "raw.file"
atable$hierarchy[["protein_Id"]] <- c("ProteinName")
atable$hierarchy[["peptide_Id"]] <- c("PeptideSequence")
atable$hierarchyDepth <- 1
atable$set_response("pepIntensity")
atable$factors[["dilution."]] = "sample"
atable$factors[["run"]] = "run_ID"
atable$factorDepth <- 1
config <- prolfqua::AnalysisConfiguration$new(atable)
adata <- prolfqua::setup_analysis(summedPep, config)
lfqdata <- prolfqua::LFQData$new(adata, config)
lfqdata$hierarchy_counts()## # A tibble: 1 × 3
##   isotopeLabel protein_Id peptide_Id
##   <chr>             <int>      <int>
## 1 light              5122      31107
lfqdata$filter_proteins_by_peptide_count()
lfqdata$hierarchy_counts()## # A tibble: 1 × 3
##   isotopeLabel protein_Id peptide_Id
##   <chr>             <int>      <int>
## 1 light              3904      29889
lfqdata$remove_small_intensities()
lfqdata$hierarchy_counts()## # A tibble: 1 × 3
##   isotopeLabel protein_Id peptide_Id
##   <chr>             <int>      <int>
## 1 light              3904      29889
knitr::kable(lfqdata$hierarchy_counts(), caption = "number of proteins and peptides.")| isotopeLabel | protein_Id | peptide_Id | 
|---|---|---|
| light | 3904 | 29889 | 
pl <- lfqdata$get_Plotter()
pl$intensity_distribution_density()
subset_h <- lfqdata$get_copy()$get_Transformer()$log2()$lfq
subset_h$data <- subset_h$data |> dplyr::filter(grepl("HUMAN", protein_Id))
tr <- lfqdata$get_Transformer()
lfqdataPeptideNorm <- tr$log2()$robscale_subset(lfqsubset = subset_h)$lfq
pl <- lfqdataPeptideNorm$get_Plotter()
pl$intensity_distribution_density()
hm <- pl$NA_heatmap()
hm
Contrasts <- c(
  "dilution_(9/7.5)_1.2" =   "dilution.e - dilution.d",
  "dilution_(7.5/6)_1.25" =   "dilution.d - dilution.c",
  "dilution_(6/4.5)_1.3(3)" =   "dilution.c - dilution.b",
  "dilution_(4.5/3)_1.5" =   "dilution.b - dilution.a"
)
lmmodel <- "~ dilution."
lmmodel <- paste0(lfqdataNormalized$config$table$get_response() , lmmodel)
modelFunction <- prolfqua::strategy_lm( lmmodel, model_name = "Model")
mod <- prolfqua::build_model(lfqdataNormalized$data, modelFunction)
contr <- prolfqua::Contrasts$new(mod, Contrasts)
contrimp <- prolfqua::ContrastsMissing$new(lfqdataNormalized, Contrasts)
merged <- prolfqua::merge_contrasts_results(contr, contrimp)
mergedmod <- prolfqua::ContrastsModerated$new(merged$merged)
cp <- mergedmod$get_Plotter()
cp$volcano()$FDR
ttd <- prolfqua::ionstar_bench_preprocess(mergedmod$get_contrasts())
benchmark_prolfqua <- prolfqua::make_benchmark(ttd$data,
                                             model_description = "prolfqua_merged",
                                             model_name = "prolfqua_merged",
                                             FDRvsFDP = list(list(score = "FDR", desc = FALSE))
)
knitr::kable(benchmark_prolfqua$pAUC_summaries()$ftable$content)| contrast | what | AUC | pAUC_10 | pAUC_20 | 
|---|---|---|---|---|
| all | diff | 95.08941 | 75.62771 | 85.05479 | 
| all | scaled.p.value | 96.04553 | 83.17176 | 87.64209 | 
| all | statistic | 96.03922 | 83.09677 | 87.62712 | 
| dilution_(4.5/3)_1.5 | diff | 93.72267 | 81.39620 | 85.65552 | 
| dilution_(4.5/3)_1.5 | scaled.p.value | 93.58459 | 83.35680 | 85.19206 | 
| dilution_(4.5/3)_1.5 | statistic | 93.58722 | 83.28201 | 85.20315 | 
| dilution_(6/4.5)_1.3(3) | diff | 95.03489 | 82.50829 | 88.05018 | 
| dilution_(6/4.5)_1.3(3) | scaled.p.value | 96.92545 | 88.24154 | 90.65322 | 
| dilution_(6/4.5)_1.3(3) | statistic | 96.90698 | 88.16417 | 90.61789 | 
| dilution_(7.5/6)_1.25 | diff | 94.31033 | 63.86013 | 79.61097 | 
| dilution_(7.5/6)_1.25 | scaled.p.value | 94.87680 | 78.15999 | 83.92652 | 
| dilution_(7.5/6)_1.25 | statistic | 94.86947 | 77.91034 | 83.86978 | 
| dilution_(9/7.5)_1.2 | diff | 96.50924 | 74.39250 | 86.26315 | 
| dilution_(9/7.5)_1.2 | scaled.p.value | 98.23989 | 88.83473 | 93.14924 | 
| dilution_(9/7.5)_1.2 | statistic | 98.22705 | 88.70681 | 93.08966 | 
prolfqua::table_facade(benchmark_prolfqua$smc$summary, "Nr of estimated contrasts")| nr_missing | protein_Id | 
|---|---|
| 0 | 3899 | 
benchmark_prolfqua$plot_score_distribution()
benchmark_prolfqua$plot_ROC(0.05)
benchmark_prolfqua$plot_FDRvsFDP()
se <- prolfqua::LFQDataToSummarizedExperiment(lfqdataNormalized)
fit <- proDA::proDA(se, design = ~ dilution. - 1, data_is_log_transformed = TRUE)
contr <- list()
contr[["dilution_(9/7.5)_1.2"]] <- data.frame(
  contrast = "dilution_(9/7.5)_1.2",
  proDA::test_diff(fit, contrast = "dilution.e - dilution.d"))
contr[["dilution_(7.5/6)_1.25"]] <- data.frame(
  contrast = "dilution_(7.5/6)_1.25",
  proDA::test_diff(fit, contrast = "dilution.d - dilution.c"))
contr[["dilution_(6/4.5)_1.3(3)"]] <- data.frame(
  contrast = "dilution_(6/4.5)_1.3(3)", 
  proDA::test_diff(fit, contrast = "dilution.c - dilution.b"))
contr[["dilution_(4.5/3)_1.5"]] <- data.frame(
  contrast = "dilution_(4.5/3)_1.5", 
  proDA::test_diff(fit, contrast = "dilution.b - dilution.a" ))
bb <- dplyr::bind_rows(contr)
ttd <- prolfqua::ionstar_bench_preprocess( bb , idcol = "name" )
benchmark_proDA <- prolfqua::make_benchmark(ttd$data,
                                            contrast = "contrast",
                                            toscale = c("pval"),
                                            fcestimate = "diff",
                                            benchmark = list(
                                              list(score = "diff", desc = TRUE),
                                              list(score = "t_statistic", desc = TRUE),
                                              list(score = "scaled.pval", desc = TRUE)
                                            ),  
                                            model_description = "proDA_medpolishInt",
                                            model_name = "proDA",
                                            FDRvsFDP = list(list(score = "adj_pval", desc = FALSE))
                                            , hierarchy = c("name"), summarizeNA = "t_statistic"
)
sumarry <- benchmark_proDA$smc$summary
prolfqua::table_facade(sumarry, caption = "nr of proteins with 0, 1, 2, 3 missing contrasts.")| nr_missing | name | 
|---|---|
| 0 | 3899 | 
knitr::kable(benchmark_proDA$pAUC_summaries()$ftable$content)| contrast | what | AUC | pAUC_10 | pAUC_20 | 
|---|---|---|---|---|
| all | diff | 94.93411 | 75.93204 | 85.38220 | 
| all | scaled.pval | 95.09318 | 80.01212 | 84.66020 | 
| all | t_statistic | 95.09318 | 80.01212 | 84.66020 | 
| dilution_(4.5/3)_1.5 | diff | 94.63617 | 83.29955 | 87.92716 | 
| dilution_(4.5/3)_1.5 | scaled.pval | 92.90655 | 81.24404 | 83.66242 | 
| dilution_(4.5/3)_1.5 | t_statistic | 92.90655 | 81.24404 | 83.66242 | 
| dilution_(6/4.5)_1.3(3) | diff | 94.98945 | 85.11985 | 89.62882 | 
| dilution_(6/4.5)_1.3(3) | scaled.pval | 96.47262 | 85.71922 | 88.54897 | 
| dilution_(6/4.5)_1.3(3) | t_statistic | 96.47262 | 85.71922 | 88.54897 | 
| dilution_(7.5/6)_1.25 | diff | 93.61285 | 61.76129 | 77.63309 | 
| dilution_(7.5/6)_1.25 | scaled.pval | 93.28098 | 74.51681 | 80.27637 | 
| dilution_(7.5/6)_1.25 | t_statistic | 93.28098 | 74.51681 | 80.27637 | 
| dilution_(9/7.5)_1.2 | diff | 96.27883 | 73.69306 | 85.71381 | 
| dilution_(9/7.5)_1.2 | scaled.pval | 97.49176 | 84.89702 | 89.57879 | 
| dilution_(9/7.5)_1.2 | t_statistic | 97.49176 | 84.89702 | 89.57879 | 
prolfqua::table_facade(benchmark_proDA$smc$summary, "Nr of estimated contrasts")| nr_missing | name | 
|---|---|
| 0 | 3899 | 
benchmark_proDA$plot_score_distribution()
benchmark_proDA$plot_ROC(0.05)
benchmark_proDA$plot_FDRvsFDP()
To use proDA, we need to create an
SummarizedExperiment. We use the to_wide
function of prolfqua to get the data in in the
SummarizedExperiment compatible format.
As usual, two steps are required, first fit the models, then compute the contrasts.
prlm <- msqrobHurdle(pe,
                     i = "protein",
                     formula = ~dilution.,
                     overwrite = TRUE)Since msqrob does not report average abundances, we are computing them for each contrast.
st <- lfqdataNormalized$get_Stats()
protAbundanceIngroup <- st$stats()
protAbundanceIngroup <- protAbundanceIngroup |>
  tidyr::pivot_wider(
    id_cols = protein_Id,
    names_from = dilution.,
    names_prefix = "abd.",
    values_from = meanAbundance)
L <- makeContrast(c("dilution.e-dilution.d=0",
                    "dilution.d-dilution.c=0",
                    "dilution.c-dilution.b=0",
                    "dilution.b=0"),
                  parameterNames = c("dilution.e",
                                     "dilution.d",
                                     "dilution.c",
                                     "dilution.b"))
prlm <- hypothesisTestHurdle(prlm, i = "protein", L, overwrite = TRUE)
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.e.d =  mean( c(abd.e,abd.d), na.rm = TRUE)  )
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.d.c =  mean( c(abd.d,abd.c), na.rm = TRUE)  )
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.c.b =  mean( c(abd.c,abd.b), na.rm = TRUE)  )
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.b.a =  mean( c(abd.b,abd.a), na.rm = TRUE)  )
xx <- rowData(prlm[["protein"]])
hurdle <- xx[grepl("hurdle_",names(xx))]
res <- list()
for (i in names(hurdle)) { 
  hurdle[[i]]$contrast <- i
  res[[i]] <- prolfqua::matrix_to_tibble(hurdle[[i]], preserve_row_names = "name")
}
hurdle <- dplyr::bind_rows(res)Now we need to merge the results of both models msqrobHurdleIntensity
and msqrobHurdleCount. To find out which models were not estimated by
msqrobHurdleIntensity we check for NA’s and use the
anti_join to select those from the
msqrobHurdleCount models.
logFC <- hurdle |> dplyr::select("name","contrast", starts_with("logFC"))
logFC <- dplyr::filter(logFC ,!is.na(logFCt))
logFC$modelName <- "msqrobHurdleIntensity"
names(logFC) <- c("name","contrast","logFC","se","df","t","pval","modelName")
logOR <- hurdle |> dplyr::select("name","contrast", starts_with("logOR"))
logOR$modelName <- "msqrobHurdleCount"
names(logOR) <- c("name","contrast","logFC","se","df","t","pval","modelName")
ddd <- dplyr::anti_join(logOR , logFC, by = c("name", "contrast"))
all <- dplyr::bind_rows(ddd , logFC) |> dplyr::arrange(contrast, name)
all <- prolfqua::adjust_p_values(all, column = "pval", group_by_col = "contrast")
all$contrast |> unique()## [1] "hurdle_dilution.b"              "hurdle_dilution.c - dilution.b"
## [3] "hurdle_dilution.d - dilution.c" "hurdle_dilution.e - dilution.d"
protAbundanceIngroup <- protAbundanceIngroup |> 
  dplyr::select(-starts_with("abd")) |> 
  tidyr::pivot_longer(starts_with("avgAbd"), names_to = "contrast" ,values_to = "avgAbd")
protAbundanceIngroup <- protAbundanceIngroup |> 
  dplyr::mutate(contrast = 
           dplyr::case_when(contrast == "avgAbd.e.d" ~ "dilution_(9/7.5)_1.2",
                     contrast == "avgAbd.d.c" ~ "dilution_(7.5/6)_1.25",
                     contrast == "avgAbd.c.b" ~ "dilution_(6/4.5)_1.3(3)",
                     contrast == "avgAbd.b.a" ~ "dilution_(4.5/3)_1.5",
                     TRUE ~ "something wrong"))
all <- all |> dplyr::mutate(contrast = dplyr::case_when(
  contrast == "hurdle_dilution.e - dilution.d" ~ "dilution_(9/7.5)_1.2",
  contrast == "hurdle_dilution.d - dilution.c" ~ "dilution_(7.5/6)_1.25",
  contrast == "hurdle_dilution.c - dilution.b" ~ "dilution_(6/4.5)_1.3(3)",
  contrast == "hurdle_dilution.b" ~ "dilution_(4.5/3)_1.5",
  TRUE ~ "something wrong"))
stopifnot(sum(all$contrast == "something wrong") == 0 )
stopifnot(sum(all$contrast == "something wrong") == 0 )
bb <- dplyr::inner_join(all, protAbundanceIngroup, by = c("name" = "protein_Id", "contrast" = "contrast"))Here we use proflqua benchmark functions to generate
some summaries.
ttd <- prolfqua::ionstar_bench_preprocess( bb , idcol = "name" )
benchmark_msqrob <- prolfqua::make_benchmark(ttd$data,
                                             contrast = "contrast",
                                             toscale = c("pval"),
                                             fcestimate = "logFC",
                                             benchmark = list(
                                               list(score = "logFC", desc = TRUE),
                                               list(score = "t", desc = TRUE),
                                               list(score = "scaled.pval", desc = TRUE)
                                             ),  
                                             model_description = "msqrob_QFeature",
                                             model_name = "msqrob2",
                                             FDRvsFDP = list(list(score = "FDR", desc = FALSE))
                                             , hierarchy = c("name"), summarizeNA = "t"
)
sumarry <- benchmark_msqrob$smc$summary
prolfqua::table_facade(sumarry, caption = "nr of proteins with 0, 1, 2, 3 missing contrasts.")| nr_missing | name | 
|---|---|
| 0 | 3899 | 
res <- benchmark_msqrob$pAUC_summaries()
knitr::kable(res$ftable$content,caption = res$ftable$caption)| contrast | what | AUC | pAUC_10 | pAUC_20 | 
|---|---|---|---|---|
| all | logFC | 95.28429 | 77.26238 | 86.27762 | 
| all | scaled.pval | 96.11219 | 83.36280 | 87.89590 | 
| all | t | 96.08561 | 83.14210 | 87.76231 | 
| dilution_(4.5/3)_1.5 | logFC | 95.95577 | 85.38291 | 90.42446 | 
| dilution_(4.5/3)_1.5 | scaled.pval | 95.75265 | 85.54075 | 88.85923 | 
| dilution_(4.5/3)_1.5 | t | 95.71773 | 85.23075 | 88.68102 | 
| dilution_(6/4.5)_1.3(3) | logFC | 95.77607 | 85.56448 | 90.53485 | 
| dilution_(6/4.5)_1.3(3) | scaled.pval | 97.05394 | 88.87187 | 91.97448 | 
| dilution_(6/4.5)_1.3(3) | t | 97.03145 | 88.70474 | 91.89117 | 
| dilution_(7.5/6)_1.25 | logFC | 93.49705 | 65.08673 | 79.62722 | 
| dilution_(7.5/6)_1.25 | scaled.pval | 94.18917 | 78.51848 | 83.36609 | 
| dilution_(7.5/6)_1.25 | t | 94.15678 | 78.33434 | 83.18942 | 
| dilution_(9/7.5)_1.2 | logFC | 95.72366 | 73.87153 | 84.78715 | 
| dilution_(9/7.5)_1.2 | scaled.pval | 97.60333 | 86.18294 | 90.60964 | 
| dilution_(9/7.5)_1.2 | t | 97.58486 | 85.98590 | 90.50751 | 
res$barp
ROC curves
#res$ftable
benchmark_msqrob$plot_ROC(xlim = 0.2)
plot ROC curves
benchmark_msqrob$plot_FDRvsFDP()
plot FDR vs FDP
benchmark_msqrob$plot_precision_recall()
annotation <- readxl::read_xlsx(inputAnnotation)
msstats2 <- read.csv(
    unz(inputFragfile,"IonstarWithMSFragger/MSstats.csv"),
    header = TRUE,
    sep = ",",
    stringsAsFactors = FALSE)
msstats2$BioReplicate <- NULL
msstats2$Run <- tolower(msstats2$Run)
msstats2$Condition <- NULL
#msstats$BioReplicate <- NULL
annotation2 <- annotation
annotation2$BioReplicate <- annotation2$run_ID
annotation2$run_ID <- NULL
annotation2$date <- NULL
annotation2$Condition <- annotation2$sample
annotation2$sample <- NULL
msstats2 <- dplyr::inner_join(msstats2 , annotation2, by = c(Run = "raw.file"))
pin <- unique(lfqdataNormalized$data$protein_Id)
# keep only the same proteins which are analysed also with msqrob2, proDA and prolfqua
msstats2 <- dplyr::filter(msstats2, ProteinName %in% pin)
msstats2$ProteinName |> unique() |> length()## [1] 3904
msstatscolumns <- c("ProteinName", "PeptideSequence",  "PrecursorCharge" , "FragmentIon" ,     "ProductCharge"   , "IsotopeLabelType", "Condition","BioReplicate","Run",    
"Fraction" ,"Intensity" )
invisible(utils::capture.output(
    QuantData <- MSstats::dataProcess(msstats2, use_log_file = FALSE, verbose = FALSE)
    ))
# based on multiple comparisons  (T1 vs T3; T1 vs T7; T1 vs T9)
comparison1 <- matrix(c(0,0,0,-1,1),nrow = 1)
comparison2 <- matrix(c(0,0,-1,1,0),nrow = 1)
comparison3 <- matrix(c(0,-1,1,0,0),nrow = 1)
comparison4 <- matrix(c(-1,1,0,0,0),nrow = 1)
comparison <- rbind(comparison1,comparison2, comparison3, comparison4)
row.names(comparison) <- c("dilution_(9/7.5)_1.2","dilution_(7.5/6)_1.25","dilution_(6/4.5)_1.3(3)","dilution_(4.5/3)_1.5")
colnames(comparison) <- c("a","b","c","d","e")
invisible(utils::capture.output(
    testResultMultiComparisons <- MSstats::groupComparison(
    contrast.matrix = comparison,
    data = QuantData,
    verbose = FALSE,
    use_log_file = FALSE)
    ))
bb <- testResultMultiComparisons$ComparisonResult
library(prolfqua)
ttd <- prolfqua::ionstar_bench_preprocess(bb, idcol = "Protein")
benchmark_msstats <- prolfqua::make_benchmark(
    ttd$data,
    contrast = "Label",
    toscale = c("pvalue"),
    fcestimate = "log2FC",
    benchmark = list(
        list(score = "log2FC", desc = TRUE),
        list(score = "Tvalue", desc = TRUE),
        list(score = "scaled.pvalue", desc = TRUE)
    ),  
    model_description = "MSStats",
    model_name = "MSStats",
    FDRvsFDP = list(list(score = "adj.pvalue", desc = FALSE))
    , hierarchy = c("Protein"), summarizeNA = "Tvalue"
)
sum(benchmark_msstats$smc$summary$Protein)## [1] 3897
sumarry <- benchmark_msstats$smc$summary
prolfqua::table_facade(sumarry, caption = "nr of proteins with 0, 1, 2, 3 missing contrasts.")| nr_missing | Protein | 
|---|---|
| 0 | 3798 | 
| 1 | 61 | 
| 2 | 26 | 
| 3 | 6 | 
| 4 | 6 | 
Direct comparison with msqrob2 is impossible since, to fit the
dropout model, the peptide intensities are required, while here, we are
starting the analysis from the combined_proteins.tsv
file.
allB <- list(benchmark_proDA = benchmark_proDA,
             benchmark_prolfqua = benchmark_prolfqua,
             benchmark_msqrob = benchmark_msqrob,
             benchmark_msstats = benchmark_msstats)
bdir <- file.path("../inst/Benchresults/",format( Sys.Date(), "%Y%m%d"))
if (!dir.exists(bdir)) {dir.create(bdir)}
saveRDS(allB
  ,file.path("../inst/Benchresults/",format( Sys.Date(), "%Y%m%d"),"FragPipev14_comb_MSStats.RDS"))
proda <- allB$benchmark_proDA$pAUC_summaries()$ftable$content
proda$package <- "proda"
prolfqua <- allB$benchmark_prolfqua$pAUC_summaries()$ftable$content
prolfqua$package <- "prolfqua"
msqrob2 <- allB$benchmark_msqrob$pAUC_summaries()$ftable$content
msqrob2$package <- "msqrob2"
tmp <- dplyr::bind_rows(proda, prolfqua, msqrob2)
bmsstats <- allB$benchmark_msstats$pAUC_summaries()$ftable$content
bmsstats$package <- "MSstats"
bmsstats$contrast <- bmsstats$Label
bmsstats$Label <- NULL
tmp <- dplyr::bind_rows(list(proda, prolfqua, msqrob2, bmsstats))
tmp$what[tmp$what == "statistic"] <- "t_statistic"
tmp$what[tmp$what == "scaled.pval"] <- "scaled.p.value"
tmp$what[tmp$what == "scaled.pvalue"] <- "scaled.p.value"
tmp$what[tmp$what == "logFC"] <- "diff"
tmp$what[tmp$what == "log2FC"] <- "diff"
tmp$what[tmp$what == "t"] <- "t_statistic"
tmp$what[tmp$what == "Tvalue"] <- "t_statistic"
tmp$what |> unique()## [1] "diff"           "scaled.p.value" "t_statistic"
tmp |> ggplot2::ggplot(ggplot2::aes(x = what, y = pAUC_10, group = package, color = NULL, fill = package)) +
  ggplot2::geom_bar(stat = "identity",  position = ggplot2::position_dodge()) +
  ggplot2::facet_wrap(~ contrast) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1))
all <- tmp |> dplyr::filter(contrast == "all")
knitr::kable(tmp |> dplyr::filter(contrast == "all"), caption = "Table")| contrast | what | AUC | pAUC_10 | pAUC_20 | package | 
|---|---|---|---|---|---|
| all | diff | 94.93411 | 75.93204 | 85.38220 | proda | 
| all | scaled.p.value | 95.09318 | 80.01212 | 84.66020 | proda | 
| all | t_statistic | 95.09318 | 80.01212 | 84.66020 | proda | 
| all | diff | 95.08941 | 75.62771 | 85.05479 | prolfqua | 
| all | scaled.p.value | 96.04553 | 83.17176 | 87.64209 | prolfqua | 
| all | t_statistic | 96.03922 | 83.09677 | 87.62712 | prolfqua | 
| all | diff | 95.28429 | 77.26238 | 86.27762 | msqrob2 | 
| all | scaled.p.value | 96.11219 | 83.36280 | 87.89590 | msqrob2 | 
| all | t_statistic | 96.08561 | 83.14210 | 87.76231 | msqrob2 | 
| all | t_statistic | 96.57121 | 81.46841 | 87.43125 | MSstats | 
| all | diff | 95.41737 | 72.92764 | 84.40111 | MSstats | 
| all | scaled.p.value | 96.57664 | 81.52092 | 87.43752 | MSstats | 
ggplot2::ggplot(all, ggplot2::aes(x = package, y = pAUC_10)) +
  ggplot2::geom_bar(stat = "identity") +
  ggplot2::facet_wrap(~what)  + 
  ggplot2::theme_minimal() + 
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = -90, vjust = 0.5)) +
  ggplot2::xlab("")
a1 <- allB$benchmark_proDA$smc$summary
names(a1)[2] <- "protein_Id"
a1$name <- allB$benchmark_proDA$model_name
a2 <- allB$benchmark_prolfqua$smc$summary
names(a2)[2] <- "protein_Id"
a2$name <- allB$benchmark_prolfqua$model_name
a3 <- allB$benchmark_msqrob$smc$summary
names(a3)[2] <- "protein_Id"
a3$name <- allB$benchmark_msqrob$model_name
a4 <- allB$benchmark_msstats$smc$summary
names(a4)[2] <- "protein_Id"
a4$name <- allB$benchmark_msstats$model_name
dd <- dplyr::bind_rows(list(a1,a2,a3,a4))
dd <- dd |> dplyr::mutate(nrcontrasts = protein_Id * (4 - nr_missing))
dds <- dd |> dplyr::group_by(name) |> dplyr::summarize(nrcontrasts = sum(nrcontrasts))
dds$percent <- dds$nrcontrasts/max(dds$nrcontrasts) * 100
nrgg <- dds |> ggplot2::ggplot(ggplot2::aes(x = name, y = nrcontrasts )) + 
  ggplot2::geom_bar(stat = "identity", fill = "white", colour = "black") + 
  ggplot2::coord_cartesian(ylim = c(min(dds$nrcontrasts) - 200, max(dds$nrcontrasts) + 10)) +
  ggplot2::theme(axis.text.x = ggplot2::element_text(angle = -90, vjust = 0.5)) +
  ggplot2::geom_text(ggplot2::aes(label = paste0(round(nrcontrasts, digits = 1), paste0("  (",round(percent, digits = 1),"%)"))),
            vjust = 0, hjust = -0.2, angle = -90) #+ 
nrgg
Number of comparisons for each method