vignettes/BenchmarkFragPipeMSStats.Rmd
BenchmarkFragPipeMSStats.Rmd
Please download and install the prolfquadata
package
from github
conflicted::conflict_prefer("filter", "dplyr")
datadir <- file.path(find.package("prolfquadata") , "quantdata")
inputFragfile <- file.path(datadir, "MSFragger_IonStar2018_PXD003881.zip")
inputAnnotation <- file.path(datadir, "annotation_Ionstar2018_PXD003881.xlsx")
annotation <- readxl::read_xlsx(inputAnnotation)
## Error in get(paste0(generic, ".", class), envir = get_method_env()) :
## object 'type_sum.accel' not found
unzip(inputFragfile, list = TRUE)
## Name Length
## 1 IonstarWithMSFragger/combined_peptide.tsv 10202897
## 2 IonstarWithMSFragger/combined_protein.tsv 5288537
## 3 IonstarWithMSFragger/fragger.params 8423
## 4 IonstarWithMSFragger/fragpipe_2021-02-18_20-02-51.config 7509
## 5 IonstarWithMSFragger/log_2021-02-18_23-40-52.txt 585337
## 6 IonstarWithMSFragger/mbr_ion.tsv 52755282
## 7 IonstarWithMSFragger/MSstats.csv 147944334
## 8 IonstarWithMSFragger/reprint.int.tsv 861960
## 9 IonstarWithMSFragger/reprint.spc.tsv 276563
## Date
## 1 2021-02-18 23:40:00
## 2 2021-02-18 23:40:00
## 3 2021-02-18 20:02:00
## 4 2021-02-18 20:02:00
## 5 2021-02-18 23:41:00
## 6 2021-02-18 23:39:00
## 7 2021-02-18 23:40:00
## 8 2021-02-18 23:40:00
## 9 2021-02-18 22:47:00
msstats <- read.csv(
unz(inputFragfile,"IonstarWithMSFragger/MSstats.csv"),
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
msstats <- tibble::tibble(msstats)
msstats$Run <- tolower(msstats$Run)
merged <- dplyr::inner_join(annotation, msstats, by = c("raw.file" = "Run"))
summedPep <- merged |> dplyr::group_by(raw.file, run_ID, sample, ProteinName, PeptideSequence) |> dplyr::summarize(nprecursor = dplyr::n(),pepIntensity = sum(Intensity, na.rm = TRUE), .groups = "drop")
atable <- prolfqua::AnalysisTableAnnotation$new()
atable$fileName = "raw.file"
atable$hierarchy[["protein_Id"]] <- c("ProteinName")
atable$hierarchy[["peptide_Id"]] <- c("PeptideSequence")
atable$hierarchyDepth <- 1
atable$set_response("pepIntensity")
atable$factors[["dilution."]] = "sample"
atable$factors[["run"]] = "run_ID"
atable$factorDepth <- 1
config <- prolfqua::AnalysisConfiguration$new(atable)
adata <- prolfqua::setup_analysis(summedPep, config)
lfqdata <- prolfqua::LFQData$new(adata, config)
lfqdata$hierarchy_counts()
## # A tibble: 1 × 3
## isotopeLabel protein_Id peptide_Id
## <chr> <int> <int>
## 1 light 5122 31107
lfqdata$filter_proteins_by_peptide_count()
lfqdata$hierarchy_counts()
## # A tibble: 1 × 3
## isotopeLabel protein_Id peptide_Id
## <chr> <int> <int>
## 1 light 3904 29889
lfqdata$remove_small_intensities()
lfqdata$hierarchy_counts()
## # A tibble: 1 × 3
## isotopeLabel protein_Id peptide_Id
## <chr> <int> <int>
## 1 light 3904 29889
knitr::kable(lfqdata$hierarchy_counts(), caption = "number of proteins and peptides.")
isotopeLabel | protein_Id | peptide_Id |
---|---|---|
light | 3904 | 29889 |
pl <- lfqdata$get_Plotter()
pl$intensity_distribution_density()
subset_h <- lfqdata$get_copy()$get_Transformer()$log2()$lfq
subset_h$data <- subset_h$data |> dplyr::filter(grepl("HUMAN", protein_Id))
tr <- lfqdata$get_Transformer()
lfqdataPeptideNorm <- tr$log2()$robscale_subset(lfqsubset = subset_h)$lfq
pl <- lfqdataPeptideNorm$get_Plotter()
pl$intensity_distribution_density()
hm <- pl$NA_heatmap()
hm
Contrasts <- c(
"dilution_(9/7.5)_1.2" = "dilution.e - dilution.d",
"dilution_(7.5/6)_1.25" = "dilution.d - dilution.c",
"dilution_(6/4.5)_1.3(3)" = "dilution.c - dilution.b",
"dilution_(4.5/3)_1.5" = "dilution.b - dilution.a"
)
lmmodel <- "~ dilution."
lmmodel <- paste0(lfqdataNormalized$config$table$get_response() , lmmodel)
modelFunction <- prolfqua::strategy_lm( lmmodel, model_name = "Model")
mod <- prolfqua::build_model(lfqdataNormalized$data, modelFunction)
contr <- prolfqua::Contrasts$new(mod, Contrasts)
contrimp <- prolfqua::ContrastsMissing$new(lfqdataNormalized, Contrasts)
merged <- prolfqua::merge_contrasts_results(contr, contrimp)
mergedmod <- prolfqua::ContrastsModerated$new(merged$merged)
cp <- mergedmod$get_Plotter()
cp$volcano()$FDR
ttd <- prolfqua::ionstar_bench_preprocess(mergedmod$get_contrasts())
benchmark_prolfqua <- prolfqua::make_benchmark(ttd$data,
model_description = "prolfqua_merged",
model_name = "prolfqua_merged",
FDRvsFDP = list(list(score = "FDR", desc = FALSE))
)
knitr::kable(benchmark_prolfqua$pAUC_summaries()$ftable$content)
contrast | what | AUC | pAUC_10 | pAUC_20 |
---|---|---|---|---|
all | diff | 95.08941 | 75.62771 | 85.05479 |
all | scaled.p.value | 96.04553 | 83.17176 | 87.64209 |
all | statistic | 96.03922 | 83.09677 | 87.62712 |
dilution_(4.5/3)_1.5 | diff | 93.72267 | 81.39620 | 85.65552 |
dilution_(4.5/3)_1.5 | scaled.p.value | 93.58459 | 83.35680 | 85.19206 |
dilution_(4.5/3)_1.5 | statistic | 93.58722 | 83.28201 | 85.20315 |
dilution_(6/4.5)_1.3(3) | diff | 95.03489 | 82.50829 | 88.05018 |
dilution_(6/4.5)_1.3(3) | scaled.p.value | 96.92545 | 88.24154 | 90.65322 |
dilution_(6/4.5)_1.3(3) | statistic | 96.90698 | 88.16417 | 90.61789 |
dilution_(7.5/6)_1.25 | diff | 94.31033 | 63.86013 | 79.61097 |
dilution_(7.5/6)_1.25 | scaled.p.value | 94.87680 | 78.15999 | 83.92652 |
dilution_(7.5/6)_1.25 | statistic | 94.86947 | 77.91034 | 83.86978 |
dilution_(9/7.5)_1.2 | diff | 96.50924 | 74.39250 | 86.26315 |
dilution_(9/7.5)_1.2 | scaled.p.value | 98.23989 | 88.83473 | 93.14924 |
dilution_(9/7.5)_1.2 | statistic | 98.22705 | 88.70681 | 93.08966 |
prolfqua::table_facade(benchmark_prolfqua$smc$summary, "Nr of estimated contrasts")
nr_missing | protein_Id |
---|---|
0 | 3899 |
benchmark_prolfqua$plot_score_distribution()
benchmark_prolfqua$plot_ROC(0.05)
benchmark_prolfqua$plot_FDRvsFDP()
se <- prolfqua::LFQDataToSummarizedExperiment(lfqdataNormalized)
fit <- proDA::proDA(se, design = ~ dilution. - 1, data_is_log_transformed = TRUE)
contr <- list()
contr[["dilution_(9/7.5)_1.2"]] <- data.frame(
contrast = "dilution_(9/7.5)_1.2",
proDA::test_diff(fit, contrast = "dilution.e - dilution.d"))
contr[["dilution_(7.5/6)_1.25"]] <- data.frame(
contrast = "dilution_(7.5/6)_1.25",
proDA::test_diff(fit, contrast = "dilution.d - dilution.c"))
contr[["dilution_(6/4.5)_1.3(3)"]] <- data.frame(
contrast = "dilution_(6/4.5)_1.3(3)",
proDA::test_diff(fit, contrast = "dilution.c - dilution.b"))
contr[["dilution_(4.5/3)_1.5"]] <- data.frame(
contrast = "dilution_(4.5/3)_1.5",
proDA::test_diff(fit, contrast = "dilution.b - dilution.a" ))
bb <- dplyr::bind_rows(contr)
ttd <- prolfqua::ionstar_bench_preprocess( bb , idcol = "name" )
benchmark_proDA <- prolfqua::make_benchmark(ttd$data,
contrast = "contrast",
toscale = c("pval"),
fcestimate = "diff",
benchmark = list(
list(score = "diff", desc = TRUE),
list(score = "t_statistic", desc = TRUE),
list(score = "scaled.pval", desc = TRUE)
),
model_description = "proDA_medpolishInt",
model_name = "proDA",
FDRvsFDP = list(list(score = "adj_pval", desc = FALSE))
, hierarchy = c("name"), summarizeNA = "t_statistic"
)
sumarry <- benchmark_proDA$smc$summary
prolfqua::table_facade(sumarry, caption = "nr of proteins with 0, 1, 2, 3 missing contrasts.")
nr_missing | name |
---|---|
0 | 3899 |
knitr::kable(benchmark_proDA$pAUC_summaries()$ftable$content)
contrast | what | AUC | pAUC_10 | pAUC_20 |
---|---|---|---|---|
all | diff | 94.93411 | 75.93204 | 85.38220 |
all | scaled.pval | 95.09318 | 80.01212 | 84.66020 |
all | t_statistic | 95.09318 | 80.01212 | 84.66020 |
dilution_(4.5/3)_1.5 | diff | 94.63617 | 83.29955 | 87.92716 |
dilution_(4.5/3)_1.5 | scaled.pval | 92.90655 | 81.24404 | 83.66242 |
dilution_(4.5/3)_1.5 | t_statistic | 92.90655 | 81.24404 | 83.66242 |
dilution_(6/4.5)_1.3(3) | diff | 94.98945 | 85.11985 | 89.62882 |
dilution_(6/4.5)_1.3(3) | scaled.pval | 96.47262 | 85.71922 | 88.54897 |
dilution_(6/4.5)_1.3(3) | t_statistic | 96.47262 | 85.71922 | 88.54897 |
dilution_(7.5/6)_1.25 | diff | 93.61285 | 61.76129 | 77.63309 |
dilution_(7.5/6)_1.25 | scaled.pval | 93.28098 | 74.51681 | 80.27637 |
dilution_(7.5/6)_1.25 | t_statistic | 93.28098 | 74.51681 | 80.27637 |
dilution_(9/7.5)_1.2 | diff | 96.27883 | 73.69306 | 85.71381 |
dilution_(9/7.5)_1.2 | scaled.pval | 97.49176 | 84.89702 | 89.57879 |
dilution_(9/7.5)_1.2 | t_statistic | 97.49176 | 84.89702 | 89.57879 |
prolfqua::table_facade(benchmark_proDA$smc$summary, "Nr of estimated contrasts")
nr_missing | name |
---|---|
0 | 3899 |
benchmark_proDA$plot_score_distribution()
benchmark_proDA$plot_ROC(0.05)
benchmark_proDA$plot_FDRvsFDP()
To use proDA
, we need to create an
SummarizedExperiment
. We use the to_wide
function of prolfqua
to get the data in in the
SummarizedExperiment
compatible format.
As usual, two steps are required, first fit the models, then compute the contrasts.
prlm <- msqrobHurdle(pe,
i = "protein",
formula = ~dilution.,
overwrite = TRUE)
Since msqrob does not report average abundances, we are computing them for each contrast.
st <- lfqdataNormalized$get_Stats()
protAbundanceIngroup <- st$stats()
protAbundanceIngroup <- protAbundanceIngroup |>
tidyr::pivot_wider(
id_cols = protein_Id,
names_from = dilution.,
names_prefix = "abd.",
values_from = meanAbundance)
L <- makeContrast(c("dilution.e-dilution.d=0",
"dilution.d-dilution.c=0",
"dilution.c-dilution.b=0",
"dilution.b=0"),
parameterNames = c("dilution.e",
"dilution.d",
"dilution.c",
"dilution.b"))
prlm <- hypothesisTestHurdle(prlm, i = "protein", L, overwrite = TRUE)
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.e.d = mean( c(abd.e,abd.d), na.rm = TRUE) )
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.d.c = mean( c(abd.d,abd.c), na.rm = TRUE) )
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.c.b = mean( c(abd.c,abd.b), na.rm = TRUE) )
protAbundanceIngroup <- protAbundanceIngroup |> dplyr::mutate( avgAbd.b.a = mean( c(abd.b,abd.a), na.rm = TRUE) )
xx <- rowData(prlm[["protein"]])
hurdle <- xx[grepl("hurdle_",names(xx))]
res <- list()
for (i in names(hurdle)) {
hurdle[[i]]$contrast <- i
res[[i]] <- prolfqua::matrix_to_tibble(hurdle[[i]], preserve_row_names = "name")
}
hurdle <- dplyr::bind_rows(res)
Now we need to merge the results of both models msqrobHurdleIntensity
and msqrobHurdleCount. To find out which models were not estimated by
msqrobHurdleIntensity
we check for NA’s and use the
anti_join
to select those from the
msqrobHurdleCount
models.
logFC <- hurdle |> dplyr::select("name","contrast", starts_with("logFC"))
logFC <- dplyr::filter(logFC ,!is.na(logFCt))
logFC$modelName <- "msqrobHurdleIntensity"
names(logFC) <- c("name","contrast","logFC","se","df","t","pval","modelName")
logOR <- hurdle |> dplyr::select("name","contrast", starts_with("logOR"))
logOR$modelName <- "msqrobHurdleCount"
names(logOR) <- c("name","contrast","logFC","se","df","t","pval","modelName")
ddd <- dplyr::anti_join(logOR , logFC, by = c("name", "contrast"))
all <- dplyr::bind_rows(ddd , logFC) |> dplyr::arrange(contrast, name)
all <- prolfqua::adjust_p_values(all, column = "pval", group_by_col = "contrast")
all$contrast |> unique()
## [1] "hurdle_dilution.b" "hurdle_dilution.c - dilution.b"
## [3] "hurdle_dilution.d - dilution.c" "hurdle_dilution.e - dilution.d"
protAbundanceIngroup <- protAbundanceIngroup |>
dplyr::select(-starts_with("abd")) |>
tidyr::pivot_longer(starts_with("avgAbd"), names_to = "contrast" ,values_to = "avgAbd")
protAbundanceIngroup <- protAbundanceIngroup |>
dplyr::mutate(contrast =
dplyr::case_when(contrast == "avgAbd.e.d" ~ "dilution_(9/7.5)_1.2",
contrast == "avgAbd.d.c" ~ "dilution_(7.5/6)_1.25",
contrast == "avgAbd.c.b" ~ "dilution_(6/4.5)_1.3(3)",
contrast == "avgAbd.b.a" ~ "dilution_(4.5/3)_1.5",
TRUE ~ "something wrong"))
all <- all |> dplyr::mutate(contrast = dplyr::case_when(
contrast == "hurdle_dilution.e - dilution.d" ~ "dilution_(9/7.5)_1.2",
contrast == "hurdle_dilution.d - dilution.c" ~ "dilution_(7.5/6)_1.25",
contrast == "hurdle_dilution.c - dilution.b" ~ "dilution_(6/4.5)_1.3(3)",
contrast == "hurdle_dilution.b" ~ "dilution_(4.5/3)_1.5",
TRUE ~ "something wrong"))
stopifnot(sum(all$contrast == "something wrong") == 0 )
stopifnot(sum(all$contrast == "something wrong") == 0 )
bb <- dplyr::inner_join(all, protAbundanceIngroup, by = c("name" = "protein_Id", "contrast" = "contrast"))
Here we use proflqua
benchmark functions to generate
some summaries.
ttd <- prolfqua::ionstar_bench_preprocess( bb , idcol = "name" )
benchmark_msqrob <- prolfqua::make_benchmark(ttd$data,
contrast = "contrast",
toscale = c("pval"),
fcestimate = "logFC",
benchmark = list(
list(score = "logFC", desc = TRUE),
list(score = "t", desc = TRUE),
list(score = "scaled.pval", desc = TRUE)
),
model_description = "msqrob_QFeature",
model_name = "msqrob2",
FDRvsFDP = list(list(score = "FDR", desc = FALSE))
, hierarchy = c("name"), summarizeNA = "t"
)
sumarry <- benchmark_msqrob$smc$summary
prolfqua::table_facade(sumarry, caption = "nr of proteins with 0, 1, 2, 3 missing contrasts.")
nr_missing | name |
---|---|
0 | 3899 |
res <- benchmark_msqrob$pAUC_summaries()
knitr::kable(res$ftable$content,caption = res$ftable$caption)
contrast | what | AUC | pAUC_10 | pAUC_20 |
---|---|---|---|---|
all | logFC | 95.28429 | 77.26238 | 86.27762 |
all | scaled.pval | 96.11219 | 83.36280 | 87.89590 |
all | t | 96.08561 | 83.14210 | 87.76231 |
dilution_(4.5/3)_1.5 | logFC | 95.95577 | 85.38291 | 90.42446 |
dilution_(4.5/3)_1.5 | scaled.pval | 95.75265 | 85.54075 | 88.85923 |
dilution_(4.5/3)_1.5 | t | 95.71773 | 85.23075 | 88.68102 |
dilution_(6/4.5)_1.3(3) | logFC | 95.77607 | 85.56448 | 90.53485 |
dilution_(6/4.5)_1.3(3) | scaled.pval | 97.05394 | 88.87187 | 91.97448 |
dilution_(6/4.5)_1.3(3) | t | 97.03145 | 88.70474 | 91.89117 |
dilution_(7.5/6)_1.25 | logFC | 93.49705 | 65.08673 | 79.62722 |
dilution_(7.5/6)_1.25 | scaled.pval | 94.18917 | 78.51848 | 83.36609 |
dilution_(7.5/6)_1.25 | t | 94.15678 | 78.33434 | 83.18942 |
dilution_(9/7.5)_1.2 | logFC | 95.72366 | 73.87153 | 84.78715 |
dilution_(9/7.5)_1.2 | scaled.pval | 97.60333 | 86.18294 | 90.60964 |
dilution_(9/7.5)_1.2 | t | 97.58486 | 85.98590 | 90.50751 |
res$barp
#res$ftable
benchmark_msqrob$plot_ROC(xlim = 0.2)
benchmark_msqrob$plot_FDRvsFDP()
benchmark_msqrob$plot_precision_recall()
annotation <- readxl::read_xlsx(inputAnnotation)
msstats2 <- read.csv(
unz(inputFragfile,"IonstarWithMSFragger/MSstats.csv"),
header = TRUE,
sep = ",",
stringsAsFactors = FALSE)
msstats2$BioReplicate <- NULL
msstats2$Run <- tolower(msstats2$Run)
msstats2$Condition <- NULL
#msstats$BioReplicate <- NULL
annotation2 <- annotation
annotation2$BioReplicate <- annotation2$run_ID
annotation2$run_ID <- NULL
annotation2$date <- NULL
annotation2$Condition <- annotation2$sample
annotation2$sample <- NULL
msstats2 <- dplyr::inner_join(msstats2 , annotation2, by = c(Run = "raw.file"))
pin <- unique(lfqdataNormalized$data$protein_Id)
# keep only the same proteins which are analysed also with msqrob2, proDA and prolfqua
msstats2 <- dplyr::filter(msstats2, ProteinName %in% pin)
msstats2$ProteinName |> unique() |> length()
## [1] 3904
msstatscolumns <- c("ProteinName", "PeptideSequence", "PrecursorCharge" , "FragmentIon" , "ProductCharge" , "IsotopeLabelType", "Condition","BioReplicate","Run",
"Fraction" ,"Intensity" )
invisible(utils::capture.output(
QuantData <- MSstats::dataProcess(msstats2, use_log_file = FALSE, verbose = FALSE)
))
# based on multiple comparisons (T1 vs T3; T1 vs T7; T1 vs T9)
comparison1 <- matrix(c(0,0,0,-1,1),nrow = 1)
comparison2 <- matrix(c(0,0,-1,1,0),nrow = 1)
comparison3 <- matrix(c(0,-1,1,0,0),nrow = 1)
comparison4 <- matrix(c(-1,1,0,0,0),nrow = 1)
comparison <- rbind(comparison1,comparison2, comparison3, comparison4)
row.names(comparison) <- c("dilution_(9/7.5)_1.2","dilution_(7.5/6)_1.25","dilution_(6/4.5)_1.3(3)","dilution_(4.5/3)_1.5")
colnames(comparison) <- c("a","b","c","d","e")
invisible(utils::capture.output(
testResultMultiComparisons <- MSstats::groupComparison(
contrast.matrix = comparison,
data = QuantData,
verbose = FALSE,
use_log_file = FALSE)
))
bb <- testResultMultiComparisons$ComparisonResult
library(prolfqua)
ttd <- prolfqua::ionstar_bench_preprocess(bb, idcol = "Protein")
benchmark_msstats <- prolfqua::make_benchmark(
ttd$data,
contrast = "Label",
toscale = c("pvalue"),
fcestimate = "log2FC",
benchmark = list(
list(score = "log2FC", desc = TRUE),
list(score = "Tvalue", desc = TRUE),
list(score = "scaled.pvalue", desc = TRUE)
),
model_description = "MSStats",
model_name = "MSStats",
FDRvsFDP = list(list(score = "adj.pvalue", desc = FALSE))
, hierarchy = c("Protein"), summarizeNA = "Tvalue"
)
sum(benchmark_msstats$smc$summary$Protein)
## [1] 3897
sumarry <- benchmark_msstats$smc$summary
prolfqua::table_facade(sumarry, caption = "nr of proteins with 0, 1, 2, 3 missing contrasts.")
nr_missing | Protein |
---|---|
0 | 3798 |
1 | 61 |
2 | 26 |
3 | 6 |
4 | 6 |
Direct comparison with msqrob2 is impossible since, to fit the
dropout model, the peptide intensities are required, while here, we are
starting the analysis from the combined_proteins.tsv
file.
allB <- list(benchmark_proDA = benchmark_proDA,
benchmark_prolfqua = benchmark_prolfqua,
benchmark_msqrob = benchmark_msqrob,
benchmark_msstats = benchmark_msstats)
bdir <- file.path("../inst/Benchresults/",format( Sys.Date(), "%Y%m%d"))
if (!dir.exists(bdir)) {dir.create(bdir)}
saveRDS(allB
,file.path("../inst/Benchresults/",format( Sys.Date(), "%Y%m%d"),"FragPipev14_comb_MSStats.RDS"))
proda <- allB$benchmark_proDA$pAUC_summaries()$ftable$content
proda$package <- "proda"
prolfqua <- allB$benchmark_prolfqua$pAUC_summaries()$ftable$content
prolfqua$package <- "prolfqua"
msqrob2 <- allB$benchmark_msqrob$pAUC_summaries()$ftable$content
msqrob2$package <- "msqrob2"
tmp <- dplyr::bind_rows(proda, prolfqua, msqrob2)
bmsstats <- allB$benchmark_msstats$pAUC_summaries()$ftable$content
bmsstats$package <- "MSstats"
bmsstats$contrast <- bmsstats$Label
bmsstats$Label <- NULL
tmp <- dplyr::bind_rows(list(proda, prolfqua, msqrob2, bmsstats))
tmp$what[tmp$what == "statistic"] <- "t_statistic"
tmp$what[tmp$what == "scaled.pval"] <- "scaled.p.value"
tmp$what[tmp$what == "scaled.pvalue"] <- "scaled.p.value"
tmp$what[tmp$what == "logFC"] <- "diff"
tmp$what[tmp$what == "log2FC"] <- "diff"
tmp$what[tmp$what == "t"] <- "t_statistic"
tmp$what[tmp$what == "Tvalue"] <- "t_statistic"
tmp$what |> unique()
## [1] "diff" "scaled.p.value" "t_statistic"
tmp |> ggplot2::ggplot(ggplot2::aes(x = what, y = pAUC_10, group = package, color = NULL, fill = package)) +
ggplot2::geom_bar(stat = "identity", position = ggplot2::position_dodge()) +
ggplot2::facet_wrap(~ contrast) +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1))
all <- tmp |> dplyr::filter(contrast == "all")
knitr::kable(tmp |> dplyr::filter(contrast == "all"), caption = "Table")
contrast | what | AUC | pAUC_10 | pAUC_20 | package |
---|---|---|---|---|---|
all | diff | 94.93411 | 75.93204 | 85.38220 | proda |
all | scaled.p.value | 95.09318 | 80.01212 | 84.66020 | proda |
all | t_statistic | 95.09318 | 80.01212 | 84.66020 | proda |
all | diff | 95.08941 | 75.62771 | 85.05479 | prolfqua |
all | scaled.p.value | 96.04553 | 83.17176 | 87.64209 | prolfqua |
all | t_statistic | 96.03922 | 83.09677 | 87.62712 | prolfqua |
all | diff | 95.28429 | 77.26238 | 86.27762 | msqrob2 |
all | scaled.p.value | 96.11219 | 83.36280 | 87.89590 | msqrob2 |
all | t_statistic | 96.08561 | 83.14210 | 87.76231 | msqrob2 |
all | t_statistic | 96.57121 | 81.46841 | 87.43125 | MSstats |
all | diff | 95.41737 | 72.92764 | 84.40111 | MSstats |
all | scaled.p.value | 96.57664 | 81.52092 | 87.43752 | MSstats |
ggplot2::ggplot(all, ggplot2::aes(x = package, y = pAUC_10)) +
ggplot2::geom_bar(stat = "identity") +
ggplot2::facet_wrap(~what) +
ggplot2::theme_minimal() +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = -90, vjust = 0.5)) +
ggplot2::xlab("")
a1 <- allB$benchmark_proDA$smc$summary
names(a1)[2] <- "protein_Id"
a1$name <- allB$benchmark_proDA$model_name
a2 <- allB$benchmark_prolfqua$smc$summary
names(a2)[2] <- "protein_Id"
a2$name <- allB$benchmark_prolfqua$model_name
a3 <- allB$benchmark_msqrob$smc$summary
names(a3)[2] <- "protein_Id"
a3$name <- allB$benchmark_msqrob$model_name
a4 <- allB$benchmark_msstats$smc$summary
names(a4)[2] <- "protein_Id"
a4$name <- allB$benchmark_msstats$model_name
dd <- dplyr::bind_rows(list(a1,a2,a3,a4))
dd <- dd |> dplyr::mutate(nrcontrasts = protein_Id * (4 - nr_missing))
dds <- dd |> dplyr::group_by(name) |> dplyr::summarize(nrcontrasts = sum(nrcontrasts))
dds$percent <- dds$nrcontrasts/max(dds$nrcontrasts) * 100
nrgg <- dds |> ggplot2::ggplot(ggplot2::aes(x = name, y = nrcontrasts )) +
ggplot2::geom_bar(stat = "identity", fill = "white", colour = "black") +
ggplot2::coord_cartesian(ylim = c(min(dds$nrcontrasts) - 200, max(dds$nrcontrasts) + 10)) +
ggplot2::theme(axis.text.x = ggplot2::element_text(angle = -90, vjust = 0.5)) +
ggplot2::geom_text(ggplot2::aes(label = paste0(round(nrcontrasts, digits = 1), paste0(" (",round(percent, digits = 1),"%)"))),
vjust = 0, hjust = -0.2, angle = -90) #+
nrgg