Setup

evalAll <- require("MSstats")
datadir <- file.path(find.package("prolfquadata"), "quantdata")
inputAnnotation <- file.path(datadir, "annotation_Ionstar2018_PXD003881.xlsx")

# Contrast matrix shared by both datasets
comparison1 <- matrix(c(0, 0, 0, -1, 1), nrow = 1)
comparison2 <- matrix(c(0, 0, -1, 1, 0), nrow = 1)
comparison3 <- matrix(c(0, -1, 1, 0, 0), nrow = 1)
comparison4 <- matrix(c(-1, 1, 0, 0, 0), nrow = 1)
comparison <- rbind(comparison1, comparison2, comparison3, comparison4)
row.names(comparison) <- c("dilution_(9/7.5)_1.2", "dilution_(7.5/6)_1.25",
                           "dilution_(6/4.5)_1.3(3)", "dilution_(4.5/3)_1.5")
colnames(comparison) <- c("a", "b", "c", "d", "e")

Dataset 1: IonStar / MaxQuant

inputMQfile <- file.path(datadir, "MAXQuant_IonStar2018_PXD003881.zip")

proteinGroups <- read.table(unz(inputMQfile, "proteinGroups.txt"), sep = "\t", header = TRUE)
infile <- read.table(unz(inputMQfile, "evidence.txt"), sep = "\t", header = TRUE)

annotation <- readxl::read_xlsx(inputAnnotation)
msstas_annotation <- annotation |>
  dplyr::select(raw.file, Condition = sample, BioReplicate = run_ID)
msstas_annotation$IsotopeLabelType <- "L"
msstas_annotation <- msstas_annotation |>
  tidyr::unite("Experiment", Condition, BioReplicate, sep = "_", remove = FALSE)

rf <- unique(infile$Raw.file)
rf <- data.frame(Raw.file = rf, raw.file = tolower(rf))
msstas_annotation <- dplyr::inner_join(rf, msstas_annotation)
msstas_annotation$raw.file <- NULL
raw <- MSstats::MaxQtoMSstatsFormat(
  evidence = infile,
  annotation = msstas_annotation,
  proteinGroups = proteinGroups,
  removeProtein_with1Peptide = TRUE,
  use_log_file = FALSE,
  verbose = FALSE)

Remove reverse sequences and contaminants.

raw2 <- raw[!grepl("REV__", raw$ProteinName), ]
raw3 <- raw2[!grepl("CON__", raw2$ProteinName), ]
invisible(utils::capture.output(
  QuantData_mq <- MSstats::dataProcess(raw3, use_log_file = FALSE, verbose = FALSE)
))
invisible(utils::capture.output(
  testResult_mq <- MSstats::groupComparison(
    contrast.matrix = comparison,
    data = QuantData_mq,
    verbose = FALSE,
    use_log_file = FALSE)
))
bb_mq <- testResult_mq$ComparisonResult
msstats_col_map <- c(
  "Protein" = "protein_id",
  "Label" = "contrast",
  "log2FC" = "log_fc",
  "Tvalue" = "t_statistic",
  "pvalue" = "p_value",
  "adj.pvalue" = "p_value_adjusted"
)

write_contrast_results(
  bb_mq,
  path = "../inst/Benchresults/MSstats_MaxQuant",
  metadata = list(
    dataset = "IonStar_MaxQuant",
    method = "MSstats",
    method_description = "MSstats on MaxQuant evidence.txt",
    input_file = "MAXQuant_IonStar2018_PXD003881.zip/evidence.txt",
    software_version = paste0("MSstats ", packageVersion("MSstats")),
    date = as.character(Sys.Date()),
    ground_truth = list(
      id_column = "protein_id",
      positive = list(label = "ECOLI", pattern = "ECOLI"),
      negative = list(label = "HUMAN", pattern = "HUMAN")
    )
  ),
  col_map = msstats_col_map
)

Dataset 2: IonStar / FragPipe

inputFragfile <- file.path(datadir, "MSFragger_IonStar2018_PXD003881.zip")

msstats_fp <- read.csv(
  unz(inputFragfile, "IonstarWithMSFragger/MSstats.csv"),
  header = TRUE, sep = ",", stringsAsFactors = FALSE)

msstats_fp$BioReplicate <- NULL
msstats_fp$Run <- tolower(msstats_fp$Run)
msstats_fp$Condition <- NULL

annotation_fp <- readxl::read_xlsx(inputAnnotation)
annotation_fp$BioReplicate <- annotation_fp$run_ID
annotation_fp$run_ID <- NULL
annotation_fp$date <- NULL
annotation_fp$Condition <- annotation_fp$sample
annotation_fp$sample <- NULL

msstats_fp <- dplyr::inner_join(msstats_fp, annotation_fp, by = c(Run = "raw.file"))
invisible(utils::capture.output(
  QuantData_fp <- MSstats::dataProcess(msstats_fp, use_log_file = FALSE, verbose = FALSE)
))
invisible(utils::capture.output(
  testResult_fp <- MSstats::groupComparison(
    contrast.matrix = comparison,
    data = QuantData_fp,
    verbose = FALSE,
    use_log_file = FALSE)
))
bb_fp <- testResult_fp$ComparisonResult
write_contrast_results(
  bb_fp,
  path = "../inst/Benchresults/MSstats_FragPipe",
  metadata = list(
    dataset = "IonStar_FragPipe",
    method = "MSstats",
    method_description = "MSstats on FragPipe MSstats.csv",
    input_file = "MSFragger_IonStar2018_PXD003881.zip/IonstarWithMSFragger/MSstats.csv",
    software_version = paste0("MSstats ", packageVersion("MSstats")),
    date = as.character(Sys.Date()),
    ground_truth = list(
      id_column = "protein_id",
      positive = list(label = "ECOLI", pattern = "ECOLI"),
      negative = list(label = "HUMAN", pattern = "HUMAN")
    )
  ),
  col_map = msstats_col_map
)