Decorates LFQData with a row annotation and some protein specific functions.
Source: R/R6_ProteinAnnotation.R
ProteinAnnotation.Rd
Decorates LFQData with a row annotation and some protein specific functions.
Decorates LFQData with a row annotation and some protein specific functions.
Public fields
row_annot: data.frame containing further information
pID: column with protein ids
full_id: column with the full protein id, e.g. sp|...; can be the same as pID
description: name of column containing descriptions
cleaned_ids: vector with columns containing additional IDs
exp_nr_children: name of column with the number of peptides
pattern_contaminants: pattern used to identify contaminants
pattern_decoys: pattern used to identify decoys
Methods
Method new()
initialize
Usage
ProteinAnnotation$new(
lfqdata,
row_annot = NULL,
description = NULL,
cleaned_ids = NULL,
full_id = NULL,
exp_nr_children = "nr_peptides",
pattern_contaminants = NULL,
pattern_decoys = NULL
)
Arguments
lfqdata: data frame from setup_analysis
row_annot: data frame with row annotation. Must have columns matching config$hierarchy_keys_depth()
description: name of column with description
cleaned_ids: names of columns with cleaned Ids
full_id: column with full protein ID
exp_nr_children: column with the number of children
pattern_contaminants: pattern used to identify contaminants
pattern_decoys: pattern used to identify decoys
Examples
# Example walk-through: build a ProteinAnnotation for simulated data and
# check its decoy/contaminant bookkeeping and filtering helpers.

# Simulate peptide-level data for 100 proteins; the result carries both the
# data ($data) and its configuration ($config).
istar <- prolfqua::sim_lfq_data_peptide_config(Nprot = 100)
#> creating sampleName from fileName column
#> completing cases
#> completing cases done
#> setup done
# Count observations per experiment, once with from_children = TRUE and once
# with from_children = FALSE.
xd1 <- prolfqua::nr_obs_experiment(istar$data, istar$config, from_children = TRUE)
xd2 <- prolfqua::nr_obs_experiment(istar$data, istar$config, from_children = FALSE)
# Frequency table of the nr_child_exp column.
xd1$nr_child_exp |> table()
#>
#> 1 2 3 4 5 6 7 8 9 10 11 12
#> 22 26 16 10 9 3 5 4 1 1 1 2
# Wrap data and configuration in an LFQData object.
lfqdata <- prolfqua::LFQData$new(istar$data, istar$config)
# Rewrite the protein ids with add_RevCon; the checks further down expect
# 10 ids matching "^REV" and 5 ids matching "^zz" afterwards.
lfqdata$data$protein_Id <- add_RevCon(lfqdata$data$protein_Id)
# Keep only the unique protein ids that match neither "^zz" nor "^REV".
pids <- grep("^zz|^REV", unique(lfqdata$data$protein_Id), value = TRUE, invert = TRUE)
# Extra row annotation: one random 13-character description per retained id.
addannot <- data.frame(
protein_Id = pids,
description = stringi::stri_rand_strings(length(pids), 13)
)
# Split protein_Id into a cleanID column (the NA entry drops the second piece)
# while keeping the original protein_Id column (remove = FALSE).
addannot <- addannot |> tidyr::separate(protein_Id, c("cleanID", NA), remove = FALSE)
# ProteinAnnotation$debug("initialize")
# debug(nr_obs_sample)
xd4 <- prolfqua::nr_obs_sample(lfqdata$data, lfqdata$config)
xd3 <- prolfqua::nr_obs_experiment(lfqdata$data, lfqdata$config, from_children = FALSE)
# Construct the annotation. exp_nr_children is not passed here; note the
# warning emitted below.
pannot <- ProteinAnnotation$new(lfqdata,
addannot,
description = "description",
cleaned_ids = "cleanID",
pattern_contaminants = "^zz",
pattern_decoys = "^REV"
)
#> Warning: no exp_nr_children column specified, computing using nr_obs_experiment function
# The annotate_* calls are expected to return 10 (decoys) and 5 (contaminants).
stopifnot(pannot$annotate_decoys() == 10)
stopifnot(pannot$annotate_contaminants() == 5)
# clean() with default arguments returns 85 rows here, the same number
# nr_clean() reports.
dd <- pannot$clean()
pannot$nr_clean()
#> [1] 85
pannot$get_summary()
#> totalNrOfProteins percentOfContaminants percentOfFalsePositives
#> 1 100 5 10
#> NrOfProteinsNoDecoys
#> 1 85
stopifnot(nrow(dd) == 85)
# Subset the LFQData object using the cleaned annotation table (joined by
# protein_Id, see the message below).
tmp <- lfqdata$get_subset(dd)
#> Joining with `by = join_by(protein_Id)`
# contaminants = TRUE, decoys = FALSE leaves 95 rows (100 minus the 5
# contaminants).
dx <- pannot$clean(contaminants = TRUE, decoys = FALSE)
stopifnot(nrow(dx) == 95)
# contaminants = FALSE, decoys = TRUE leaves 90 rows (100 minus the 10 decoys).
dx <- pannot$clean(contaminants = FALSE, decoys = TRUE)
stopifnot(nrow(dx) == 90)
# A larger exp_nr_children threshold must never return more rows.
dx2 <- pannot$filter_by_nr_children(exp_nr_children = 2)
dx3 <- pannot$filter_by_nr_children(exp_nr_children = 3)
stopifnot(nrow(dx2) >= nrow(dx3))