Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 12 additions & 10 deletions R/dataProcess.R
Original file line number Diff line number Diff line change
Expand Up @@ -414,19 +414,19 @@ MSstatsSummarizeSingleLinear = function(single_protein,
}]

if (is_labeled_reference) {
single_protein[, predicted := ifelse(censored & is_labeled_ref == FALSE, predicted, NA)]
single_protein[, newABUNDANCE := ifelse(censored & is_labeled_ref == FALSE, predicted, newABUNDANCE)]
single_protein[!(censored & is_labeled_ref == FALSE), predicted := NA]
single_protein[(censored) & is_labeled_ref == FALSE,
newABUNDANCE := predicted]
} else {
single_protein[, predicted := ifelse(censored, predicted, NA)]
single_protein[, newABUNDANCE := ifelse(censored, predicted, newABUNDANCE)]
single_protein[!(censored), predicted := NA]
single_protein[(censored), newABUNDANCE := predicted]
}

survival = single_protein[, intersect(c(cols, "LABEL", "predicted"), colnames(single_protein)), with = FALSE]
Comment thread
tonywu1999 marked this conversation as resolved.
} else {
survival = single_protein[, intersect(c(cols, "LABEL"), colnames(single_protein)), with = FALSE]
survival[, predicted := NA]
}

if (all(!is.na(single_protein$ANOMALYSCORES))) {
single_protein[, weights :=
anomaly_weights_z_vec(ANOMALYSCORES),
Expand Down Expand Up @@ -569,11 +569,13 @@ MSstatsSummarizeSingleTMP = function(single_protein, impute, censored_symbol,
}

if (is_labeled_reference) {
single_protein[, predicted := ifelse(censored & is_labeled_ref == FALSE, predicted, NA)]
single_protein[, newABUNDANCE := ifelse(censored & is_labeled_ref == FALSE, predicted, newABUNDANCE)]
single_protein[!(censored & is_labeled_ref == FALSE), predicted := NA]
single_protein[(censored) & is_labeled_ref == FALSE,
newABUNDANCE := predicted]
} else {
single_protein[, predicted := ifelse(censored, predicted, NA)]
single_protein[, newABUNDANCE := ifelse(censored, predicted, newABUNDANCE)]
single_protein[!(censored), predicted := NA]
single_protein[(censored),
newABUNDANCE := predicted]
}
survival = single_protein[, intersect(c(cols, "LABEL", "predicted"), colnames(single_protein)), with = FALSE]
} else {
Comment thread
tonywu1999 marked this conversation as resolved.
Expand Down
11 changes: 6 additions & 5 deletions R/utils_checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -211,9 +211,10 @@ MSstatsPrepareForDataProcess = function(input, log_base, fix_missing) {
cols = toupper(cols)
cols = intersect(c(cols, "FRACTION", "TECHREPLICATE"),
colnames(input))
input = input[, cols, with = FALSE]

input$PEPTIDE = paste(input$PEPTIDESEQUENCE,
drop_cols = setdiff(colnames(input), cols)
for (col in drop_cols) data.table::set(input, j = col, value = NULL)

input$PEPTIDE = paste(input$PEPTIDESEQUENCE,
Comment thread
mstaniak marked this conversation as resolved.
Outdated
input$PRECURSORCHARGE, sep = "_")
input$TRANSITION = paste(input$FRAGMENTION,
input$PRODUCTCHARGE, sep = "_")
Expand Down Expand Up @@ -322,8 +323,8 @@ setMethod(".checkDataValidity", "MSstatsValidated", .prepareForDataProcess)
input[, PROTEIN := factor(PROTEIN)]
input[, PEPTIDE := factor(PEPTIDE)]
input[, TRANSITION := factor(TRANSITION)]
input = input[order(LABEL, GROUP_ORIGINAL, SUBJECT_ORIGINAL,
RUN, PROTEIN, PEPTIDE, TRANSITION), ]
data.table::setorder(input, LABEL, GROUP_ORIGINAL, SUBJECT_ORIGINAL,
RUN, PROTEIN, PEPTIDE, TRANSITION)
input[, GROUP := factor(GROUP)]
input[, SUBJECT := factor(SUBJECT)]
input[, FEATURE := factor(FEATURE)]
Expand Down
37 changes: 15 additions & 22 deletions R/utils_feature_selection.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,29 +74,22 @@ MSstatsSelectFeatures = function(input, method, top_n = 3, min_feature_count = 2
#' @return data.table
#' @keywords internal
.selectHighQualityFeatures = function(input, min_feature_count) {
PROTEIN = PEPTIDE = FEATURE = originalRUN = ABUNDANCE = is_censored = NULL
PROTEIN = PEPTIDE = FEATURE = originalRUN = ABUNDANCE = censored = NULL
is_obs = log2inty = LABEL = NULL

cols = c("PROTEIN", "PEPTIDE", "FEATURE", "originalRUN", "LABEL",
"ABUNDANCE", "censored")
cols = intersect(cols, colnames(input))
input = input[, cols, with = FALSE]
if (!("censored" %in% cols)) {
input$censored = FALSE
}
data.table::setnames(input, "censored", "is_censored")
input = input[, list(protein = as.character(PROTEIN),
peptide = as.character(PEPTIDE),
feature = as.character(FEATURE),
run = as.character(originalRUN),
label = as.character(LABEL),
log2inty = ifelse(!(is.na(ABUNDANCE) | is_censored),
ABUNDANCE, NA),
is_censored)]
input[, is_obs := !(is.na(log2inty) | is_censored)]
input[, is_censored := NULL]

features_quality = data.table::rbindlist(lapply(split(input, input$label),

has_censored = is.element("censored", colnames(input))
work = input[, list(protein = as.character(PROTEIN),
Comment thread
Rudhik1904 marked this conversation as resolved.
Outdated
peptide = as.character(PEPTIDE),
feature = as.character(FEATURE),
run = as.character(originalRUN),
label = as.character(LABEL),
log2inty = ifelse(!(is.na(ABUNDANCE) |
if (has_censored) censored else FALSE),
ABUNDANCE, NA),
is_obs = FALSE)]
work[, is_obs := !is.na(log2inty)]

features_quality = data.table::rbindlist(lapply(split(work, work$label),
.flagUninformativeSingleLabel,
min_feature_count = min_feature_count))
features_quality
Expand Down
30 changes: 17 additions & 13 deletions R/utils_normalize.R
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
input[, ABUNDANCE_FRACTION := median(ABUNDANCE_RUN, na.rm = TRUE),
by = "FRACTION"]
input[, ABUNDANCE := ABUNDANCE - ABUNDANCE_RUN + ABUNDANCE_FRACTION]
input = input[, !(colnames(input) %in% c("ABUNDANCE_RUN", "ABUNDANCE_FRACTION")),
with = FALSE]
data.table::set(input, j = "ABUNDANCE_RUN", value = NULL)
data.table::set(input, j = "ABUNDANCE_FRACTION", value = NULL)
getOption("MSstatsLog")("Normalization based on median: OK")
input
}
Expand Down Expand Up @@ -255,7 +255,9 @@ MSstatsNormalize = function(input, normalization_method, peptides_dict = NULL, s
input[, ABUNDANCE := ABUNDANCE - median_by_run + median_by_fraction]

getOption("MSstatsLog")("INFO", "Normalization : normalization with global standards protein - okay")
input[ , !(colnames(input) %in% c("median_by_run", "median_by_fraction")), with = FALSE]
data.table::set(input, j = "median_by_run", value = NULL)
data.table::set(input, j = "median_by_fraction", value = NULL)
input
}


Expand Down Expand Up @@ -344,23 +346,25 @@ MSstatsMergeFractions = function(input) {
match_runs = unique(match_runs[, list(GROUP_ORIGINAL,
SUBJECT_ORIGINAL,
newRun)])

input = merge(input, match_runs,
by = c("GROUP_ORIGINAL", "SUBJECT_ORIGINAL"),
all.x = TRUE)

nr_idx = match_runs[input,
on = c("GROUP_ORIGINAL", "SUBJECT_ORIGINAL"),
which = TRUE, mult = "first"]
data.table::set(input, j = "newRun",
value = match_runs$newRun[nr_idx])
select_fraction = input[!is.na(ABUNDANCE) & input$ABUNDANCE > 0,
list(ncount = .N),
by = c("FEATURE", "FRACTION")]
select_fraction = select_fraction[ncount != 0]
select_fraction[, tmp := paste(FEATURE, FRACTION, sep = "_")]
input$tmp = paste(input$FEATURE, input$FRACTION, sep = "_")
input = input[tmp %in% select_fraction$tmp, ]
keep_idx = select_fraction[input,
on = c("FEATURE", "FRACTION"),
which = TRUE, mult = "first"]
input = input[!is.na(keep_idx)]
input$originalRUN = input$newRun
Comment thread
Rudhik1904 marked this conversation as resolved.
Outdated
input$RUN = input$originalRUN
input$RUN = factor(input$RUN, levels = unique(input$RUN),
input$RUN = factor(input$RUN, levels = unique(input$RUN),
labels = seq_along(unique(input$RUN)))
input = input[, !(colnames(input) %in% c('tmp','newRun')),
with = FALSE]
data.table::set(input, j = "newRun", value = NULL)
}
}
}
Expand Down
66 changes: 39 additions & 27 deletions R/utils_output.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,20 @@
#' output = MSstatsSummarizationOutput(input, summarized, processed,
#' method, impute, cens)
#'
MSstatsSummarizationOutput = function(input, summarized, processed,
MSstatsSummarizationOutput = function(input, summarized, processed,
method, impute, censored_symbol) {
LABEL = TotalGroupMeasurements = GROUP = Protein = RUN = NULL

input = .finalizeInput(input, summarized, method, impute, censored_symbol)
summarized = lapply(summarized, function(x) x[[1]])
summarized = data.table::rbindlist(summarized, fill = TRUE)

predicted_survival = data.table::rbindlist(lapply(summarized, function(x) x[[2]]),
fill = TRUE)
for (i in seq_along(summarized)) summarized[[i]][[2]] = NULL
input = .finalizeInput(input, predicted_survival, method, impute, censored_symbol)
rm(predicted_survival)
protein_summaries = lapply(summarized, function(x) x[[1]])
rm(summarized)
summarized = data.table::rbindlist(protein_summaries, fill = TRUE)
rm(protein_summaries)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

if (inherits(summarized, "try-error")) {
Comment thread
mstaniak marked this conversation as resolved.
Outdated
msg = paste("*** error : can't summarize per subplot with ",
method, ".")
Expand Down Expand Up @@ -82,18 +89,21 @@ MSstatsSummarizationOutput = function(input, summarized, processed,
"originalRUN", "censored", "INTENSITY", "ABUNDANCE",
"newABUNDANCE", "predicted", "feature_quality",
"is_outlier", "remove", "is_labeled_ref"), colnames(input))
input = input[, output_cols, with = FALSE]

drop_cols = setdiff(colnames(input), output_cols)
for (col in drop_cols) data.table::set(input, j = col, value = NULL)

if (is.element("remove", colnames(processed))) {
processed = processed[(remove),
intersect(output_cols,
processed = processed[(remove),
intersect(output_cols,
colnames(processed)), with = FALSE]
input = rbind(input, processed, fill = TRUE)
}
list(FeatureLevelData = as.data.frame(input),
ProteinLevelData = as.data.frame(rqall),
data.table::setDF(input)
data.table::setDF(rqall)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
list(FeatureLevelData = input,
ProteinLevelData = rqall,
SummaryMethod = method)

}


Expand All @@ -104,9 +114,9 @@ MSstatsSummarizationOutput = function(input, summarized, processed,
#' @param impute if TRUE, censored missing values were imputed
#' @param censored_symbol censored missing value indicator
#' @keywords internal
.finalizeInput = function(input, summarized, method, impute, censored_symbol) {
.finalizeInput = function(input, predicted_survival, method, impute, censored_symbol) {
# if (method == "TMP") {
input = .finalizeTMP(input, censored_symbol, impute, summarized)
input = .finalizeTMP(input, censored_symbol, impute, predicted_survival)
# } else {
# input = .finalizeLinear(input, censored_symbol)
# }
Expand All @@ -117,21 +127,23 @@ MSstatsSummarizationOutput = function(input, summarized, processed,
#' Summary statistics for output of TMP-based summarization
#' @inheritParams .finalizeInput
#' @keywords internal
.finalizeTMP = function(input, censored_symbol, impute, summarized) {
.finalizeTMP = function(input, censored_symbol, impute, predicted_survival) {
NonMissingStats = NumMeasuredFeature = MissingPercentage = LABEL = NULL
total_features = more50missing = nonmissing_orig = censored = NULL
INTENSITY = newABUNDANCE = NumImputedFeature = NULL

survival_predictions = lapply(summarized, function(x) x[[2]])
predicted_survival = data.table::rbindlist(survival_predictions, fill = TRUE)

if (impute) {
cols = intersect(colnames(input), c("newABUNDANCE",
"cen", "RUN",
"FEATURE", "ref_covariate", "LABEL"))
input = merge(input[, colnames(input) != "newABUNDANCE", with = FALSE],
predicted_survival,
by = setdiff(cols, "newABUNDANCE"),
all.x = TRUE)
join_cols = intersect(intersect(colnames(input),
colnames(predicted_survival)),
c("cen", "RUN", "FEATURE", "ref_covariate",
"LABEL"))
data.table::set(input, j = "newABUNDANCE", value = NULL)
idx = predicted_survival[input, on = join_cols, which = TRUE,
mult = "first"]
data.table::set(input, j = "newABUNDANCE",
value = predicted_survival$newABUNDANCE[idx])
data.table::set(input, j = "predicted",
value = predicted_survival$predicted[idx])
}
input[, NonMissingStats := .getNonMissingFilterStats(.SD, censored_symbol)]
input[, NumMeasuredFeature := sum(NonMissingStats),
Expand All @@ -144,7 +156,7 @@ MSstatsSummarizationOutput = function(input, summarized, processed,
} else {
input[, nonmissing_orig := !is.na(INTENSITY)]
}
input[, nonmissing_orig := ifelse(is.na(newABUNDANCE), TRUE, nonmissing_orig)]
input[is.na(newABUNDANCE), nonmissing_orig := TRUE]
if (impute) {
input[, NumImputedFeature := sum(!nonmissing_orig),
by = c("PROTEIN", "RUN", "LABEL")]
Expand Down Expand Up @@ -175,7 +187,7 @@ MSstatsSummarizationOutput = function(input, summarized, processed,
} else {
input[, nonmissing_orig := !is.na(INTENSITY)]
}
input[, nonmissing_orig := ifelse(is.na(newABUNDANCE), TRUE, nonmissing_orig)]
input[is.na(newABUNDANCE), nonmissing_orig := TRUE]
input[, NumImputedFeature := 0]
}
input
Expand Down
Loading
Loading