Title: | ProteinAnnotation |
---|---|
Description: | Display protein information. |
Authors: | Ben Bruyneel <[email protected]> |
Maintainer: | Ben Bruyneel <[email protected]> |
License: | GPL (>= 3) |
Version: | 0.5.1 |
Built: | 2025-02-15 04:10:36 UTC |
Source: | https://github.com/BenBruyneel/proteinAnnotation |
Takes data.frame with position data and adds it as an extra column to a protein data data.frame Positions not covered in the added data.frame are set to the argument NAValue (default is integer 0)
addDataToProtein( proteinDF, dataframe, dataColumn, dataName, positionColumn = "position", NAValue = 0L, strictPosition = TRUE )
addDataToProtein( proteinDF, dataframe, dataColumn, dataName, positionColumn = "position", NAValue = 0L, strictPosition = TRUE )
proteinDF |
the protein data.frame to add the info to. Main requirements are that there is a column named position, that there is no column named 'newData____' (internal use) and no column with the same name as defined by the 'dataName' argument |
dataframe |
data.frame with the data to be added. Should have two columns: one with the position info and another with the data relating to these positions |
dataColumn |
name of the column in dataframe with the data to be added |
dataName |
name of the new data column in the protein data data.frame |
positionColumn |
name of the position column in dataframe which has the position info (default is 'position') |
NAValue |
value for the position in the protein data data.frame which are missing in the (new) dataframe data |
strictPosition |
logical vector. Default is TRUE: if the positions in the (new) dataframe are outside the range of the protein data, then an error will be generated. If FALSE, then only a warning is generated |
data.frame
bsaProtein <- createProteinData(sequence = standardProtein("bsa"), nterm = 1, start = 25) toAdd <- data.frame(position = c(25:30, 100:110), data = c(rep(1,10), rep(3, 3),rep(2,4))) bsaProtein <- addDataToProtein(proteinDF = bsaProtein, dataframe = toAdd, dataColumn = "data", dataName = "found") bsaProtein$position[bsaProtein$sequence == "K" | bsaProtein$sequence == "R"] trypticSites <- data.frame(position = bsaProtein$position[ bsaProtein$sequence == "K" | bsaProtein$sequence == "R"], data = "tryptic") bsaProtein <- addDataToProtein(proteinDF = bsaProtein, dataframe = trypticSites, dataColumn = "data", dataName = "trypticSite", NAValue = "") bsaProtein[95:110,]
bsaProtein <- createProteinData(sequence = standardProtein("bsa"), nterm = 1, start = 25) toAdd <- data.frame(position = c(25:30, 100:110), data = c(rep(1,10), rep(3, 3),rep(2,4))) bsaProtein <- addDataToProtein(proteinDF = bsaProtein, dataframe = toAdd, dataColumn = "data", dataName = "found") bsaProtein$position[bsaProtein$sequence == "K" | bsaProtein$sequence == "R"] trypticSites <- data.frame(position = bsaProtein$position[ bsaProtein$sequence == "K" | bsaProtein$sequence == "R"], data = "tryptic") bsaProtein <- addDataToProtein(proteinDF = bsaProtein, dataframe = trypticSites, dataColumn = "data", dataName = "trypticSite", NAValue = "") bsaProtein[95:110,]
takes a 3d object of the class 'r3dmol' as created via the 'r3dmol' library amd
adds the colors as defined in the protein data. The function m_add_style
is used
for this.
addStyleElement( object3D, proteinDF, positionColumn = "position", colorColumn, colorsReduce = T, style = r3dmol::m_style_cartoon, ... )
addStyleElement( object3D, proteinDF, positionColumn = "position", colorColumn, colorsReduce = T, style = r3dmol::m_style_cartoon, ... )
object3D |
a r3dmol object of a protein information file (pdf, cif) |
proteinDF |
protein data data.frame with at least two columns: position & color |
positionColumn |
name of the column containing the positions to be colored (default: 'position') |
colorColumn |
name of the column containing the colors to be used for the positions |
colorsReduce |
this allows for a smaller number of style objects to be added to the object3D |
style |
function that defines the style to be 'added' to the object3D object (default:
|
... |
for adding sdditional argunebts to the style-function. For 'm_style_cartoon', e.g.
|
a r3dmol object
# create protein data ovaProtein <- proteinCoverage(sequence = standardProtein(), peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1) # mapping Abundance information ovaProtein <- mapPeptidesToProtein(proteinDF = ovaProtein, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1L, variable = "Abundances_1", dataName = "Abundance", combineFunction = sum) # translate the colors logColors <- translateToGradientColors(minValue = 1E3, maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10) ovaProtein$AbundanceLogColors <- logColors(ovaProtein$Abundance) library(r3dmol) # get pdb file from alphafold downloadedFile <- tempfile(fileext = ".pdb") download.file("https://alphafold.ebi.ac.uk/files/AF-P01012-F1-model_v4.pdb", destfile = downloadedFile) # create r3dmol object ## Not run: p3d <- r3dmol(viewer_spec = m_viewer_spec( cartoonQuality = 20, lowerZoomLimit = 50, upperZoomLimit = 350 )) %>% m_add_model(downloadedFile) %>% m_set_style(style = m_style_cartoon(arrows = F)) %>% m_zoom_to() p3d # add colors addStyleElement(p3d, proteinDF = ovaProtein, colorColumn = "AbundanceColors", arrows = FALSE) addStyleElement(p3d, proteinDF = ovaProtein, colorColumn = "AbundanceLogColors", arrows = FALSE) ## End(Not run)
# create protein data ovaProtein <- proteinCoverage(sequence = standardProtein(), peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1) # mapping Abundance information ovaProtein <- mapPeptidesToProtein(proteinDF = ovaProtein, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1L, variable = "Abundances_1", dataName = "Abundance", combineFunction = sum) # translate the colors logColors <- translateToGradientColors(minValue = 1E3, maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10) ovaProtein$AbundanceLogColors <- logColors(ovaProtein$Abundance) library(r3dmol) # get pdb file from alphafold downloadedFile <- tempfile(fileext = ".pdb") download.file("https://alphafold.ebi.ac.uk/files/AF-P01012-F1-model_v4.pdb", destfile = downloadedFile) # create r3dmol object ## Not run: p3d <- r3dmol(viewer_spec = m_viewer_spec( cartoonQuality = 20, lowerZoomLimit = 50, upperZoomLimit = 350 )) %>% m_add_model(downloadedFile) %>% m_set_style(style = m_style_cartoon(arrows = F)) %>% m_zoom_to() p3d # add colors addStyleElement(p3d, proteinDF = ovaProtein, colorColumn = "AbundanceColors", arrows = FALSE) addStyleElement(p3d, proteinDF = ovaProtein, colorColumn = "AbundanceLogColors", arrows = FALSE) ## End(Not run)
Takes a data.frame and adds a 'row' and 'col' column which together define which cell in the rows*columns-matrix a data.frame-row maps to
boxData( data, ncols = NA, adjustRows = TRUE, emptySequence = "", replaceNA = NA )
boxData( data, ncols = NA, adjustRows = TRUE, emptySequence = "", replaceNA = NA )
data |
data.frame, should not already have a 'row' and/or 'col' column |
ncols |
number of required columns for the matrix |
adjustRows |
logical vector, default is TRUE. Defines whether the number of rows for the data.frame is allowed to be adjusted. Setting this to FALSE may lead to 'stop'-error if data doesn't fit the requested matrix. |
emptySequence |
if data.frame is extended, this character vector is used to sequence in the new rows. The position column is simply filled with increasing integers. |
replaceNA |
default is NA, if defined as something else, then ALL NA values in the data.frame will be replace with this argument. This is an admittedly crude way to replace NA values. If more finesse is needed, leave at default and handle NA values in followup functions such as 'replace_na' from the tidyr package |
a data.frame with extra columns (row and col) and possible extra rows
If the data.frame extended: the position column and the sequence are filled with increasing integers (starting from the max position) and the 'emptySequence' argument respectively. Other columns in the extended using NA
boxData(createProteinData(standardProtein(), start = 2, nterm = 1), ncols = 50) |> head(20) boxData(createProteinData(standardProtein(), start = 2, nterm = 1), ncols = 50) |> tail(20) boxData(createProteinData(standardProtein(), start = 2, nterm = 1), ncols = 50)[40:60,]
boxData(createProteinData(standardProtein(), start = 2, nterm = 1), ncols = 50) |> head(20) boxData(createProteinData(standardProtein(), start = 2, nterm = 1), ncols = 50) |> tail(20) boxData(createProteinData(standardProtein(), start = 2, nterm = 1), ncols = 50)[40:60,]
Calculates/defines the dimensions of the 'matrix' to use to display data of a certain length (e.g. a protein sequence)
boxDimensions( vectorLength = 100, ratio = NA, increaseColumns = FALSE, ncol = NA, nrow = NA, noWarning = FALSE )
boxDimensions( vectorLength = 100, ratio = NA, increaseColumns = FALSE, ncol = NA, nrow = NA, noWarning = FALSE )
vectorLength |
minimum number of cells for the resulting columns*rows matrix |
ratio |
the intended ratio the resulting columns*rows matrix. Increase ratio to get more columns, decrease to get more rows. |
increaseColumns |
if TRUE, then the number of columns will be increased to reach the number of matrix cells required. If FALSE then the number of rows will be increased to do this. Default is FALSE |
ncol |
defines the number of columns to use. If not also defined, the number of rows is adapted to fit all of the required number of cells for the matrix |
nrow |
defines the number of rows to use. If not also defined, the number of columns is adapted to fit all of the required number of cells for the matrix |
noWarning |
logical vector, if product of the number of rows and the number of columns is too small for the number of matrix cells (vector length) required then a warning will displayed (default noWarning is FALSE), unless noWarning is TRUE |
a named numeric vector (names: ncol, nrow)
Order of importance for the ratio, ncol and nrow arguments: if the ratio is NA (default), then the ncol argument is used, if possible with the nrow argument. If ncol is NA, then the nrow argument is used.
boxDimensions(385, 5) boxDimensions(385, nrow = 9) boxDimensions(400, 10) boxDimensions(400, ncol = 60)
boxDimensions(385, 5) boxDimensions(385, nrow = 9) boxDimensions(400, 10) boxDimensions(400, ncol = 60)
Helper function to check whether a character vector of format modification,splitCahracter,modification,splitCahracter, (etc etc) contains a certain modification
containsModification( modifications, whichModification, strict = TRUE, splitCharacter = ",", trim = TRUE )
containsModification( modifications, whichModification, strict = TRUE, splitCharacter = ",", trim = TRUE )
modifications |
character vector of format |
whichModification |
the modification to search for (character vector) |
strict |
logical vector, default is TRUE: the search for the modification is strict. If FALSE then the search goes via the 'grepl' function and regular expressions can be used |
splitCharacter |
character vector used to split the modifications argument |
trim |
logical vector. Default is TRUE: the modifications will be trimmed (spaces on both sides removed). When FALSE, this is not done |
logical vector
containsModification(c('Oxidation,Phospho','Carbamidomethyl,Phospho'), whichModification = "Phospho") containsModification(c('Oxidation , Phospho','Carbamidomethyl, Phospho'), whichModification = "Phospho") containsModification(c('Oxidation , Phospho','Carbamidomethyl, Phospho'), whichModification = "Phospho", trim = TRUE) containsModification(c('Oxidation , Phospho','Carbamidomethyl, Phospho'), whichModification = "Phospho", strict = FALSE)
containsModification(c('Oxidation,Phospho','Carbamidomethyl,Phospho'), whichModification = "Phospho") containsModification(c('Oxidation , Phospho','Carbamidomethyl, Phospho'), whichModification = "Phospho") containsModification(c('Oxidation , Phospho','Carbamidomethyl, Phospho'), whichModification = "Phospho", trim = TRUE) containsModification(c('Oxidation , Phospho','Carbamidomethyl, Phospho'), whichModification = "Phospho", strict = FALSE)
Create a protein data.frame with basic information: position & sequence. In the resulting data.frame, every row has an amino acid (letter, sequence column) and the position of that amino acid in the protein (position column)
createProteinData( sequence, start = 1, end = nchar(sequence), nterm = NA, cterm = NA, emptySequence = " " )
createProteinData( sequence, start = 1, end = nchar(sequence), nterm = NA, cterm = NA, emptySequence = " " )
sequence |
character vector, protein sequence in the form of the usual letter codes |
start |
first position in the sequence to be used |
end |
last position in the sequence to be used |
nterm |
start position 'adjustment'. Adds a number of 'empty' rows to the head of the resulting data.frame to have it start at position nterm |
cterm |
end position 'adjustment'. Adds a number of 'empty' rows to the tail of the resulting data.frame to have it ends at position cterm |
emptySequence |
default " ", character vector for the sequence column to use when adding rows. Note: other columns will also be extended and usually add NA's as a value |
data.frame with two columns: position (integer vector) and sequence (amino acid letters: character vector)
nterm stands for n-terminus, cterm stands for c-terminus
createProteinData(sequence = standardProtein("bsa"), start = 25) |> head(25) createProteinData(sequence = standardProtein("bsa"), nterm = 1, start = 25) |> head(25) createProteinData(sequence = standardProtein("bsa"), start = 25) |> tail(10) createProteinData(sequence = standardProtein("bsa"), start = 25, nterm = 1, cterm = 610) |> tail(10)
createProteinData(sequence = standardProtein("bsa"), start = 25) |> head(25) createProteinData(sequence = standardProtein("bsa"), nterm = 1, start = 25) |> head(25) createProteinData(sequence = standardProtein("bsa"), start = 25) |> tail(10) createProteinData(sequence = standardProtein("bsa"), start = 25, nterm = 1, cterm = 610) |> tail(10)
Takes a single modification table row with one or more modification definitions (as produced by modPositionsToDF
)
and makes it into a single-modification-per-row data.frame. A row with 'x' modifications becomes 'x' number of rows. Essentially
the opposite of modPositionsToDF
.
dfToModPositions(df, splitChar = ",", returnEmptyRow = TRUE)
dfToModPositions(df, splitChar = ",", returnEmptyRow = TRUE)
df |
a single row data.frame having the 'collapsed' columns (modifications, positions, letters and numbers) Each modification(-type) is separated by the argument 'splitChar' |
splitChar |
character vector specifying the separation 'character' that is placed between the modification definitions |
returnEmptyRow |
logical vector, default is TRUE. TRUE: if no modifications are present in the 'df' argument, a data.frame will be returned without rows. FALSE: in that case a data.frame with a single row, with in every column an empty character vector, will be returned. |
a data.frame with columns: modifications, positions, letters and numbers
modPositions(OVATable("peptide")$Modifications[32]) |> modPositionsToDF() |> dfToModPositions() modPositions(OVATable("peptide")$Modifications[2]) |> modPositionsToDF() |> dfToModPositions() modPositions(OVATable("peptide")$Modifications[1]) modPositions(OVATable("peptide")$Modifications[1]) |> modPositionsToDF() modPositions(OVATable("peptide")$Modifications[1]) |> modPositionsToDF() |> dfToModPositions()
modPositions(OVATable("peptide")$Modifications[32]) |> modPositionsToDF() |> dfToModPositions() modPositions(OVATable("peptide")$Modifications[2]) |> modPositionsToDF() |> dfToModPositions() modPositions(OVATable("peptide")$Modifications[1]) modPositions(OVATable("peptide")$Modifications[1]) |> modPositionsToDF() modPositions(OVATable("peptide")$Modifications[1]) |> modPositionsToDF() |> dfToModPositions()
displays the protein data in a data frame via ggplot2's geom_tile
displayProtein( proteinData, columns = NA, emptySequence = "", replaceNA = NA, backGroundColor = "white", frontColor = "black", borderColor = backGroundColor, defaultTextColor = "black", textColorColumn = NA, replaceTextNA = "0", textColorLevels = NA, textColorLabels = NA, textColors = c("black"), textColorFunction = NA, showTextLegend = F, textLegendTitle = textColorColumn, textLegendDirection = "vertical", fillColorColumn = NA, replaceFillNA = NA, fillColorLevels = NA, fillColorLabels = NA, fillColors = backGroundColor, fillTransformation = NA, fillColorsMinMax = NA, fillFunction = NA, showFillLegend = T, fillLegendTitle = fillColorColumn, fillLegendDirection = "horizontal", legendPosition = "bottom", legendBox = "vertical", naFillColor = backGroundColor, textSize = 4, textFontFace = "bold", linewidth = 0.35, alpha = 1, tileWidth = 1, tileHeight = 1, title = ggplot2::waiver(), subtitle = ggplot2::waiver(), caption = ggplot2::waiver() )
displayProtein( proteinData, columns = NA, emptySequence = "", replaceNA = NA, backGroundColor = "white", frontColor = "black", borderColor = backGroundColor, defaultTextColor = "black", textColorColumn = NA, replaceTextNA = "0", textColorLevels = NA, textColorLabels = NA, textColors = c("black"), textColorFunction = NA, showTextLegend = F, textLegendTitle = textColorColumn, textLegendDirection = "vertical", fillColorColumn = NA, replaceFillNA = NA, fillColorLevels = NA, fillColorLabels = NA, fillColors = backGroundColor, fillTransformation = NA, fillColorsMinMax = NA, fillFunction = NA, showFillLegend = T, fillLegendTitle = fillColorColumn, fillLegendDirection = "horizontal", legendPosition = "bottom", legendBox = "vertical", naFillColor = backGroundColor, textSize = 4, textFontFace = "bold", linewidth = 0.35, alpha = 1, tileWidth = 1, tileHeight = 1, title = ggplot2::waiver(), subtitle = ggplot2::waiver(), caption = ggplot2::waiver() )
proteinData |
the protein data data.frame |
columns |
numeric vector that specifies the width (in characters) of the protein data display |
emptySequence |
if the protein data is extended, this character vector is used to put in the sequence column in the new rows. The position column is simply filled with increasing integers. |
replaceNA |
default is NA, if defined as something else, then ALL NA values in the protein data will be replace with this argument. Ignored if columns is not NA. |
backGroundColor |
background color to use for the 'tiles' if no fill parameters are set. Default is 'white' |
frontColor |
color to use to border the 'tiles' in the fill color legend. If no text color parameters are set Default = 'black' |
borderColor |
color to use for the 'tiles'. Default is 'backGroundColor' |
defaultTextColor |
color to use for the text in the 'tiles' if no text color parameters are set Default = 'black' |
textColorColumn |
character vector: name of the column in the proteinData data.frame that contains info for the color of the text in the 'tiles'. Note: this should not be continuous data |
replaceTextNA |
vector that specifies with what to replace NA if there are NA values in the textColorColumn in the protein data. Default is "0" (for coverage purposes) |
textColorLevels |
if the data in textColor column is non-continous, eg character data, then this argument allows definition/ordering of the levels during the 'transformation' into factors (done) during visualization of the text colors |
textColorLabels |
if the data in textColor Column is non-continous, eg character data, then this argument allows definition/ordering of the names of the levels
during the 'transformation' into factors (done) during visualization of the text colors. See also |
textColors |
vector with the text colors to use. Note: the number required is defined by the number of 'discrete' values in the textColor column of the protein data |
textColorFunction |
allows custom definition of the function scale_color_ - function to be used during display, see example on 'fillFunction' argument |
showTextLegend |
logical vector, if TRUE show the legend for the text color(s) |
textLegendTitle |
title to use for the legend of the text colors |
textLegendDirection |
direction text color legend. Default is 'vertical'. Alternative is 'horizontal' |
fillColorColumn |
character vector: name of the column in the proteinData data.frame that contains info for the fillcolor of the 'tiles'. Note: this should be continuous data. If discrete data, then fillColorLevels can be defined for more exact coloring |
replaceFillNA |
vector that specifies with what to replace NA if there are NA values in the fillColorColumn in the protein data |
fillColorLevels |
if the data in fillColor column is non-continous, eg character data, then this argument allows definition/ordering of the names of the levels
during the 'transformation' into factors (done) during visualization of the text colors. See also |
fillColorLabels |
vector with the text colors to use. Note: the number required is defined by the number of 'discrete' values in the fillColor column of the protein data |
fillColors |
the lower (minimum) and upper (maximum) bound colors to use for fillcolor of the tiles. Usually this is just two different colors: the 'tiles' will be filled with a mixture of both depending on the values in the textColorColumn |
fillTransformation |
this allows the scale of the fill colors to be non-linear. Default is NA. One frequently used is 'log10' to generate
a logarithmic scale. Other options can be found in |
fillColorsMinMax |
the minimum and maximum value to use for forming gradient colors for the tiles. If some values in the textColorColumn are outside the range c(minimum, maximum), the some unexpected things may happen. Usually this is used to 'stretch' the color scale. Only works when working with a continuous scake |
fillFunction |
allows for precise setting of the fill scale, see examples |
showFillLegend |
logical vector, if TRUE show the legend for the fill color(s) |
fillLegendTitle |
title to use for the legend of the text colors |
fillLegendDirection |
direction fill color legend. Default is 'horizontal'. Alternative is 'vertical' |
legendPosition |
character vector: where to put the legends ("none", "left", "right", "bottom", "top"). Default is 'bottom' |
legendBox |
arrangement of multiple legends ("horizontal" or "vertical") |
naFillColor |
still used ????????????? <———— |
textSize |
default is 4. Sets the size of the text in the 'tiles' |
textFontFace |
allows manipulation of the text font used. Default is 'bold'. Alternatives are "plain", "italic", & "bold.italic" |
linewidth |
sets the width of the lines between the 'tiles', default is 0.35 |
alpha |
sets the alpha for the fill color of the 'tiles'. Default is 1 |
tileWidth |
sets the width for the 'tiles', default is 1 |
tileHeight |
sets the height for the 'tiles', default is 1 |
title |
specifies the title |
subtitle |
specifies the subtitle |
caption |
specifies the caption |
a ggplot object
newTable <- proteinCoverage(sequence = standardProtein("OVA"), start = 2, nterm = 1, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins") newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_1", dataName = "Abundances_1") newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_2", dataName = "Abundances_2") newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "XCorrbySearchEngine_1", dataName = "Score") newTable[35:40,] # display protein sequence info displayProtein(newTable, columns = 50) displayProtein(newTable, columns = 50, textColorColumn = "sequence", textColors = rainbow(21)) displayProtein(newTable, columns = 50, textColorColumn = "sequence", textColors = terrain.colors(21)) # simply display sequencing coverage displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red")) displayProtein(boxData(newTable, ncols = 50), textColorColumn = "coverage", textColors = c("black","red")) displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), backGroundColor = "lightblue") displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), textLegendTitle = "Coverage", showTextLegend = TRUE, textColorLabels = c("No Coverage", "Coverage"), title = "Ovalbumin") # display score & coverage displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), textLegendTitle = "Coverage", showTextLegend = TRUE, textColorLabels = c("No Coverage", "Coverage"), title = "Ovalbumin", fillColorColumn = "Score", fillColors = c("white","brown"), alpha = 0.5) displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), textLegendTitle = "Coverage", showTextLegend = TRUE, textColorLabels = c("No Coverage", "Coverage"), title = "Ovalbumin", fillColorColumn = "Score", fillColors = c("white","brown"), alpha = 0.5, fillColorsMinMax = c(0,50)) # display abundance & coverage displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_1", fillColors = c("white","brown"), alpha = 0.5) displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_1", fillColors = c("white","brown"), alpha = 0.5, fillColorsMinMax = c(1E3, 1.5E10)) displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_2", fillColors = c("white","brown"), alpha = 0.5, fillColorsMinMax = c(1E3, 1.5E10)) # display abundance logarithmic color scale & coverage displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_1", fillColors = c("white","#8b0000"), alpha = 0.5, fillColorsMinMax = c(1E6, 1.5E10), fillTransformation = "log10") displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_2", fillColors = c("white","#8b0000"), alpha = 0.5, fillColorsMinMax = c(1E6, 1E11), fillTransformation = "log10") displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_1", fillColors = c("white","#8b0000"), alpha = 0.5, fillColorsMinMax = c(1E6, 1.5E10), fillTransformation = "log10", fillLegendTitle = "Abundance", fillLegendDirection = "vertical", legendPosition = "right") # use discrete data for fill can be done via 'fillFunction' argument trypticSites <- getPeptideStart(proteinSequence = standardProtein("OVA"), peptideSequence = "(?<!P)R|(?<!P)K") trypticSites <- data.frame(position = trypticSites, data = "tryptic") trypticSites newTable <- addDataToProtein(proteinDF = newTable, dataframe = trypticSites, dataColumn = "data", dataName = "trypticSite", NAValue = "") newTable[75:90,] # w/o using the fillFunction argument displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), textLegendTitle = "Coverage", showTextLegend = TRUE, textColorLabels = c("No Coverage", "Coverage"), title = "Ovalbumin - tryptic sites", fillColorColumn = "trypticSite", fillColors = c("white","yellow"), replaceFillNA = "", fillColorLevels = c("","tryptic"), fillColorLabels = c("Non-tryptic site", "Tryptic site"), showFillLegend = FALSE, borderColor = "black") # using the fillFunction argument displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), textLegendTitle = "Coverage", showTextLegend = TRUE, textColorLabels = c("No Coverage", "Coverage"), title = "Ovalbumin - tryptic sites", fillColorColumn = "trypticSite", fillFunction = ggplot2::scale_fill_manual(values = c("white", "yellow"), na.value = "white"), showFillLegend = FALSE, borderColor = "black")
newTable <- proteinCoverage(sequence = standardProtein("OVA"), start = 2, nterm = 1, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins") newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_1", dataName = "Abundances_1") newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_2", dataName = "Abundances_2") newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "XCorrbySearchEngine_1", dataName = "Score") newTable[35:40,] # display protein sequence info displayProtein(newTable, columns = 50) displayProtein(newTable, columns = 50, textColorColumn = "sequence", textColors = rainbow(21)) displayProtein(newTable, columns = 50, textColorColumn = "sequence", textColors = terrain.colors(21)) # simply display sequencing coverage displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red")) displayProtein(boxData(newTable, ncols = 50), textColorColumn = "coverage", textColors = c("black","red")) displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), backGroundColor = "lightblue") displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), textLegendTitle = "Coverage", showTextLegend = TRUE, textColorLabels = c("No Coverage", "Coverage"), title = "Ovalbumin") # display score & coverage displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), textLegendTitle = "Coverage", showTextLegend = TRUE, textColorLabels = c("No Coverage", "Coverage"), title = "Ovalbumin", fillColorColumn = "Score", fillColors = c("white","brown"), alpha = 0.5) displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), textLegendTitle = "Coverage", showTextLegend = TRUE, textColorLabels = c("No Coverage", "Coverage"), title = "Ovalbumin", fillColorColumn = "Score", fillColors = c("white","brown"), alpha = 0.5, fillColorsMinMax = c(0,50)) # display abundance & coverage displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_1", fillColors = c("white","brown"), alpha = 0.5) displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_1", fillColors = c("white","brown"), alpha = 0.5, fillColorsMinMax = c(1E3, 1.5E10)) displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_2", fillColors = c("white","brown"), alpha = 0.5, fillColorsMinMax = c(1E3, 1.5E10)) # display abundance logarithmic color scale & coverage displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_1", fillColors = c("white","#8b0000"), alpha = 0.5, fillColorsMinMax = c(1E6, 1.5E10), fillTransformation = "log10") displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_2", fillColors = c("white","#8b0000"), alpha = 0.5, fillColorsMinMax = c(1E6, 1E11), fillTransformation = "log10") displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), title = "Ovalbumin", fillColorColumn = "Abundances_1", fillColors = c("white","#8b0000"), alpha = 0.5, fillColorsMinMax = c(1E6, 1.5E10), fillTransformation = "log10", fillLegendTitle = "Abundance", fillLegendDirection = "vertical", legendPosition = "right") # use discrete data for fill can be done via 'fillFunction' argument trypticSites <- getPeptideStart(proteinSequence = standardProtein("OVA"), peptideSequence = "(?<!P)R|(?<!P)K") trypticSites <- data.frame(position = trypticSites, data = "tryptic") trypticSites newTable <- addDataToProtein(proteinDF = newTable, dataframe = trypticSites, dataColumn = "data", dataName = "trypticSite", NAValue = "") newTable[75:90,] # w/o using the fillFunction argument displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), textLegendTitle = "Coverage", showTextLegend = TRUE, textColorLabels = c("No Coverage", "Coverage"), title = "Ovalbumin - tryptic sites", fillColorColumn = "trypticSite", fillColors = c("white","yellow"), replaceFillNA = "", fillColorLevels = c("","tryptic"), fillColorLabels = c("Non-tryptic site", "Tryptic site"), showFillLegend = FALSE, borderColor = "black") # using the fillFunction argument displayProtein(newTable, columns = 50, textColorColumn = "coverage", textColors = c("black","red"), textLegendTitle = "Coverage", showTextLegend = TRUE, textColorLabels = c("No Coverage", "Coverage"), title = "Ovalbumin - tryptic sites", fillColorColumn = "trypticSite", fillFunction = ggplot2::scale_fill_manual(values = c("white", "yellow"), na.value = "white"), showFillLegend = FALSE, borderColor = "black")
displays the protein data in a data frame as text (for console use) or HTML (for use with gt table package). Obviously more limited than the 'displayProtein' function
displayProteinText( proteinData, columns = NA, emptySequence = "", textColorColumn = NA, textColors = NA, textBackgroundColorColumn = NA, textBackgroundColors = NA, HTMLoutput = FALSE, linebreakChar = ifelse(HTMLoutput, "<br>", "\n"), returnText = FALSE )
displayProteinText( proteinData, columns = NA, emptySequence = "", textColorColumn = NA, textColors = NA, textBackgroundColorColumn = NA, textBackgroundColors = NA, HTMLoutput = FALSE, linebreakChar = ifelse(HTMLoutput, "<br>", "\n"), returnText = FALSE )
proteinData |
the protein data data.frame |
columns |
numeric vector that specifies the width (in characters) of the protein data display |
emptySequence |
character vector that specifies which value is used for empty pieces of sequence at the beginning or the end of the protein data.frame
please note that these rows, with this in the sequence column, at the beginning of the data.frame are removed for display. The first non-'emptySequence' row
will be the first part displayed (see also examples). If this argument is not the same as the 'emptySequence' argument used in eg |
textColorColumn |
character vector: name of the column in the proteinData data.frame that contains info for the color of the text in the 'tiles'. Note: this should not be continuous data |
textColors |
vector withthe text colors to use. Note: the number required is defined by the number of 'discrete' values in the textColor column of the protein data |
textBackgroundColorColumn |
character vector: name of the column in the proteinData data.frame that contains info for the backcolor of the text in the 'tiles'. Note: this should not be continuous data |
textBackgroundColors |
vector with the text background colors to use. Note: the number required is defined by the number of 'discrete' values in the textBackground color column of the protein data |
HTMLoutput |
logical value. Default is FALSE, if TRUE then the output will be a character vector formatted for display in an HTML environment |
linebreakChar |
character vector that is used to split the resulting character vector. For console text this is '\n', for HTML output it's '< br >'. Can be adjusted to eg '< hr >' for HTML output |
returnText |
default is FALSE. If no textColor and no textBackgroundColor columns are defined, then setting this to TRUE can be used to retrieve the result w/o printing it to the screen (it will contain line breaks) |
character vector
colors for display in the console are limited. Only the ones defined by the package 'crayon' should be used. For HTML use there's (obviously) more freedom
newTable <- proteinCoverage(sequence = standardProtein("OVA"), start = 2, nterm = 1, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", emptySequence = " ") trypticSites <- getPeptideStart(proteinSequence = standardProtein("OVA"), peptideSequence = "(?<!P)R|(?<!P)K") trypticSites <- data.frame(position = trypticSites, data = 1) trypticSites newTable <- addDataToProtein(proteinDF = newTable, dataframe = trypticSites, dataColumn = "data", dataName = "trypticSite", NAValue = 0) newTable[46:55,] # print to console # displayProteinText(proteinData = newTable, columns = 75, # textColorColumn = "coverage", textColors = c("white","red"), emptySequence = " ") # displayProteinText(proteinData = newTable, columns = 75, textColorColumn = "coverage", # textColors = c("white","red"), textBackgroundColorColumn = "trypticSite", # textBackgroundColors = c("black","yellow"), emptySequence = " ") # HTML output via gt package withTryptic <- displayProteinText(proteinData = newTable, columns = 75, textColorColumn = "coverage", textColors = c("black","red"), textBackgroundColorColumn = "trypticSite", textBackgroundColors = c("white","yellow"), emptySequence = " ", HTMLoutput = TRUE) withoutTryptic <- displayProteinText(proteinData = newTable, columns = 75, textColorColumn = "coverage", textColors = c("black","red"), emptySequence = " ", HTMLoutput = TRUE) different <- displayProteinText(proteinData = newTable, columns = 75, textColorColumn = "coverage", textColors = c("blue","green"), emptySequence = " ", HTMLoutput = TRUE, linebreakChar = "<hr>") library(gt) data.frame(Description = c("Coverage without tryptic sites", "Coverage with tryptic sites", "A different display..."), Sequence = c(withoutTryptic, withTryptic, different)) |> gt::gt() |> gt::fmt_markdown(columns = Sequence) |> gt::tab_style( style = list( gt::cell_text(font = "courier") ), locations = gt::cells_body( columns = "Sequence" ) )
newTable <- proteinCoverage(sequence = standardProtein("OVA"), start = 2, nterm = 1, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", emptySequence = " ") trypticSites <- getPeptideStart(proteinSequence = standardProtein("OVA"), peptideSequence = "(?<!P)R|(?<!P)K") trypticSites <- data.frame(position = trypticSites, data = 1) trypticSites newTable <- addDataToProtein(proteinDF = newTable, dataframe = trypticSites, dataColumn = "data", dataName = "trypticSite", NAValue = 0) newTable[46:55,] # print to console # displayProteinText(proteinData = newTable, columns = 75, # textColorColumn = "coverage", textColors = c("white","red"), emptySequence = " ") # displayProteinText(proteinData = newTable, columns = 75, textColorColumn = "coverage", # textColors = c("white","red"), textBackgroundColorColumn = "trypticSite", # textBackgroundColors = c("black","yellow"), emptySequence = " ") # HTML output via gt package withTryptic <- displayProteinText(proteinData = newTable, columns = 75, textColorColumn = "coverage", textColors = c("black","red"), textBackgroundColorColumn = "trypticSite", textBackgroundColors = c("white","yellow"), emptySequence = " ", HTMLoutput = TRUE) withoutTryptic <- displayProteinText(proteinData = newTable, columns = 75, textColorColumn = "coverage", textColors = c("black","red"), emptySequence = " ", HTMLoutput = TRUE) different <- displayProteinText(proteinData = newTable, columns = 75, textColorColumn = "coverage", textColors = c("blue","green"), emptySequence = " ", HTMLoutput = TRUE, linebreakChar = "<hr>") library(gt) data.frame(Description = c("Coverage without tryptic sites", "Coverage with tryptic sites", "A different display..."), Sequence = c(withoutTryptic, withTryptic, different)) |> gt::gt() |> gt::fmt_markdown(columns = Sequence) |> gt::tab_style( style = list( gt::cell_text(font = "courier") ), locations = gt::cells_body( columns = "Sequence" ) )
Gives the end position of a character vector with format 'xxx-yyy'. Convenience function
endPosition(string, splitCharacter = "-")
endPosition(string, splitCharacter = "-")
string |
character vector with format 'xxx-yyy', where xxx is the start position and yyy is the end position |
splitCharacter |
default is "-", which is used by Proteome Discoverer |
numeric vector
endPosition("901-902") endPosition("1-102")
endPosition("901-902") endPosition("1-102")
takes a data.frame with peptide information (like a peptide table from Proteome Discoverer) and gives the position(S) of a specified modification(s) (if present)
getAllModPositionInProtein( peptideTable, Accession, exact = TRUE, whichModification, positionColumn, clearPosition = 1L, unclearPosition = 2L, sumPositions = FALSE )
getAllModPositionInProtein( peptideTable, Accession, exact = TRUE, whichModification, positionColumn, clearPosition = 1L, unclearPosition = 2L, sumPositions = FALSE )
peptideTable |
a data.frame with columns 'Sequence', 'Modifications' and a position column (see 'positionColumn' argument). This is what Proteome Discoverer produces |
Accession |
character vector that specifies from which protein Accession to get the position |
exact |
logical vector which defines how the Accession argument is compared to the string argument. Default is TRUE which means that the Accession needs to be exactly the same letters/numbers. If FALSE then a 'grepl' statement is used with the Accession as a pattern |
whichModification |
character vector that specifies for which modification the protein position is calculated (if present, name needs to be exact) |
positionColumn |
character vector which specifies which column contains the peptide position information. Proteome Discoverer uses either 'PositionsinProteins' or 'PositionsinMasterProteins' (depending on the settings used in the consensus method) |
clearPosition |
vector (default is 1L) that is used for the clear column when the position of the modification in the peptide is unambiguous (certain) |
unclearPosition |
vector (default is 2L) that is used when a modification position is ambiguous (uncertain) |
sumPositions |
logical vector which (if TRUE) causes the position information to be somewhat simplified: the clear-fields of all identical positions are summed. As an example, this causes a position which is present twice (clear and unclear) to become a single row with the clear field (clearPosition + unclearPosition) |
a named list of data.frame's that contain the position information
getAllModPositionInProtein(peptideTable = OVATable("peptide"), Accession = "P01012", whichModification = c("Carbamidomethyl","Phospho"), positionColumn = "PositionsinProteins") getAllModPositionInProtein(peptideTable = OVATable("peptide"), Accession = "P01012", whichModification = c("Carbamidomethyl","Phospho"), positionColumn = "PositionsinProteins", sumPositions = TRUE) getAllModPositionInProtein(peptideTable = OVATable("peptide"), Accession = "P01012", whichModification = c("Oxidation"), positionColumn = "PositionsinProteins") getAllModPositionInProtein(peptideTable = OVATable("peptide"), Accession = "P01012", whichModification = c("Oxidation"), positionColumn = "PositionsinProteins", sumPositions = TRUE) getAllModPositionInProtein(peptideTable = OVATable("peptide"), Accession = "P01012", whichModification = c("Something"), positionColumn = "PositionsinProteins")
getAllModPositionInProtein(peptideTable = OVATable("peptide"), Accession = "P01012", whichModification = c("Carbamidomethyl","Phospho"), positionColumn = "PositionsinProteins") getAllModPositionInProtein(peptideTable = OVATable("peptide"), Accession = "P01012", whichModification = c("Carbamidomethyl","Phospho"), positionColumn = "PositionsinProteins", sumPositions = TRUE) getAllModPositionInProtein(peptideTable = OVATable("peptide"), Accession = "P01012", whichModification = c("Oxidation"), positionColumn = "PositionsinProteins") getAllModPositionInProtein(peptideTable = OVATable("peptide"), Accession = "P01012", whichModification = c("Oxidation"), positionColumn = "PositionsinProteins", sumPositions = TRUE) getAllModPositionInProtein(peptideTable = OVATable("peptide"), Accession = "P01012", whichModification = c("Something"), positionColumn = "PositionsinProteins")
takes a row of a data.frame with peptide information (like a peptide table from Proteome Discoverer) with added modification info
(like coming from modPositionsToDF
) and gives the position of a specified modification (if present)
getModPositionInProtein( peptideTableRow, Accession, exact = TRUE, whichModification, positionColumn, clearPosition = 1L, unclearPosition = 2L )
getModPositionInProtein( peptideTableRow, Accession, exact = TRUE, whichModification, positionColumn, clearPosition = 1L, unclearPosition = 2L )
peptideTableRow |
single row of a data.frame with a 'position column' and 'translated' modification (see examples) |
Accession |
character vector that specifies from which protein Accession to get the position |
exact |
logical vector which defines how the Accession argument is compared to the string argument. Default is TRUE which means that the Accession needs to be exactly the same letters/numbers. If FALSE then a 'grepl' statement is used with the Accession as a pattern |
whichModification |
character vector that specifies for which modification the protein position is calculated (if present, name needs to be exact) |
positionColumn |
character vector which specifies which column contains the peptide position information. Proteome Discoverer uses either 'PositionsinProteins' or 'PositionsinMasterProteins' (depending on the settings used in the consensus method) |
clearPosition |
vector (default is 1L) that is used for the clear column when the position of the modification in the peptide is unambiguous (certain) |
unclearPosition |
vector (default is 2L) that is used when a modification position is ambiguous (uncertain) |
data.frame with two columns: 'position' (position of modification in protein) and 'clear' which details if a modification position is ambiguous
OVATable("peptide")$Modifications # generate data.frame of all these modifications theModifications <- purrr::map_df(OVATable("peptide")$Modifications, ~modPositions(.x, returnEmptyRow = FALSE) |> modPositionsToDF(returnEmptyRow = FALSE)) theModifications |> head() # bind together the modifications woth the original table newTable <- dplyr::bind_cols(OVATable("peptide"), theModifications) newTable |> head() newTable[15,] getModPositionInProtein(peptideTableRow = newTable[15,], Accession = "P01012", whichModification = "Oxidation", positionColumn = "PositionsinProteins") newTable[32,] getModPositionInProtein(peptideTableRow = newTable[32,], Accession = "P01012", whichModification = "Phospho", positionColumn = "PositionsinProteins") getModPositionInProtein(peptideTableRow = newTable[32,], Accession = "P01012", whichModification = "Carbamidomethyl", positionColumn = "PositionsinProteins") getModPositionInProtein(peptideTableRow = newTable[32,], Accession = "P01012", whichModification = "Carbamido", positionColumn = "PositionsinProteins") newTable[1,] getModPositionInProtein(peptideTableRow = newTable[1,], Accession = "P01012", whichModification = "Carbamidomethyl", positionColumn = "PositionsinProteins") newTable[26,] # # of possible positions for the phospho modification stringr::str_count(newTable$Sequence[26], pattern = "S|T") getModPositionInProtein(peptideTableRow = newTable[26,], Accession = "P01012", whichModification = "Phospho", positionColumn = "PositionsinProteins") getModPositionInProtein(peptideTableRow = newTable[26,], Accession = "P01012", whichModification = "Carbamidomethyl", positionColumn = "PositionsinProteins") # to get all Phospho positions in the protein purrr::map_df(1:nrow(newTable), ~getModPositionInProtein(newTable[.x,], Accession = "P01012", whichModification = "Phospho", positionColumn = "PositionsinProteins")) |> dplyr::distinct(position, clear) |> dplyr::arrange(position)
OVATable("peptide")$Modifications # generate data.frame of all these modifications theModifications <- purrr::map_df(OVATable("peptide")$Modifications, ~modPositions(.x, returnEmptyRow = FALSE) |> modPositionsToDF(returnEmptyRow = FALSE)) theModifications |> head() # bind together the modifications woth the original table newTable <- dplyr::bind_cols(OVATable("peptide"), theModifications) newTable |> head() newTable[15,] getModPositionInProtein(peptideTableRow = newTable[15,], Accession = "P01012", whichModification = "Oxidation", positionColumn = "PositionsinProteins") newTable[32,] getModPositionInProtein(peptideTableRow = newTable[32,], Accession = "P01012", whichModification = "Phospho", positionColumn = "PositionsinProteins") getModPositionInProtein(peptideTableRow = newTable[32,], Accession = "P01012", whichModification = "Carbamidomethyl", positionColumn = "PositionsinProteins") getModPositionInProtein(peptideTableRow = newTable[32,], Accession = "P01012", whichModification = "Carbamido", positionColumn = "PositionsinProteins") newTable[1,] getModPositionInProtein(peptideTableRow = newTable[1,], Accession = "P01012", whichModification = "Carbamidomethyl", positionColumn = "PositionsinProteins") newTable[26,] # # of possible positions for the phospho modification stringr::str_count(newTable$Sequence[26], pattern = "S|T") getModPositionInProtein(peptideTableRow = newTable[26,], Accession = "P01012", whichModification = "Phospho", positionColumn = "PositionsinProteins") getModPositionInProtein(peptideTableRow = newTable[26,], Accession = "P01012", whichModification = "Carbamidomethyl", positionColumn = "PositionsinProteins") # to get all Phospho positions in the protein purrr::map_df(1:nrow(newTable), ~getModPositionInProtein(newTable[.x,], Accession = "P01012", whichModification = "Phospho", positionColumn = "PositionsinProteins")) |> dplyr::distinct(position, clear) |> dplyr::arrange(position)
Helper function to get the start positions of amino acid sequences. The function
works with either a protein sequence and the peptide sequence to be searched
(regular expressions can be used) or a peptide table (data.frame) containing
position information in the format used by Proteome Discoverer. See also
getPositions
getPeptideStart( peptideTable = NA, Accession = "", positionColumn, exact = TRUE, proteinSequence = NA, peptideSequence = NA, noWarnings = TRUE )
getPeptideStart( peptideTable = NA, Accession = "", positionColumn, exact = TRUE, proteinSequence = NA, peptideSequence = NA, noWarnings = TRUE )
peptideTable |
data.frame having a.position column in the Proteome
Discoverer format (see |
Accession |
character vector that specifies from which protein Accession to get the position |
positionColumn |
character vector which specifies which column contains the peptide position information. Proteome Discoverer uses either 'PositionsinProteins' or 'PositionsinMasterProteins' (depending on the settings used in the consensus method) |
exact |
logical vector which defines how the Accession argument is compared to the string argument. Default is TRUE which means that the Accession needs to be exactly the same letters/numbers. If FALSE then a 'grepl' statement is used with the Accession as a pattern |
proteinSequence |
character vector representing the protein sequence to be search. Ignored if peptideTable is not NA |
peptideSequence |
character vector representing the peptide sequence to search for. Regular Expressions can be used (see examples). Ignored if peptideTable is not NA |
noWarnings |
logical vector, default is TRUE: no warnings when a peptide sequence is present more than once in a protein sequence. If FALSE then a warning will be generated. Ignored if peptideTable is not NA |
numeric vector
# find all start positions for all peptides in the peptide table getPeptideStart(peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins") # find all locations of a peptide sequence getPeptideStart(proteinSequence = standardProtein(), peptideSequence = "DILNQITK") # find all locations of a single amino acid getPeptideStart(proteinSequence = standardProtein(), peptideSequence = "P", noWarnings = FALSE) # find all tryptic sites, not taking into account Proline (P) getPeptideStart(proteinSequence = standardProtein(), peptideSequence = "K|R") getPeptideStart(proteinSequence = standardProtein("BSA"), peptideSequence = "K|R") # find all tryptic sites w/o a preceding Proline (P) getPeptideStart(proteinSequence = standardProtein(), peptideSequence = "(?<!P)R|(?<!P)K") getPeptideStart(proteinSequence = standardProtein("BSA"), peptideSequence = "(?<!P)R|(?<!P)K")
# find all start positions for all peptides in the peptide table getPeptideStart(peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins") # find all locations of a peptide sequence getPeptideStart(proteinSequence = standardProtein(), peptideSequence = "DILNQITK") # find all locations of a single amino acid getPeptideStart(proteinSequence = standardProtein(), peptideSequence = "P", noWarnings = FALSE) # find all tryptic sites, not taking into account Proline (P) getPeptideStart(proteinSequence = standardProtein(), peptideSequence = "K|R") getPeptideStart(proteinSequence = standardProtein("BSA"), peptideSequence = "K|R") # find all tryptic sites w/o a preceding Proline (P) getPeptideStart(proteinSequence = standardProtein(), peptideSequence = "(?<!P)R|(?<!P)K") getPeptideStart(proteinSequence = standardProtein("BSA"), peptideSequence = "(?<!P)R|(?<!P)K")
helper function which extracts the letter codes (for amino acids) from the position info part of a modification
getPositionLetters(position)
getPositionLetters(position)
position |
character vector with format |
character vector with format
internal function
helper function which extracts the positions from the position info part of a modification
getPositionNumbers(position)
getPositionNumbers(position)
position |
character vector with format |
character vector with format
internal function
position info which does not have a letter will return a zero as position
Gets the part of the string containing position information. Works with data organized in the 'PositionsinProteins' and/or 'PositionsinMasterProteins' column of peptide tables as defined in Proteome Discoverer
getPositions(string, Accession, exact = TRUE, splitCharacter = ";")
getPositions(string, Accession, exact = TRUE, splitCharacter = ";")
string |
character vector in the format: 'Accession' 'position'; 'Accession' 'position'; etc |
Accession |
character vector that specifies from which protein Accession to get the position |
exact |
logical vector which defines how the Accession argument is compared to the string argument. Default is TRUE which means that the Accession needs to be exactly the same letters/numbers. If FALSE then a 'grepl' statement is used with the Accession as a pattern |
splitCharacter |
character vector which specifies the beginning/end of 'Accession' 'position' pair. Default is ";" which is used by Proteome Discoverer |
a character vector containing the position of the specified Accession
this function expects Accessions to be unique. If this is not true or the parameter exact is set to FALSE it may return character vectors with length > 1
getPositions("P00000 [352-360]; P01012 [351-359]", Accession = "P01012") getPositions("P00000 [352-360]; P01012 [351-359]", Accession = "P00000") getPositions("P00000 [352-360]; P01012 [351-359]", Accession = "P0", exact = FALSE)
getPositions("P00000 [352-360]; P01012 [351-359]", Accession = "P01012") getPositions("P00000 [352-360]; P01012 [351-359]", Accession = "P00000") getPositions("P00000 [352-360]; P01012 [351-359]", Accession = "P0", exact = FALSE)
Make adjustments to a graph eg zoom, titles etc etc
graphsAdjust( graphs, vertical = FALSE, xDiscrete = FALSE, yDiscrete = FALSE, xReverse = FALSE, yReverse = FALSE, xDefault = FALSE, yDefault = FALSE, xLimits = c(0, NA), yLimits = c(0, NA), xExpand = ggplot2::expansion(mult = 0, add = 0), yExpand = ggplot2::expansion(mult = 0, add = 0), xLabelFormat = ggplot2::waiver(), yLabelFormat = ggplot2::waiver(), xOob = scales::oob_squish_infinite, yOob = scales::oob_squish_infinite, xLog = FALSE, yLog = FALSE, xIsDate = FALSE, yIsDate = FALSE, titles = NA, xLabel = NA, yLabel = NA, setTheme = theme_minimal_adapted(), plot.margins.default = TRUE, plot.margins = c(5, 5, 5, 5), plot.margins.units = "points" )
graphsAdjust( graphs, vertical = FALSE, xDiscrete = FALSE, yDiscrete = FALSE, xReverse = FALSE, yReverse = FALSE, xDefault = FALSE, yDefault = FALSE, xLimits = c(0, NA), yLimits = c(0, NA), xExpand = ggplot2::expansion(mult = 0, add = 0), yExpand = ggplot2::expansion(mult = 0, add = 0), xLabelFormat = ggplot2::waiver(), yLabelFormat = ggplot2::waiver(), xOob = scales::oob_squish_infinite, yOob = scales::oob_squish_infinite, xLog = FALSE, yLog = FALSE, xIsDate = FALSE, yIsDate = FALSE, titles = NA, xLabel = NA, yLabel = NA, setTheme = theme_minimal_adapted(), plot.margins.default = TRUE, plot.margins = c(5, 5, 5, 5), plot.margins.units = "points" )
graphs |
list of ggplot-objects to which the adjustments have to be made Note: MUST be a list |
vertical |
if TRUE, flips x- and y-axis |
xDiscrete |
specifies whether the x-axis should be discrete |
yDiscrete |
specifies whether the y-axis should be discrete |
xReverse |
specifies whether to reverse the x-axis |
yReverse |
specifies whether to reverse the y-axis |
xDefault |
if TRUE, then xExpand, xLimits and xOob are ignored (essentially autoscaling the x-axis) |
yDefault |
same as xDefault, but for y-axis |
xLimits |
range of the x-axis, normally xLimits = c(minimum, maximum) if minimum and/or maximum is NA, then they are autoscaled, if xLimits = NA then the range is 0, putting all datapoints in one line (x-axis-wise) |
yLimits |
range of the y-axis (see xLimits) |
xExpand |
allows for padding around data (x-axis), see ?ggplot2::expansion for proper explanation |
yExpand |
allows for padding around data (y-axis), see ?ggplot2::expansion for proper explanation |
xLabelFormat |
defines the numeric format to be used for the x-axis labels (see fromatDigits() & formatScientificDigits() for examples) |
yLabelFormat |
defines the numeric format to be used for the y-axis labels (see fromatDigits() & formatScientificDigits() for examples) |
xOob |
defines what to do with data that's out of range of the data, see ?scales::oob for proper explanation. Note: only deals with x-axis |
yOob |
defines what to do with data that's out of range of the data, see ?scales::oob for proper explanation. Note: only deals with y-axis |
xLog |
if TRUE then automatic transformation of the x-axis to logarihmic scale |
yLog |
if TRUE then automatic transformation of the y-axis to logarihmic scale |
xIsDate |
if TRUE then all settings regarding the x-axis are ignored, except xLimits which should be dates (defaults are not dates). Alternatively xDefault can be set to TRUE for autoscaling |
yIsDate |
if TRUE then all settings regarding the y-axis are ignored except yLimits which should be dates (defaults are not dates). Alternatively xDefault can be set to TRUE for autoscaling |
titles |
sets title of graph |
xLabel |
sets x-axis title |
yLabel |
set y-axos title |
setTheme |
if NA, then no theme is applied, otherwise uses the defined theme (can also be ggplot2 included themes, such as theme_bw()) |
plot.margins.default |
if TRUE, ignore plot.margins and other parameters |
plot.margins |
defines margins (from the border) of the plot c(top, right, bottom, left) |
plot.margins.units |
default = "points", other possibilities: ?grid::unit , examples "cm", "points", "inches", "npc" (viewport) etc etc |
a list of ggplot objects
also part of the personal package 'BBPersonalR'
ifelse replacement for properly returning all datatypes.
ifelseProper(logicValue = NULL, ifTrue = NULL, ifFalse = NULL)
ifelseProper(logicValue = NULL, ifTrue = NULL, ifFalse = NULL)
logicValue |
variable or expression resulting in TRUE or FALSE, if missing or not logical then the function will return NULL. |
ifTrue |
variable or expression to be returned when logicValue == TRUE |
ifFalse |
variable or expression to be returned when logicValue == FALSE |
depending on logicValue, ifTrue ir ifFalse.
not vectorized
internal function
helper function to determine if object == whichClass or a descendant of whichClass.
is.Class(object, whichClass)
is.Class(object, whichClass)
object |
a data object of some class |
whichClass |
character string: class name to be tested |
TRUE or FALSE
internal function
function factoru that generates a function that takes a numeric value, adds a
specified value and then gives the logarithmic value with base 10. This is similair tp
log1p
. It prevents problems with zero. It does add a small value to
the original value, but as this is here meant for log10 transformation of values over a
large range (like Abundances of peptides in a digest), it should not be a problem.
log10add(valueToAdd)
log10add(valueToAdd)
valueToAdd |
numeric value to add to each value before taking the log10 in the resulting function |
function
log10(0:10) log10add(1)(0:10) log10add(0)(0:10) log10(c(0,1E5, 1E7, 1E8)) log10add(1)(c(0,1E5, 1E7, 1E8)) log10add(1E3)(c(0,1E5, 1E7, 1E8))
log10(0:10) log10add(1)(0:10) log10add(0)(0:10) log10(c(0,1E5, 1E7, 1E8)) log10add(1)(c(0,1E5, 1E7, 1E8)) log10add(1E3)(c(0,1E5, 1E7, 1E8))
takes a numeric value, adds 1 and then gives the logarithmic value with base 10.
This is essentially the log10 version of log1p
. This prevents problems with
zero. It does add a small value to the original value, but as this is here meant for log10
transformation of values over a large range (like Abundances of peptides in a digest),
so it should not be a problem.
log10p(value)
log10p(value)
value |
numeric value |
numeric value
log10(0:10) log10p(0:10) log10(c(0,1E5, 1E7, 1E8)) log10p(c(0,1E5, 1E7, 1E8))
log10(0:10) log10p(0:10) log10(c(0,1E5, 1E7, 1E8)) log10p(c(0,1E5, 1E7, 1E8))
takes a data.frame with peptide information (like a peptide table from Proteome Discoverer) and adds data from it to the proteinData
mapPeptidesToProtein( proteinDF, peptideTable, Accession, positionColumn, shiftPosition = 0, variable, dataName = variable, NAValue = 0L, combineFunction = sum, na.rm = TRUE )
mapPeptidesToProtein( proteinDF, peptideTable, Accession, positionColumn, shiftPosition = 0, variable, dataName = variable, NAValue = 0L, combineFunction = sum, na.rm = TRUE )
proteinDF |
the protein data.frame to add the info to. Main requirements are that there is a column named position, that there is no column named 'newData____' (internal use) and no column with the same name as defined by the 'dataName' argument |
peptideTable |
a data.frame with at least a position column (see 'positionColumn' argument) and a column with the same name as the 'variable' argument. This is what Proteome Discoverer produces |
Accession |
character vector that specifies from which protein Accession to get the position |
positionColumn |
character vector which specifies which column contains the peptide position information. Proteome Discoverer uses either 'PositionsinProteins' or 'PositionsinMasterProteins' (depending on the settings used in the consensus method) |
shiftPosition |
integer value, if the data in the proteinDF and the data in the peptideTable are shifted, this parameter can be used to align them. Default is 0 |
variable |
character vector: name of the column in the 'peptideTable' that needs to be mapped onto the protein data |
dataName |
character vector: name of the new column in the protein data data.frame |
NAValue |
vector to be used for positions where no peptide data maps to |
combineFunction |
function (default is |
na.rm |
default is TRUE. If TRUE then 'na.rm=TRUE' is added to the 'combineFunction', If FALSE, then it is ignored |
a data.frame
newTable <- createProteinData(sequence = standardProtein("OVA"), start = 2, nterm = 1, ) newTable |> head() newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_1", dataName = "SummedAbundances") newTable |> head() newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_1", dataName = "Abundances", combineFunction = NA) newTable[35:45,] newTable[100:125,]
newTable <- createProteinData(sequence = standardProtein("OVA"), start = 2, nterm = 1, ) newTable |> head() newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_1", dataName = "SummedAbundances") newTable |> head() newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_1", dataName = "Abundances", combineFunction = NA) newTable[35:45,] newTable[100:125,]
helper function which 'translates' a character vector representing a (peptide) modification to a data.frame
modPositions(modifications, collapseChar = ";", returnEmptyRow = TRUE)
modPositions(modifications, collapseChar = ";", returnEmptyRow = TRUE)
modifications |
character vector representing one or more modifications. The format is 'number × modification name [position<letter(+number)>]'. If more than one modification is present, the character vector is 'combined' with separation character ';' (collapseChar) |
collapseChar |
character vector separating the modifications in the argument 'modifications' |
returnEmptyRow |
logical vector, default is TRUE. TRUE: if no modification is present in the 'modifications' argument, a data.frame will be returned without rows. FALSE: in that case a data.frame with a single row, with in every column an empty character vector, will be returned. |
a data.frame with columns: modifications, positions, letters and numbers
if there are two or more modification present, then number of rows in the returned data.frame will also be two or more
the modification format is what Proteome Discoverer/Sequest works with in its peptide tables
OVATable("peptide")$Modifications[26] modPositions(OVATable("peptide")$Modifications[26]) OVATable("peptide")$Modifications[2] modPositions(OVATable("peptide")$Modifications[2]) OVATable("peptide")$Modifications[1] modPositions(OVATable("peptide")$Modifications[1]) modPositions(OVATable("peptide")$Modifications[1], returnEmptyRow = FALSE) purrr::map_df(OVATable("peptide")$Modifications, ~modPositions(.x))
OVATable("peptide")$Modifications[26] modPositions(OVATable("peptide")$Modifications[26]) OVATable("peptide")$Modifications[2] modPositions(OVATable("peptide")$Modifications[2]) OVATable("peptide")$Modifications[1] modPositions(OVATable("peptide")$Modifications[1]) modPositions(OVATable("peptide")$Modifications[1], returnEmptyRow = FALSE) purrr::map_df(OVATable("peptide")$Modifications, ~modPositions(.x))
helper function that 'collapses' two or more rows in a modification data.frane (coming from modPositions
)
into a single data.frame row
modPositionsToDF(mods, collapseChar = ",", returnEmptyRow = TRUE)
modPositionsToDF(mods, collapseChar = ",", returnEmptyRow = TRUE)
mods |
data.frame coming from |
collapseChar |
character vector (default ',') which is used to collapse the columns of the rows (needed to undo
the 'collapse' in |
returnEmptyRow |
logical vector, default is TRUE. TRUE: if no modification is present in the 'mods' argument, a data.frame will be returned without rows. FALSE: in that case a data.frame with a single row, with in every column an empty character vector, will be returned. |
a data.frame with columns: modifications, positions, letters and numbers
OVATable("peptide")$Modifications[32] modPositions(OVATable("peptide")$Modifications[32]) modPositions(OVATable("peptide")$Modifications[32]) |> modPositionsToDF() OVATable("peptide")$Modifications[2] modPositions(OVATable("peptide")$Modifications[2]) modPositions(OVATable("peptide")$Modifications[2]) |> modPositionsToDF()
OVATable("peptide")$Modifications[32] modPositions(OVATable("peptide")$Modifications[32]) modPositions(OVATable("peptide")$Modifications[32]) |> modPositionsToDF() OVATable("peptide")$Modifications[2] modPositions(OVATable("peptide")$Modifications[2]) modPositions(OVATable("peptide")$Modifications[2]) |> modPositionsToDF()
Helper function for examples & testing. Generates a data.frame, of protein or peptide information (both Ovalbumin), which is similar to what comes out of Proteome Discoverer
OVATable(whichone = "protein")
OVATable(whichone = "protein")
whichone |
character vector which specifies what to get: 'protein' for a protein table, 'peptide' for a peptide table |
data.frame or NA
OVATable("protein") OVATable("Peptide") OVATable("Info")
OVATable("protein") OVATable("Peptide") OVATable("Info")
creates a line plot of protein data with the amino acid position on the x-axis
plotProteinVariable( proteinDF, column, positionColumn = "position", addSequence = FALSE, sequenceColumn = "sequence", sequenceLevel = NA, textColor = "black", textSize = 2, textAlpha = 1, textNudgeX = 0, textNudgeY = 0, textAngle = 0, textFontFace = "plain", lineColor = "red", lineWidth = 0.25, lineType = "solid", lineAlpha = 1, drawYzero = FALSE, zeroColor = "blue", zeroWidth = 0.5, zeroType = "solid", zeroAlpha = 0.5, yLimits = "auto", yLog = FALSE, ylogSetValue = 1, incrY = ifelse(yLog, 0.5, 0.025), decrY = 0, xLimits = "auto", xLabel = positionColumn, yLabel = column, xLabelFormat = ggplot2::waiver(), yLabelFormat = ggplot2::waiver(), title = ggplot2::waiver(), subtitle = ggplot2::waiver(), caption = ggplot2::waiver(), setTheme = theme_minimal_adapted() )
plotProteinVariable( proteinDF, column, positionColumn = "position", addSequence = FALSE, sequenceColumn = "sequence", sequenceLevel = NA, textColor = "black", textSize = 2, textAlpha = 1, textNudgeX = 0, textNudgeY = 0, textAngle = 0, textFontFace = "plain", lineColor = "red", lineWidth = 0.25, lineType = "solid", lineAlpha = 1, drawYzero = FALSE, zeroColor = "blue", zeroWidth = 0.5, zeroType = "solid", zeroAlpha = 0.5, yLimits = "auto", yLog = FALSE, ylogSetValue = 1, incrY = ifelse(yLog, 0.5, 0.025), decrY = 0, xLimits = "auto", xLabel = positionColumn, yLabel = column, xLabelFormat = ggplot2::waiver(), yLabelFormat = ggplot2::waiver(), title = ggplot2::waiver(), subtitle = ggplot2::waiver(), caption = ggplot2::waiver(), setTheme = theme_minimal_adapted() )
proteinDF |
the protein data data.frame. Obligatory columns are 'position' and 'sequence' (in case this needs to be displayed) |
column |
character vector: name(s) of the data column(s) that need to be displayed |
positionColumn |
name of the position column in the protein data which has the position info (default is 'position') |
addSequence |
logical vector (default FALSE). If TRUE, then the info in the argument 'sequenceColumn' is displayed in the plot |
sequenceColumn |
name of the column containing the sequence info, usually the amino acid sequence, tough it can be something else |
sequenceLevel |
defines where to display the 'sequenceColumn' info. Default is NA (ignored): sequence is displayed at the height (y-axis) of the line plot itself. Possible other values: 'min' (display at minimum y-value), 'max' (display at maximum y-value) or a numeric value (display at exactly this y-value) |
textColor |
color of the sequence information displayed |
textSize |
size of the sequence information displayed |
textAlpha |
alpha of the sequence information displayed |
textNudgeX |
offset (x-axis direction) of the sequence information displayed |
textNudgeY |
offset (y-axis direction) of the sequence information displayed |
textAngle |
number of degrees to rotate the sequence information displayed, default is 0 |
textFontFace |
allows manipulation of the text font used. Default is 'plain'. Alternatives are "bold", "italic", & "bold.italic" |
lineColor |
specifies color of the line |
lineWidth |
specifies width of the line |
lineType |
specifies linetype of the line |
lineAlpha |
specifies alpha of the line |
drawYzero |
logical vector. If TRUE then a horizontal line a y = 0 is drawn/ (default is FALSE) |
zeroColor |
specifies color of the y=0 line |
zeroWidth |
specifies width of the y=0 line |
zeroType |
specifies linetype of the y=0 line |
zeroAlpha |
specifies alpha of the y=0 line |
yLimits |
range of the y-axis, normally yLimits = c(minimum, maximum) or 'auto' for automatic (default) |
yLog |
if TRUE then automatic transformation of the y-axis to logarihmic scale (default is FALSE) |
ylogSetValue |
in case yLog is numeric, then this value allows any y-value equal to zero or below zero to be set to this value. Default is 1. Can be set to FALSE: no change of any value |
incrY |
increases the upper limit of the y-axis by 100*incrY percent. Only works if yLimits is set to 'auto' |
decrY |
decreases the lower limit of the y-axis by 100*decrY percent. Only works if yLimits is set to 'auto' |
xLimits |
range of the x-axis, normally xLimits = c(minimum, maximum) or 'auto' for automatic (default) |
xLabel |
sets x-axis title |
yLabel |
set y-axos title |
xLabelFormat |
defines the numeric format to be used for the x-axis labels (see BBPersonalR::fromatDigits() & BBPersonalR::formatScientificDigits() for examples) |
yLabelFormat |
defines the numeric format to be used for the y-axis labels (see BBPersonalR::fromatDigits() & BBPersonalR::formatScientificDigits() for examples) |
title |
specifies the title |
subtitle |
specifies the subtitle |
caption |
specifies the caption |
setTheme |
specifies the theme see also |
a ggplot object
newTable <- proteinCoverage(sequence = standardProtein("OVA"), start = 2, nterm = 1, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", emptySequence = " ") trypticSites <- getPeptideStart(proteinSequence = standardProtein("OVA"), peptideSequence = "(?<!P)R|(?<!P)K") trypticSites <- data.frame(position = trypticSites, data = 1) trypticSites |> head() newTable <- addDataToProtein(proteinDF = newTable, dataframe = trypticSites, dataColumn = "data", dataName = "trypticSite", NAValue = 0) newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "XCorrbySearchEngine_1", combineFunction = max) newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_1") newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_2") newTable[41:50,] plotProteinVariable(proteinDF = newTable, column = "XCorrbySearchEngine_1", yLabel = "Score") plotProteinVariable(proteinDF = newTable, column = "XCorrbySearchEngine_1", yLabel = "Score", addSequence = TRUE) plotProteinVariable(proteinDF = newTable, column = "XCorrbySearchEngine_1", yLabel = "Score", addSequence = TRUE, textColor = "blue", xLimits = c(51,100), yLimits = c(-0.1, 11.1)) plotProteinVariable(proteinDF = newTable, column = "XCorrbySearchEngine_1", yLabel = "Score", addSequence = TRUE, textColor = "blue", sequenceLevel = "min", xLimits = c(51,100), yLimits = c(-0.1, 11.1)) plotProteinVariable(proteinDF = newTable, column = "XCorrbySearchEngine_1", yLabel = "Score", addSequence = TRUE, textColor = "blue", sequenceLevel = 5, xLimits = c(51,100), yLimits = c(-0.1, 11.1)) plotProteinVariable(proteinDF = newTable, column = "Abundances_1", yLabel = "Abundance") plotProteinVariable(proteinDF = newTable, column = "Abundances_1", yLabel = "Abundance", yLog = TRUE, ylogSetValue = FALSE, yLimits = c(1E3, 2E10), drawYzero = TRUE, zeroType = "dotted", zeroAlpha = 0.5) plotProteinVariable(proteinDF = newTable, column = "Abundances_1", yLabel = "Abundance", yLog = TRUE, ylogSetValue = 1E4, yLimits = c(1E3, 2E10), drawYzero = TRUE, zeroType = "dotted", zeroAlpha = 0.5) plotProteinVariable(proteinDF = newTable, column = c("Abundances_1", "Abundances_2"), lineColor = c("blue", "red"), yLabel = "Abundance", yLog = TRUE, ylogSetValue = 1E4, yLimits = c(1E3, 2E10), drawYzero = TRUE, zeroType = "dotted", zeroAlpha = 0.5)
newTable <- proteinCoverage(sequence = standardProtein("OVA"), start = 2, nterm = 1, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", emptySequence = " ") trypticSites <- getPeptideStart(proteinSequence = standardProtein("OVA"), peptideSequence = "(?<!P)R|(?<!P)K") trypticSites <- data.frame(position = trypticSites, data = 1) trypticSites |> head() newTable <- addDataToProtein(proteinDF = newTable, dataframe = trypticSites, dataColumn = "data", dataName = "trypticSite", NAValue = 0) newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "XCorrbySearchEngine_1", combineFunction = max) newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_1") newTable <- mapPeptidesToProtein(proteinDF = newTable, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", variable = "Abundances_2") newTable[41:50,] plotProteinVariable(proteinDF = newTable, column = "XCorrbySearchEngine_1", yLabel = "Score") plotProteinVariable(proteinDF = newTable, column = "XCorrbySearchEngine_1", yLabel = "Score", addSequence = TRUE) plotProteinVariable(proteinDF = newTable, column = "XCorrbySearchEngine_1", yLabel = "Score", addSequence = TRUE, textColor = "blue", xLimits = c(51,100), yLimits = c(-0.1, 11.1)) plotProteinVariable(proteinDF = newTable, column = "XCorrbySearchEngine_1", yLabel = "Score", addSequence = TRUE, textColor = "blue", sequenceLevel = "min", xLimits = c(51,100), yLimits = c(-0.1, 11.1)) plotProteinVariable(proteinDF = newTable, column = "XCorrbySearchEngine_1", yLabel = "Score", addSequence = TRUE, textColor = "blue", sequenceLevel = 5, xLimits = c(51,100), yLimits = c(-0.1, 11.1)) plotProteinVariable(proteinDF = newTable, column = "Abundances_1", yLabel = "Abundance") plotProteinVariable(proteinDF = newTable, column = "Abundances_1", yLabel = "Abundance", yLog = TRUE, ylogSetValue = FALSE, yLimits = c(1E3, 2E10), drawYzero = TRUE, zeroType = "dotted", zeroAlpha = 0.5) plotProteinVariable(proteinDF = newTable, column = "Abundances_1", yLabel = "Abundance", yLog = TRUE, ylogSetValue = 1E4, yLimits = c(1E3, 2E10), drawYzero = TRUE, zeroType = "dotted", zeroAlpha = 0.5) plotProteinVariable(proteinDF = newTable, column = c("Abundances_1", "Abundances_2"), lineColor = c("blue", "red"), yLabel = "Abundance", yLog = TRUE, ylogSetValue = 1E4, yLimits = c(1E3, 2E10), drawYzero = TRUE, zeroType = "dotted", zeroAlpha = 0.5)
Helper function to add (empty) rows to proteinData,
prepare( data, start = min(data$position), end = max(data$position), emptySequence = " " )
prepare( data, start = min(data$position), end = max(data$position), emptySequence = " " )
data |
data.frame with two columns: position (integer vector) and sequence (amino acid letters: character vector) |
start |
integer, should be less than the start position (in first row of data): adds positions with 'emptySequence' to the head of data |
end |
integer, should be maximum positione: adds positions with 'emptySequence' to the tail of data |
emptySequence |
default " ", character vector for the sequence column to use when adding rows. Note: other columns will also be extended and usually add NA's as a value |
data.frame with two columns: position (integer vector) and sequence (amino acid letters: character vector)
createProteinData(sequence = standardProtein("bsa"), start = 25) |> prepare(start = 1) |> head() createProteinData(sequence = standardProtein("bsa"), start = 25) |> prepare(end = 610) |> tail()
createProteinData(sequence = standardProtein("bsa"), start = 25) |> prepare(start = 1) |> head() createProteinData(sequence = standardProtein("bsa"), start = 25) |> prepare(end = 610) |> tail()
Create a protein data.frame with basic information: position, sequence & coverage. In the resulting data.frame, every row has an amino acid (letter, sequence column), the position of that amino acid in the protein (position column) and a coverage column where 0 is no coverage and 1 is coverage.
The function can work with a protein table row (as in Proteome Discoverer results), which should contain the Sequence and the SequenceCoverage columns. An alternative is using a sequence character vector containing a protein amino acid sequence and a peptide table (as in Proteome Discoverer results) of that same sequence (protein).
proteinCoverage( proteinTableRow = NA, checkSequenceLength = TRUE, noWarnings = FALSE, prepareColumns = NA, prepareRows = NA, emptySequence = "", noCoverage = 0, coverage = 1, sequence = NA, start = 1, end = ifelse(is.na(sequence), 1, nchar(sequence)), nterm = NA, cterm = NA, peptideTable, Accession, positionColumn, shiftPosition = 0 )
proteinCoverage( proteinTableRow = NA, checkSequenceLength = TRUE, noWarnings = FALSE, prepareColumns = NA, prepareRows = NA, emptySequence = "", noCoverage = 0, coverage = 1, sequence = NA, start = 1, end = ifelse(is.na(sequence), 1, nchar(sequence)), nterm = NA, cterm = NA, peptideTable, Accession, positionColumn, shiftPosition = 0 )
proteinTableRow |
data.frame row that has at least a Sequence column with the whole protein amino acid sequence and a SequenceCoverage column specifying the coverage. The SequenceCoverage needs to be in format: 'full sequence length';'start-end 1st part';'start-end 2nd part'; etc etc The start-end parts together specify which parts of the protein have been sequenced (coverage). Note: the positions SequenceCoverage field in the proteinTableow are zero based: the first amino acid position is zero, not one! This behavior is consistent with protein tables as used by Proteome Discoverer and can be changed by the proteinTableRowZero argument. |
checkSequenceLength |
logical vector: default is TRUE, defined for odd situations where there may be malformed SequenceCoverage fields. Might be removed in future versions |
noWarnings |
logical vector: default is FALSE. If the check for the sequence length fails, it will generate a warning. If TRUE then no warning is generated. Might be removed in future versions |
prepareColumns |
defines the number of columns for a 2D matrix (puts the amino acid letters in a rows*columns matrix) |
prepareRows |
defines the number of rows for a 2D matrix (puts the amino acid letters in a rows*columns matrix) |
emptySequence |
default "-", character vector for the sequence column to use when adding rows. Note: other columns will also be extended and usually add NA's as a value. Ignored when proteinTableRow is not NA. |
noCoverage |
defines what to use when a position is not 'covered'. Can be a number, logical or character. It will be changed into a character vector at the end of the function. Default is 0 (integer) |
coverage |
defines what to use when a position is 'covered'. Can be a number, logical or character. It will be changed into a character vector at the end of the function. Default is 1 (integer) |
sequence |
character vector, protein sequence in the form of the usual letter codes. Ignored when proteinTableRow is not NA. |
start |
first position in the sequence to be used. Ignored when proteinTableRow is not NA. |
end |
last position in the sequence to be used. Ignored when proteinTableRow is not NA. |
nterm |
start position 'adjustment'. Adds a number of 'empty' rows to the head of the resulting data.frame to have it start at position nterm. Ignored when proteinTableRow is not NA. |
cterm |
end position 'adjustment'. Adds a number of 'empty' rows to the tail of the resulting data.frame to have it ends at position cterm |
peptideTable |
data.frame containing the peptide information to be mapped to the sequence. Sholud contain either a column named 'PositionsinProteins'
or 'PositionsinMasterProteins'. This column should be character vectors of one or more of following element: |
Accession |
character vector that specifies from which protein Accession to get the position. This is needed for when using the peptideTable as a peptide may be present in more than one protein. Ignored when proteinTableRow is not NA. |
positionColumn |
character vector which specifies which column contains the peptide position information. Proteome Discoverer uses either 'PositionsinProteins' or 'PositionsinMasterProteins' (depending on the settings used in the consensus method) |
shiftPosition |
integer value, if the data in the protein sequence and the data in the peptideTable are shifted, this parameter can be used to align them. Default is 0 and ignored when proteinTableRow is not NA. |
data.frame with same format as output by createProteinData
or boxData
with the 'coverage' column added. The values
in the coverage column are either 0 (no coverage) or 1 (coverage)
it is possible to use both prepareColumns and prepareRows at the same time, but this may lead to errors and unexpected results. It is therefore not recommended to do so.
proteinCoverage(OVATable()) proteinCoverage(OVATable(), prepareColumns = 50) proteinCoverage(OVATable()) |> boxData(ncols = 50) proteinCoverage(sequence = substr(standardProtein(), 2, 386), peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins")
proteinCoverage(OVATable()) proteinCoverage(OVATable(), prepareColumns = 50) proteinCoverage(OVATable()) |> boxData(ncols = 50) proteinCoverage(sequence = substr(standardProtein(), 2, 386), peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins")
takes protein data with color information and creates a smaller data.frame contaning three columns: start, end (both positions) and color. This can be used for mapping the colors onto the 3D object. It is used inside the
reduceColors(proteinDF, positionColumn = "position", colorColumn)
reduceColors(proteinDF, positionColumn = "position", colorColumn)
proteinDF |
protein data data.frame |
positionColumn |
name of the column containing the position information |
colorColumn |
name of the column containing the color information |
data.frame with three columns: start, end (both positions) and color
internal function
Calculates number of columns and rows of a matrix from number of cells and the ratio number of columns/ number of rows The number of cells is a defined as a minimum number of cells. Either the number of rows or the number of columns is increased to accommodate this. The column/row ratio and number of cells for the 'solution' might therefore not be exact. The reason for this is that the intended target for this function is to come up with a cell matrix big enough for a data of a certain size (vectorLength)
rowsAndcolumns( vectorLength, ratio, increaseColumns = FALSE, minimumRatio = 0.01, maximumRatio = 100 )
rowsAndcolumns( vectorLength, ratio, increaseColumns = FALSE, minimumRatio = 0.01, maximumRatio = 100 )
vectorLength |
minimum number of cells for the resulting columns*rows matrix |
ratio |
the intended ratio the resulting columns*rows matrix. Increase ratio to get more columns, decrease to get more rows. Note: ratio = number of columns divided by the number of rows. |
increaseColumns |
if TRUE, then the number of columns will be increased to reach the number of matrix cells required. If FALSE then the number of rows will be increased to do this. Default is FALSE |
minimumRatio |
due to the iterative nature of part of the function, this puts a minimum limit on the possible ratios to prevent the function from 'getting stuck'. Default is 0.01 |
maximumRatio |
due to the iterative nature of part of the function, this puts a maximum limit on the possible ratios to prevent the function from 'getting stuck'. Default is 100 |
a named numeric vector (names: columns, rows)
rowsAndcolumns(385, 5) rowsAndcolumns(385, 0.2) rowsAndcolumns(400, 10) rowsAndcolumns(400, 0.1)
rowsAndcolumns(385, 5) rowsAndcolumns(385, 0.2) rowsAndcolumns(400, 10) rowsAndcolumns(400, 0.1)
Helper function for examples & testing. Generate a character vector of the amino acid sequence of one of the (2) standard proteins.
standardProtein(short = "OVA")
standardProtein(short = "OVA")
short |
shortened name of the protein to retrieve. Only 'bsa' and 'ova' (or their upper case equivalents) are valid inputs. Other input will return NA |
an amino acid sequence in the form of a character vector or NA
The data is coming from UniProt
OVA is short for P01012 · OVAL_CHICK Ovalbumin (Gallus gallus)
BSA is short for P02769 · ALBU_BOVIN Bovine Serum Albumin (Bos taurus)
standardProtein() standardProtein("bsa") standardProtein("Insulin")
standardProtein() standardProtein("bsa") standardProtein("Insulin")
Gives the start position of a character vector with format 'xxx-yyy'. Convenience function
startPosition(string, splitCharacter = "-")
startPosition(string, splitCharacter = "-")
string |
character vector with format 'xxx-yyy', where xxx is the start position and yyy is the end position |
splitCharacter |
default is "-", which is used by Proteome Discoverer |
numeric vector
startPosition("901-902") startPosition("1-102")
startPosition("901-902") startPosition("1-102")
helper function that replaces multiple patterns in a character vector, essentially an extension of stringr::str_replace_all
strMultiReplaceAll(strings, patterns, replacements = "")
strMultiReplaceAll(strings, patterns, replacements = "")
strings |
character vector (can be longer than 1) in which all the patterns (arguments) are to be replaced |
patterns |
character vector of patterns to be replaced |
replacements |
character vector of replacements for the patterns in the strings argument (if found). Should be either length = 1 or same length as the replacements. Default is "" (empty character vector), which means that all patterns in strings will be removed |
character vector
internal function
to be able to use the theme theme_minimal with some adjustments
theme_minimal_adapted( base_size = 11, base_family = "", base_line_size = base_size/22, axis_line_size = base_line_size/2, xAxis = TRUE, yAxis = TRUE, showLegend = TRUE, legend.position = "bottom", gridLines = TRUE, gridLinesX = TRUE, gridLinesY = TRUE, titleSize = NA )
theme_minimal_adapted( base_size = 11, base_family = "", base_line_size = base_size/22, axis_line_size = base_line_size/2, xAxis = TRUE, yAxis = TRUE, showLegend = TRUE, legend.position = "bottom", gridLines = TRUE, gridLinesX = TRUE, gridLinesY = TRUE, titleSize = NA )
base_size |
size of the lettering of axis, title etc |
base_family |
letter type of axis, title etc |
base_line_size |
width of gridlines, set to 0 for none |
axis_line_size |
width of axis lines, set to 0 for none |
xAxis |
if TRUE then display xAxis title |
yAxis |
if TRUE then display yAxis title |
showLegend |
if TRUE then show legend |
legend.position |
defines where to place the legend |
gridLines |
if TRUE then display gridlines |
gridLinesX |
if TRUE then display gridlines 'along' the x-axis |
gridLinesY |
if TRUE then display gridlines 'along' the y-axis |
titleSize |
if NA, use default title size, else use titleSize value To be used as ggplot-object + theme_minimal_adapted() |
theme definition
also part of the personal package 'BBPersonalR'
more advanced function that takes a data.frame of information and a protein data data.frame and generates colors matching the instructions in the information data.frame applied to the protein data.frame. This allows for more complicated coloring schemes
translateRangesToColors(ranges, proteinData, outsideColor)
translateRangesToColors(ranges, proteinData, outsideColor)
ranges |
data.frame with 4 columns: 'rangeValues', 'insideColor', 'inbetween'
(similair in use to in |
proteinData |
a proteinData data.frame with at least the columns specified in the ranges data.frame |
outsideColor |
color to apply to any row in the proteinData that falls outside any of the ranges |
a vector of colors with the same length as the number of rows in the protein data
# create protein data ovaProtein <- proteinCoverage(sequence = standardProtein(), peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1) # create ranges data.frame dfColors <- data.frame(rangeValues = NA, insideColors = c("#0053D6", "#65CBF3", "#FFDB13", "#FF7D45"), columnName = "position", inbetween = FALSE) dfColors$rangeValues <- list(c(1:10), c(11:20), c(21:30), c(31:40)) dfColors translateRangesToColors(ranges = dfColors, proteinData = ovaProtein, outsideColor = "white")[1:45] # create ranges data.frame dfColors <- data.frame(rangeValues = NA, insideColors = c("#0053D6", "#65CBF3", "#FFDB13", "#FF7D45"), columnName = "sequence", inbetween = FALSE) dfColors$rangeValues <- list(c("A","C","D"), c("E","F"), c("G","H"), c("K","L")) dfColors translateRangesToColors(ranges = dfColors, proteinData = ovaProtein, outsideColor = "white")[1:45]
# create protein data ovaProtein <- proteinCoverage(sequence = standardProtein(), peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1) # create ranges data.frame dfColors <- data.frame(rangeValues = NA, insideColors = c("#0053D6", "#65CBF3", "#FFDB13", "#FF7D45"), columnName = "position", inbetween = FALSE) dfColors$rangeValues <- list(c(1:10), c(11:20), c(21:30), c(31:40)) dfColors translateRangesToColors(ranges = dfColors, proteinData = ovaProtein, outsideColor = "white")[1:45] # create ranges data.frame dfColors <- data.frame(rangeValues = NA, insideColors = c("#0053D6", "#65CBF3", "#FFDB13", "#FF7D45"), columnName = "sequence", inbetween = FALSE) dfColors$rangeValues <- list(c("A","C","D"), c("E","F"), c("G","H"), c("K","L")) dfColors translateRangesToColors(ranges = dfColors, proteinData = ovaProtein, outsideColor = "white")[1:45]
function factory that generates a color translation function which assigns a color based on whether the argument value is between two values or if the argument value is part of a range values
translateRangeToColors( rangeValues, insideColor, outsideColor = NA, inbetween = TRUE )
translateRangeToColors( rangeValues, insideColor, outsideColor = NA, inbetween = TRUE )
rangeValues |
numeric vector, range of values: if 'inbetween' is TRUE, then the resulting function will determine if a value falls in between the first two values. If 'inbetween' is FALSE, then the resulting function will check if a value is part of the rangeValues argument. Note that in that case one can work with non-numeric values, see example#' |
insideColor |
color to assign to values that fall within or are part of a range |
outsideColor |
color to assign to values that fall outside or are not part of a range |
inbetween |
logical value that determines the type of range checking |
a function that takes the argument 'value', which it then translates to the correct color (vectorized)
translateRangeToColors(rangeValues = c(1, 10), insideColor = "white")(1:20) translateRangeToColors(rangeValues = c(1, 10), insideColor = "white", inbetween = FALSE)(1:20) translateRangeToColors(rangeValues = 1:10, insideColor = "white", inbetween = FALSE)(1:20) translateRangeToColors(rangeValues = c(1,2,3, 11, 12, 13), insideColor = "white", outsideColor = "red", inbetween = FALSE)(1:20) translateRangeToColors(rangeValues = c("A","B","C"), insideColor = "white", outsideColor = "red", inbetween = FALSE)(LETTERS)
translateRangeToColors(rangeValues = c(1, 10), insideColor = "white")(1:20) translateRangeToColors(rangeValues = c(1, 10), insideColor = "white", inbetween = FALSE)(1:20) translateRangeToColors(rangeValues = 1:10, insideColor = "white", inbetween = FALSE)(1:20) translateRangeToColors(rangeValues = c(1,2,3, 11, 12, 13), insideColor = "white", outsideColor = "red", inbetween = FALSE)(1:20) translateRangeToColors(rangeValues = c("A","B","C"), insideColor = "white", outsideColor = "red", inbetween = FALSE)(LETTERS)
function factory that generates a function that 'tramslates' (maps) protein data
to colors. Meant for non-continous data. If the colors are then used as a columnn for the
protein data data.frame, it can be used modify colors on a 3d protein object. See
addStyleElement
for more info'
translateToFixedColors(translateDF, notPresentColor)
translateToFixedColors(translateDF, notPresentColor)
translateDF |
data.frame with two columns: one has the different data elements that need to be translated to a certain color. The second column should be named 'color' and should contain the colors to which the values in the first column are translated. Examples: data.frame(position = 1:10, color = c("blue","red")) or data.frame(residue = c("C", "A", "P"), color = c("yellow", "red", "blue")) |
notPresentColor |
color(code) for when a value is not in the translation data.frame |
function that takes the argument 'value', which it then translates the argument to its corresponding color(code)s. The function is vectorized so the argument can be multi-element
# create protein data ovaProtein <- proteinCoverage(sequence = standardProtein(), peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1) # add tryptic sites ovaProtein <- addDataToProtein(proteinDF = ovaProtein, dataframe = data.frame(position = getPeptideStart(proteinSequence = standardProtein(), peptideSequence = "(?<!P)R|(?<!P)K"), data = "tryptic"), dataColumn = "data", dataName = "realTrypticSite", NAValue = "non tryptic") # add colors for the tryptic sites ovaProtein$trypticColor <- translateToFixedColors(translateDF = data.frame(position = c("tryptic"), color = "red"), notPresentColor = "white")(ovaProtein$realTrypticSite) # add colors for proline positions ovaProtein$proline <- translateToFixedColors(translateDF = data.frame(sequence = "P", color = "red"), notPresentColor = "white")(ovaProtein$sequence) ovaProtein |> dplyr::slice(15:20)
# create protein data ovaProtein <- proteinCoverage(sequence = standardProtein(), peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1) # add tryptic sites ovaProtein <- addDataToProtein(proteinDF = ovaProtein, dataframe = data.frame(position = getPeptideStart(proteinSequence = standardProtein(), peptideSequence = "(?<!P)R|(?<!P)K"), data = "tryptic"), dataColumn = "data", dataName = "realTrypticSite", NAValue = "non tryptic") # add colors for the tryptic sites ovaProtein$trypticColor <- translateToFixedColors(translateDF = data.frame(position = c("tryptic"), color = "red"), notPresentColor = "white")(ovaProtein$realTrypticSite) # add colors for proline positions ovaProtein$proline <- translateToFixedColors(translateDF = data.frame(sequence = "P", color = "red"), notPresentColor = "white")(ovaProtein$sequence) ovaProtein |> dplyr::slice(15:20)
function factory that generates a function that 'tramslates' (maps) protein data
to colors. Meant for continous data. If the colors are then used as a columnn for the
protein data data.frame, it can be used modify colors on a 3d protein object. See
addStyleElement
for more info'
translateToGradientColors( minValue, maxValue, colorSteps = 1000, colors, transformFunction = NA )
translateToGradientColors( minValue, maxValue, colorSteps = 1000, colors, transformFunction = NA )
minValue |
minimum value to use when translating to colors. If a value is lower than this, it will be set to this |
maxValue |
maximum value to use when translating to colors. If a value is higher than thism it will be set to this |
colorSteps |
essentially the resolution of the gradient. If set to 1000 (default), then a gradient of 1000 individual colors will be created to translate to. Do not set excessively high or low |
colors |
two element character vector. Specifies the colors for the minimumValue amd the maximumValue. The translation will have 'colorSteps' gradations between these two colors |
transformFunction |
allows for passing a transformation function to the values to be translated. The minValue and maxValue arguments will also be transformed. This can be used where the value-range is (very) large |
function that takes the argument 'value', which is then used to translate the argument to the corresponding color(code)s. The function is vectorized so the argument can be multi-element. Note: the 'transformFunction' specified is applied to the argument!
# create protein data ovaProtein <- proteinCoverage(sequence = standardProtein(), peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1) # mapping Abundance information ovaProtein <- mapPeptidesToProtein(proteinDF = ovaProtein, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1L, variable = "Abundances_1", dataName = "Abundance", combineFunction = sum) # create the color translation function linearColors <- translateToGradientColors(minValue = min(ovaProtein$Abundance), maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue")) linearColors(c(0, 1E5, 1E9)) # create a logarithmic translation function logColors <- translateToGradientColors(minValue = 1E3, maxValue = 1E10, colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10p) logColors(c(0, 1E5, 1E9)) # this doesn't work, due to log10 not generating NA's logColors <- translateToGradientColors(minValue = min(ovaProtein$Abundance), maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10) logColors(c(0, 1E5, 1E9)) # this does work logColors <- translateToGradientColors(minValue = min(ovaProtein$Abundance), maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10p) logColors(c(0, 1E5, 1E9)) # limit the value range logColors <- translateToGradientColors(minValue = 1E3, maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10) logColors(c(0, 1E5, 1E9)) # same result in a different way logColors <- translateToGradientColors(minValue = min(ovaProtein$Abundance), maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10add(1E3)) logColors(c(0, 1E5, 1E9)) # add the colors to the protein data ovaProtein$AbundanceColors <- linearColors(ovaProtein$Abundance) ovaProtein$AbundanceLogColors <- logColors(ovaProtein$Abundance) ovaProtein |> dplyr::slice(35:45) ovaProtein |> dplyr::slice(360:375)
# create protein data ovaProtein <- proteinCoverage(sequence = standardProtein(), peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1) # mapping Abundance information ovaProtein <- mapPeptidesToProtein(proteinDF = ovaProtein, peptideTable = OVATable("peptide"), Accession = "P01012", positionColumn = "PositionsinProteins", shiftPosition = 1L, variable = "Abundances_1", dataName = "Abundance", combineFunction = sum) # create the color translation function linearColors <- translateToGradientColors(minValue = min(ovaProtein$Abundance), maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue")) linearColors(c(0, 1E5, 1E9)) # create a logarithmic translation function logColors <- translateToGradientColors(minValue = 1E3, maxValue = 1E10, colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10p) logColors(c(0, 1E5, 1E9)) # this doesn't work, due to log10 not generating NA's logColors <- translateToGradientColors(minValue = min(ovaProtein$Abundance), maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10) logColors(c(0, 1E5, 1E9)) # this does work logColors <- translateToGradientColors(minValue = min(ovaProtein$Abundance), maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10p) logColors(c(0, 1E5, 1E9)) # limit the value range logColors <- translateToGradientColors(minValue = 1E3, maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10) logColors(c(0, 1E5, 1E9)) # same result in a different way logColors <- translateToGradientColors(minValue = min(ovaProtein$Abundance), maxValue = max(ovaProtein$Abundance), colorSteps = 1000, colors = c("white", "blue"), transformFunction = log10add(1E3)) logColors(c(0, 1E5, 1E9)) # add the colors to the protein data ovaProtein$AbundanceColors <- linearColors(ovaProtein$Abundance) ovaProtein$AbundanceLogColors <- logColors(ovaProtein$Abundance) ovaProtein |> dplyr::slice(35:45) ovaProtein |> dplyr::slice(360:375)
Helper function to list modifications in a character vector of format modification,splitCahracter,modification,splitCahracter, etc etc
whichModifications(modification, splitCharacter = ",", trim = TRUE)
whichModifications(modification, splitCharacter = ",", trim = TRUE)
modification |
character vector of format |
splitCharacter |
character vector used to split the modification argument |
trim |
logical vector. Default is TRUE: the result will be trimmed (spaces on both sides removed). When FALSE, this is not done |
character vector
whichModifications('Oxidation,Phospho') whichModifications('Oxidation , Phospho') whichModifications('Oxidation , Phospho', trim = FALSE) whichModifications('Oxidation , Phospho, Carbamidomethyl') whichModifications(c('Oxidation , Phospho','Carbamidomethyl, Phospho'))
whichModifications('Oxidation,Phospho') whichModifications('Oxidation , Phospho') whichModifications('Oxidation , Phospho', trim = FALSE) whichModifications('Oxidation , Phospho, Carbamidomethyl') whichModifications(c('Oxidation , Phospho','Carbamidomethyl, Phospho'))