% bioRxiv preprint. The month follows the posting date embedded in the DOI and
% URL (2023-11); `pages` carries the bioRxiv article identifier, not a page range.
@article{Etourneau2023,
  author   = {Etourneau, Lucas and Fancello, Laura and Wieczorek, Samuel and Varoquaux, Nelle and Burger, Thomas},
  title    = {A new take on missing value imputation for bottom-up label-free {LC-MS/MS} proteomics},
  journal  = {bioRxiv},
  year     = {2023},
  month    = nov,
  pages    = {2023.11.09.566355},
  doi      = {10.1101/2023.11.09.566355},
  url      = {http://biorxiv.org/content/early/2023/11/13/2023.11.09.566355.abstract},
  abstract = {Label-free bottom-up proteomics using mass spectrometry and liquid chromatography has long established as one of the most popular high-throughput analysis workflow for proteome characterization. However, it produces data hindered by complex and heterogeneous missing values, which imputation has long remained problematic. To cope with this, we introduce Pirat, an algorithm that harnesses this challenge following an unprecedented approach. Notably, it models the instrument limit by estimating a global censoring mechanism from the data available. Moreover, it leverages the correlations between enzymatic cleavage products (i.e., peptides or precursor ions), while offering a natural way to integrate complementary transcriptomic information, when available. Our benchmarking on several datasets covering a variety of experimental designs (number of samples, acquisition mode, missingness patterns, etc.) and using a variety of metrics (differential analysis ground truth or imputation errors) shows that Pirat outperforms all pre-existing imputation methods. These results pinpoint the potential of Pirat as an advanced tool for imputation in proteomic data analysis, and more generally underscore the worthiness of improving imputation by explicitly modeling the correlation structures either grounded to the analytical pipeline or to the molecular biology central dogma governing multiple omic approaches.},
}

% Biometrics 70(2), June 2014. The `file` and `mendeley-groups` fields are
% reference-manager metadata, ignored by bibliography styles, and kept as exported.
@article{Chen2014,
  author          = {Chen, Lin S. and Prentice, Ross L. and Wang, Pei},
  title           = {A penalized {EM} algorithm incorporating missing data mechanism for {Gaussian} parameter estimation},
  journal         = {Biometrics},
  volume          = {70},
  number          = {2},
  pages           = {312--322},
  year            = {2014},
  month           = jun,
  publisher       = {John Wiley \& Sons, Ltd},
  issn            = {1541-0420},
  doi             = {10.1111/biom.12149},
  url             = {https://onlinelibrary.wiley.com/doi/10.1111/biom.12149},
  keywords        = {Expectation-maximization (EM) algorithm,Maximum penalized likelihood estimate,Not-missing-at-random (NMAR)},
  abstract        = {Missing data rates could depend on the targeted values in many settings, including mass spectrometry-based proteomic profiling studies. Here, we consider mean and covariance estimation under a multivariate Gaussian distribution with non-ignorable missingness, including scenarios in which the dimension (p) of the response vector is equal to or greater than the number (n) of independent observations. A parameter estimation procedure is developed by maximizing a class of penalized likelihood functions that entails explicit modeling of missing data probabilities. The performance of the resulting "penalized EM algorithm incorporating missing data mechanism (PEMM)" estimation procedure is evaluated in simulation studies and in a proteomic data illustration. {\textcopyright} 2014, The International Biometric Society.},
  file            = {:C\:/Users/DELL/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Chen, Prentice, Wang - 2014 - A penalized EM algorithm incorporating missing data mechanism for Gaussian parameter estimation.pdf:pdf;:C\:/Users/DELL/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Chen, Prentice, Wang - 2014 - A penalized EM algorithm incorporating missing data mechanism for Gaussian parameter estimation(2).pdf:pdf},
  mendeley-groups = {multi-omics,Papers bib/ECML 2022,only proteomic imputation,imputation},
}

