<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!-- ProteomeXchange dataset record PXD019086 (format version 1.4.0), hosted by PRIDE. -->
<ProteomeXchangeDataset id="PXD019086"
                        formatVersion="1.4.0"
                        xsi:noNamespaceSchemaLocation="proteomeXchange-1.4.0.xsd"
                        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">

  <!-- Controlled vocabularies; each cvParam below points at one of these via cvRef. -->
  <CvList>
    <Cv fullName="PSI-MS" uri="https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo" id="MS"/>
    <Cv fullName="PSI-MOD" uri="https://raw.githubusercontent.com/MICommunity/psidev/master/psi/mod/data/PSI-MOD.obo" id="MOD"/>
    <Cv fullName="UNIMOD" uri="http://www.unimod.org/obo/unimod.obo" id="UNIMOD"/>
  </CvList>

  <ChangeLog>
    <ChangeLogEntry date="2021-04-06">Updated publication reference for PubMed record(s): 33608539.</ChangeLogEntry>
  </ChangeLog>

  <DatasetSummary announceDate="2021-01-18"
                  hostingRepository="PRIDE"
                  title="Deep learning the collisional cross sections of the peptide universe from a million experimental values">
    <!-- Text content: whitespace inside Description is data, so it is not re-indented.
         The less-than sign in "CV &lt;1%" is written as an entity reference because a
         literal one is not allowed in element text. -->
    <Description>The size and shape of peptide ions in the gas phase are an under-explored dimension for mass spectrometry-based proteomics. To explore the nature and utility of the entire peptide collisional cross section (CCS) space, we measure more than a million data points from whole-proteome digests of five organisms with trapped ion mobility spectrometry (TIMS) and parallel accumulation – serial fragmentation (PASEF). The scale and precision (CV &lt;1%) of our data is sufficient to train a recurrent neural network that accurately predicts CCS values solely based on the peptide sequence. Cross section predictions for the synthetic ProteomeTools library validate the model within a 1.3% median relative error (R > 0.99). Hydrophobicity, position of prolines and histidines are main determinants of the cross sections in addition to sequence-specific interactions. 
CCS values can now be predicted for any peptide and organism, forming a basis for advanced proteomics workflows that make full use of the additional information.</Description>
    <ReviewLevel>
      <cvParam cvRef="MS" accession="MS:1002854" name="Peer-reviewed dataset"/>
    </ReviewLevel>
    <RepositorySupport>
      <cvParam cvRef="MS" accession="MS:1002857" name="Unsupported dataset by repository"/>
    </RepositorySupport>
  </DatasetSummary>

  <DatasetIdentifierList>
    <DatasetIdentifier>
      <cvParam cvRef="MS" accession="MS:1001919" name="ProteomeXchange accession number" value="PXD019086"/>
      <cvParam cvRef="MS" accession="MS:1001921" name="ProteomeXchange accession number version number" value="2"/>
    </DatasetIdentifier>
  </DatasetIdentifierList>

  <DatasetOriginList>
    <DatasetOrigin>
      <cvParam cvRef="MS" accession="MS:1002868" name="Original data"/>
    </DatasetOrigin>
  </DatasetOriginList>

  <!-- One Species entry per organism: scientific name plus NCBI taxonomy identifier. -->
  <SpeciesList>
    <Species>
      <cvParam cvRef="MS" accession="MS:1001469" name="taxonomy: scientific name" value="Escherichia coli"/>
      <cvParam cvRef="MS" accession="MS:1001467" name="taxonomy: NCBI TaxID" value="562"/>
    </Species>
    <Species>
      <cvParam cvRef="MS" accession="MS:1001469" name="taxonomy: scientific name" value="Caenorhabditis elegans"/>
      <cvParam cvRef="MS" accession="MS:1001467" name="taxonomy: NCBI TaxID" value="6239"/>
    </Species>
    <Species>
      <cvParam cvRef="MS" accession="MS:1001469" name="taxonomy: scientific name" value="Homo sapiens (Human)"/>
      <cvParam cvRef="MS" accession="MS:1001467" name="taxonomy: NCBI TaxID" value="9606"/>
    </Species>
    <Species>
      <cvParam cvRef="MS" accession="MS:1001469" name="taxonomy: scientific name" value="Saccharomyces cerevisiae (Baker's yeast)"/>
      <cvParam cvRef="MS" accession="MS:1001467" name="taxonomy: NCBI TaxID" value="4932"/>
    </Species>
    <Species>
      <cvParam cvRef="MS" accession="MS:1001469" name="taxonomy: scientific name" value="Drosophila melanogaster (Fruit fly)"/>
      <cvParam cvRef="MS" accession="MS:1001467" name="taxonomy: NCBI TaxID" value="7227"/>
    </Species>
  </SpeciesList>

  <InstrumentList>
    <Instrument id="Instrument_1">
      <cvParam cvRef="MS" accession="MS:1003005" name="timsTOF Pro"/>
    </Instrument>
  </InstrumentList>

  <ModificationList>
    <cvParam cvRef="MS" accession="MS:1002864" name="No PTMs are included in the dataset"/>
  </ModificationList>

  <ContactList>
    <Contact id="project_submitter">
      <cvParam cvRef="MS" accession="MS:1000586" name="contact name" value="Mario Oroshi"/>
      <cvParam cvRef="MS" accession="MS:1000589" name="contact email" value="oroshi@biochem.mpg.de"/>
      <cvParam cvRef="MS" accession="MS:1000590" name="contact affiliation" value="Proteomics"/>
      <cvParam cvRef="MS" accession="MS:1002037" name="dataset submitter"/>
    </Contact>
    <Contact id="project_lab_head">
      <cvParam cvRef="MS" accession="MS:1002332" name="lab head"/>
      <cvParam cvRef="MS" accession="MS:1000586" name="contact name" value="Matthias Mann"/>
      <cvParam cvRef="MS" accession="MS:1000589" name="contact email" value="mmann@biochem.mpg.de"/>
      <cvParam cvRef="MS" accession="MS:1000590" name="contact affiliation" value="Department Proteomics and Signal Transduction Max Planck Institute of Biochemistry Am Klopferspitz 18 82152 Martinsried Germany"/>
    </Contact>
  </ContactList>

  <PublicationList>
    <Publication id="PMID33608539">
      <cvParam cvRef="MS" accession="MS:1000879" name="PubMed identifier" value="33608539"/>
      <cvParam cvRef="MS" accession="MS:1002866" name="Reference" value="Meier F, Köhler ND, Brunner AD, Wanka JH, Voytik E, Strauss MT, Theis FJ, Mann M. Deep learning the collisional cross sections of the peptide universe from a million experimental values. Nat Commun.  2021 12(1):1185"/>
    </Publication>
  </PublicationList>

  <KeywordList>
    <cvParam cvRef="MS" accession="MS:1001925" name="submitter keyword" value="Technical, ion mobility, CCS, TIMS, deep learning"/>
  </KeywordList>

  <FullDatasetLinkList>
    <FullDatasetLink>
      <cvParam cvRef="MS" accession="MS:1002852" name="Dataset FTP location" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086"/>
    </FullDatasetLink>
    <FullDatasetLink>
      <cvParam cvRef="MS" accession="MS:1001930" name="PRIDE project URI" value="http://www.ebi.ac.uk/pride/archive/projects/PXD019086"/>
    </FullDatasetLink>
  </FullDatasetLinkList>

  <!-- Files in the submission; the cvParam name states the file category and value holds its URI. -->
  <DatasetFileList>
    <DatasetFile id="FILE_0" name="checksum.txt">
      <cvParam cvRef="MS" accession="MS:1002851" name="Other type file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/checksum.txt"/>
    </DatasetFile>
    <DatasetFile id="FILE_1" name="Libraries_diaPASEF.zip">
      <cvParam cvRef="MS" accession="MS:1002845" name="Associated file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Libraries_diaPASEF.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_2" name="Raw_Celegans.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Celegans.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_3" name="Raw_Drosophila_LysC.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Drosophila_LysC.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_4" name="Raw_Drosophila_LysN.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Drosophila_LysN.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_5" name="Raw_Drosophila_Trp.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Drosophila_Trp.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_6" name="Raw_Ecoli_LysC.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Ecoli_LysC.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_7" name="Raw_Ecoli_LysN.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Ecoli_LysN.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_8" name="Raw_Ecoli_Trp.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Ecoli_Trp.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_9" name="Raw_HeLa_LysC.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_HeLa_LysC.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_10" name="Raw_HeLa_LysN.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_HeLa_LysN.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_11" name="Raw_HeLa_Trp.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_HeLa_Trp.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_12" name="Raw_ProteomeTools_MissingGenes.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_ProteomeTools_MissingGenes.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_13" name="Raw_ProteomeTools_Proteotypic.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_ProteomeTools_Proteotypic.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_14" name="Raw_ProteomeTools_SRMatlas.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_ProteomeTools_SRMatlas.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_15" name="Raw_Yeast_LysC.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Yeast_LysC.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_16" name="Raw_Yeast_LysN.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Yeast_LysN.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_17" name="Raw_Yeast_Trp.zip">
      <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Yeast_Trp.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_18" name="Results_Celegans.zip">
      <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_Celegans.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_19" name="Results_Drosophila.zip">
      <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_Drosophila.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_20" name="Results_Ecoli.zip">
      <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_Ecoli.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_21" name="Results_HeLa_LysC_LysN.zip">
      <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_HeLa_LysC_LysN.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_22" name="Results_HeLa_trypsin.zip">
      <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_HeLa_trypsin.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_23" name="Results_ProteomeTools_MissingGenes.zip">
      <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_ProteomeTools_MissingGenes.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_24" name="Results_ProteomeTools_Proteotypic.zip">
      <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_ProteomeTools_Proteotypic.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_25" name="Results_ProteomeTools_SRMatlas.zip">
      <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_ProteomeTools_SRMatlas.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_26" name="Results_Yeast.zip">
      <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_Yeast.zip"/>
    </DatasetFile>
    <DatasetFile id="FILE_27" name="Results_diaPASEF.zip">
      <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_diaPASEF.zip"/>
    </DatasetFile>
  </DatasetFileList>

  <RepositoryRecordList>
    <RepositoryRecord name="Deep learning the collisional cross sections of the peptide universe from a million experimental values"
                      label="PRIDE project"
                      recordID="PXD019086"
                      repositoryID="PRIDE"
                      uri="http://www.ebi.ac.uk/pride/archive/projects/PXD019086"/>
  </RepositoryRecordList>

</ProteomeXchangeDataset>