<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<ProteomeXchangeDataset id="PXD019086" formatVersion="1.4.0" xsi:noNamespaceSchemaLocation="proteomeXchange-1.4.0.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
    <!-- Controlled vocabularies declared for this record. Each Cv's id is the
         key that cvParam elements cite through their cvRef attribute.
         Every cvParam in this document uses cvRef="MS"; the MOD and UNIMOD
         vocabularies are declared here but not referenced by any cvParam. -->
    <CvList>
        <Cv fullName="PSI-MS" uri="https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo" id="MS"/>
        <Cv fullName="PSI-MOD" uri="https://raw.githubusercontent.com/MICommunity/psidev/master/psi/mod/data/PSI-MOD.obo" id="MOD"/>
        <Cv fullName="UNIMOD" uri="http://www.unimod.org/obo/unimod.obo" id="UNIMOD"/>
    </CvList>
    <!-- Revision history of this record: one dated, free-text entry per change.
         The single entry notes that the publication reference was updated for
         PubMed record 33608539. -->
    <ChangeLog>
        <ChangeLogEntry date="2021-04-06">Updated publication reference for PubMed record(s): 33608539.</ChangeLogEntry>
    </ChangeLog>
    <!-- Headline metadata: announcement date, hosting repository and title are
         attributes; the abstract, review level and repository-support level are
         child elements. -->
    <DatasetSummary announceDate="2021-01-18" hostingRepository="PRIDE" title="Deep learning the collisional cross sections of the peptide universe from a million experimental values">
        <!-- Free-text abstract. The comparison signs are written as the entities
             &lt; and &gt; so that the element text stays well-formed. -->
        <Description>The size and shape of peptide ions in the gas phase are an under-explored dimension for mass spectrometry-based proteomics. To explore the nature and utility of the entire peptide collisional cross section (CCS) space, we measure more than a million data points from whole-proteome digests of five organisms with trapped ion mobility spectrometry (TIMS) and parallel accumulation – serial fragmentation (PASEF). The scale and precision (CV &lt;1%) of our data is sufficient to train a recurrent neural network that accurately predicts CCS values solely based on the peptide sequence.  Cross section predictions for the synthetic ProteomeTools library validate the model within a 1.3% median relative error (R &gt; 0.99). Hydrophobicity, position of prolines and histidines are main determinants of the cross sections in addition to sequence-specific interactions. CCS values can now be predicted for any peptide and organism, forming a basis for advanced proteomics workflows that make full use of the additional information.</Description>
        <!-- Review status, expressed as a valueless CV term. -->
        <ReviewLevel>
            <cvParam cvRef="MS" accession="MS:1002854" name="Peer-reviewed dataset"/>
        </ReviewLevel>
        <!-- Repository support level, expressed as a valueless CV term. -->
        <RepositorySupport>
            <cvParam cvRef="MS" accession="MS:1002857" name="Unsupported dataset by repository"/>
        </RepositorySupport>
    </DatasetSummary>
    <!-- Identifiers for the dataset: the ProteomeXchange accession (the same
         value as the root element's id attribute) and the version number of
         this record, both carried in cvParam value attributes. -->
    <DatasetIdentifierList>
        <DatasetIdentifier>
            <cvParam cvRef="MS" accession="MS:1001919" name="ProteomeXchange accession number" value="PXD019086"/>
            <cvParam cvRef="MS" accession="MS:1001921" name="ProteomeXchange accession number version number" value="2"/>
        </DatasetIdentifier>
    </DatasetIdentifierList>
    <!-- Provenance of the data: a single origin entry flagged with the
         "Original data" CV term. -->
    <DatasetOriginList>
        <DatasetOrigin>
            <cvParam cvRef="MS" accession="MS:1002868" name="Original data"/>
        </DatasetOrigin>
    </DatasetOriginList>
    <!-- The five source organisms. Each Species holds a pair of cvParams:
         the scientific name (some with a common name in parentheses) and the
         matching NCBI taxonomy ID. -->
    <SpeciesList>
        <Species>
            <cvParam cvRef="MS" accession="MS:1001469" name="taxonomy: scientific name" value="Escherichia coli"/>
            <cvParam cvRef="MS" accession="MS:1001467" name="taxonomy: NCBI TaxID" value="562"/>
        </Species>
        <Species>
            <cvParam cvRef="MS" accession="MS:1001469" name="taxonomy: scientific name" value="Caenorhabditis elegans"/>
            <cvParam cvRef="MS" accession="MS:1001467" name="taxonomy: NCBI TaxID" value="6239"/>
        </Species>
        <Species>
            <cvParam cvRef="MS" accession="MS:1001469" name="taxonomy: scientific name" value="Homo sapiens (Human)"/>
            <cvParam cvRef="MS" accession="MS:1001467" name="taxonomy: NCBI TaxID" value="9606"/>
        </Species>
        <Species>
            <cvParam cvRef="MS" accession="MS:1001469" name="taxonomy: scientific name" value="Saccharomyces cerevisiae (Baker's yeast)"/>
            <cvParam cvRef="MS" accession="MS:1001467" name="taxonomy: NCBI TaxID" value="4932"/>
        </Species>
        <Species>
            <cvParam cvRef="MS" accession="MS:1001469" name="taxonomy: scientific name" value="Drosophila melanogaster (Fruit fly)"/>
            <cvParam cvRef="MS" accession="MS:1001467" name="taxonomy: NCBI TaxID" value="7227"/>
        </Species>
    </SpeciesList>
    <!-- Instruments used for acquisition. Only one is listed; its model is
         given by a CV term, and the Instrument element carries a local id. -->
    <InstrumentList>
        <Instrument id="Instrument_1">
            <cvParam cvRef="MS" accession="MS:1003005" name="timsTOF Pro"/>
        </Instrument>
    </InstrumentList>
    <!-- Modifications reported for the dataset. No individual modification is
         listed; the only entry is the CV term stating that no PTMs are
         included. -->
    <ModificationList>
        <cvParam cvRef="MS" accession="MS:1002864" name="No PTMs are included in the dataset"/>
    </ModificationList>
    <!-- People associated with the dataset. Each Contact carries name, email
         and affiliation as cvParam values; the contact's role is marked by an
         additional valueless cvParam ("dataset submitter" or "lab head"). -->
    <ContactList>
        <!-- Submitter of the dataset. -->
        <Contact id="project_submitter">
            <cvParam cvRef="MS" accession="MS:1000586" name="contact name" value="Mario Oroshi"/>
            <cvParam cvRef="MS" accession="MS:1000589" name="contact email" value="oroshi@biochem.mpg.de"/>
            <cvParam cvRef="MS" accession="MS:1000590" name="contact affiliation" value="Proteomics"/>
            <cvParam cvRef="MS" accession="MS:1002037" name="dataset submitter"/>
        </Contact>
        <!-- Lab head. The affiliation value is a postal address whose parts are
             separated only by runs of spaces. -->
        <Contact id="project_lab_head">
            <cvParam cvRef="MS" accession="MS:1002332" name="lab head"/>
            <cvParam cvRef="MS" accession="MS:1000586" name="contact name" value="Matthias Mann"/>
            <cvParam cvRef="MS" accession="MS:1000589" name="contact email" value="mmann@biochem.mpg.de"/>
            <cvParam cvRef="MS" accession="MS:1000590" name="contact affiliation" value="Department Proteomics and Signal Transduction  Max Planck Institute of Biochemistry  Am Klopferspitz 18  82152 Martinsried   Germany"/>
        </Contact>
    </ContactList>
    <!-- Publications describing the dataset. The single entry gives the PubMed
         identifier and a formatted citation string; the Publication id embeds
         the same PubMed number. -->
    <PublicationList>
        <Publication id="PMID33608539">
            <cvParam cvRef="MS" accession="MS:1000879" name="PubMed identifier" value="33608539"/>
            <cvParam cvRef="MS" accession="MS:1002866" name="Reference" value="Meier F, Köhler ND, Brunner AD, Wanka JH, Voytik E, Strauss MT, Theis FJ, Mann M. Deep learning the collisional cross sections of the peptide universe from a million experimental values. Nat Commun. 2021 12(1):1185"/>
        </Publication>
    </PublicationList>
    <!-- Submitter-supplied keywords. All keywords are packed into one cvParam
         value as a comma-separated string rather than one cvParam each. -->
    <KeywordList>
        <cvParam cvRef="MS" accession="MS:1001925" name="submitter keyword" value="Technical, ion mobility, CCS, TIMS, deep learning"/>
    </KeywordList>
    <!-- Links to the dataset as a whole: the FTP directory holding the files
         and the PRIDE project page. -->
    <FullDatasetLinkList>
        <FullDatasetLink>
            <cvParam cvRef="MS" accession="MS:1002852" name="Dataset FTP location" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086"/>
        </FullDatasetLink>
        <FullDatasetLink>
            <cvParam cvRef="MS" accession="MS:1001930" name="PRIDE project URI" value="http://www.ebi.ac.uk/pride/archive/projects/PXD019086"/>
        </FullDatasetLink>
    </FullDatasetLinkList>
    <!-- Per-file listing: 28 entries, ids FILE_0 to FILE_27. Each DatasetFile
         has a local id, the file name, and one cvParam whose CV term classifies
         the file and whose value is its FTP URI. Every URI is the file name
         appended to ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/.
         Breakdown by CV term: 1 "Other type file", 1 "Associated file",
         16 "Associated raw file", 10 "Search engine output file". -->
    <DatasetFileList>
        <!-- FILE_0 and FILE_1: checksum listing and the diaPASEF libraries archive. -->
        <DatasetFile id="FILE_0" name="checksum.txt">
            <cvParam cvRef="MS" accession="MS:1002851" name="Other type file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/checksum.txt"/>
        </DatasetFile>
        <DatasetFile id="FILE_1" name="Libraries_diaPASEF.zip">
            <cvParam cvRef="MS" accession="MS:1002845" name="Associated file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Libraries_diaPASEF.zip"/>
        </DatasetFile>
        <!-- FILE_2 to FILE_17: raw data archives (names start with Raw_). -->
        <DatasetFile id="FILE_2" name="Raw_Celegans.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Celegans.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_3" name="Raw_Drosophila_LysC.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Drosophila_LysC.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_4" name="Raw_Drosophila_LysN.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Drosophila_LysN.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_5" name="Raw_Drosophila_Trp.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Drosophila_Trp.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_6" name="Raw_Ecoli_LysC.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Ecoli_LysC.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_7" name="Raw_Ecoli_LysN.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Ecoli_LysN.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_8" name="Raw_Ecoli_Trp.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Ecoli_Trp.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_9" name="Raw_HeLa_LysC.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_HeLa_LysC.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_10" name="Raw_HeLa_LysN.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_HeLa_LysN.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_11" name="Raw_HeLa_Trp.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_HeLa_Trp.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_12" name="Raw_ProteomeTools_MissingGenes.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_ProteomeTools_MissingGenes.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_13" name="Raw_ProteomeTools_Proteotypic.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_ProteomeTools_Proteotypic.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_14" name="Raw_ProteomeTools_SRMatlas.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_ProteomeTools_SRMatlas.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_15" name="Raw_Yeast_LysC.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Yeast_LysC.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_16" name="Raw_Yeast_LysN.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Yeast_LysN.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_17" name="Raw_Yeast_Trp.zip">
            <cvParam cvRef="MS" accession="MS:1002846" name="Associated raw file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Raw_Yeast_Trp.zip"/>
        </DatasetFile>
        <!-- FILE_18 to FILE_27: search engine output archives (names start with Results_). -->
        <DatasetFile id="FILE_18" name="Results_Celegans.zip">
            <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_Celegans.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_19" name="Results_Drosophila.zip">
            <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_Drosophila.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_20" name="Results_Ecoli.zip">
            <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_Ecoli.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_21" name="Results_HeLa_LysC_LysN.zip">
            <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_HeLa_LysC_LysN.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_22" name="Results_HeLa_trypsin.zip">
            <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_HeLa_trypsin.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_23" name="Results_ProteomeTools_MissingGenes.zip">
            <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_ProteomeTools_MissingGenes.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_24" name="Results_ProteomeTools_Proteotypic.zip">
            <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_ProteomeTools_Proteotypic.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_25" name="Results_ProteomeTools_SRMatlas.zip">
            <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_ProteomeTools_SRMatlas.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_26" name="Results_Yeast.zip">
            <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_Yeast.zip"/>
        </DatasetFile>
        <DatasetFile id="FILE_27" name="Results_diaPASEF.zip">
            <cvParam cvRef="MS" accession="MS:1002849" name="Search engine output file URI" value="ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/01/PXD019086/Results_diaPASEF.zip"/>
        </DatasetFile>
    </DatasetFileList>
    <!-- Records held by the hosting repository. The single PRIDE record repeats
         the dataset title from DatasetSummary, uses the dataset accession as its
         recordID, and points at the same PRIDE project URI given in
         FullDatasetLinkList. -->
    <RepositoryRecordList>
        <RepositoryRecord name="Deep learning the collisional cross sections of the peptide universe from a million experimental values" label="PRIDE project" recordID="PXD019086" repositoryID="PRIDE" uri="http://www.ebi.ac.uk/pride/archive/projects/PXD019086"/>
    </RepositoryRecordList>
</ProteomeXchangeDataset>