<?xml version="1.0" encoding="US-ASCII"?><?xml-stylesheet href="/styles/xsl/ch.xsl" type="text/xsl" title="ch"?><?xml-stylesheet href="/styles/xsl/chfn.xsl" type="text/xsl" title="chfn" alternate="yes"?><?xml-stylesheet href="/styles/xsl/chfig.xsl" type="text/xsl" title="chfig" alternate="yes"?><?xml-stylesheet href="/styles/xsl/chtable.xsl" type="text/xsl" title="chtable" alternate="yes"?><?xml-stylesheet href="/styles/xsl/chsec.xsl" type="text/xsl" title="chsec" alternate="yes"?><?xml-stylesheet href="/styles/xsl/chreferences.xsl" type="text/xsl" title="chreferences" alternate="yes"?><?xml-stylesheet href="/styles/xsl/chsindex.xsl" type="text/xsl" title="chsindex" alternate="yes"?><?xml-stylesheet href="/styles/xsl/chrelated.xsl" type="text/xsl" title="chrelated" alternate="yes"?><?xml-stylesheet href="/styles/xsl/chconts.xsl" type="text/xsl" title="chconts" alternate="yes"?><?xml-stylesheet href="/styles/xsl/html-identity.xsl" type="text/xsl" title="identity" alternate="yes"?>
<!DOCTYPE wrap
  SYSTEM "../../xml/dtds/local/wrap/wrap.dtd">
<wrap>
  <variables>
    <partno>5</partno>
    <copyright>International Union of Crystallography</copyright>
    <chnumo>5o5</chnumo>
    <published_year>2006</published_year>
    <copyright_year>2006</copyright_year>
    <isbn>1-4020-3138-6</isbn>
    <doi_dep_url>http://xrpp.iucr.org/cgi-bin/itr?url_ver=Z39.88-2003&amp;rft_dat=what%3Dchapter%26volid%3DGa%26chnumo%3D5o5%26chvers%3Dv0001</doi_dep_url>
    <epubmo/>
    <chapter_dir>/Local/Ix86/Linux/ITGEN/httpd_axkit/htdocs/Ga/ch5o5v0001</chapter_dir>
    <doi>10.1107/97809553602060000755</doi>
    <partid>gapart5</partid>
    <shortpart_title>Applications</shortpart_title>
    <chid>Gach5o5</chid>
    <ch_title>The use of mmCIF architecture for PDB data management</ch_title>
    <epubyr/>
    <next_chapter_dir>/Local/Ix86/Linux/ITGEN/httpd_axkit/htdocs/Ga/ch5o6v0001/</next_chapter_dir>
    <doi_rfr_linking_iucr_html>http://dx.doi.org/openurl?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:iucr.org&amp;rft_id=doi:10.1107/97809553602060000755&amp;rfr_dat=cr%5FsetVer%3D01%26cr%5Fpub%3D10%2E1107%26cr%5Fwork%3DThe%20use%20of%20mmCIF%20architecture%20for%20PDB%20data%20management%26cr%5Fsrc%3D10%2E1107%26cr%5FsrvTyp%3Dhtml</doi_rfr_linking_iucr_html>
    <doi_rfr_linking_iucr_pdf>http://dx.doi.org/openurl?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:iucr.org&amp;rft_id=doi:10.1107/97809553602060000755&amp;rfr_dat=cr%5FsetVer%3D01%26cr%5Fpub%3D10%2E1107%26cr%5Fwork%3DThe%20use%20of%20mmCIF%20architecture%20for%20PDB%20data%20management%26cr%5Fsrc%3D10%2E1107%26cr%5FsrvTyp%3Dpdf</doi_rfr_linking_iucr_pdf>
    <xref_ch_title>The use of mmCIF architecture for PDB data management</xref_ch_title>
    <doi_test_url>http://xrpp.iucr.org/cgi-bin/itr?url_ver=Z39.88-2003&amp;rft_dat=what%3Dchapter%26volid%3DGa%26chnumo%3D5o5%26chvers%3Dv0001&amp;rfr_id=ori:rid:iucr.org&amp;rft_id=doi:10.1107/97809553602060000755&amp;rfr_dat=cr%5FsetVer%3D01%26cr%5Fpub%3D10%2E1107%26cr%5Fwork%3DThe%20use%20of%20mmCIF%20architecture%20for%20PDB%20data%20management%26cr%5Fsrc%3D10%2E1107%26cr%5FsrvTyp%3Dhtml</doi_test_url>
    <volid>Ga</volid>
    <fpage>539</fpage>
    <series_title>International Tables for Crystallography</series_title>
    <previous_chapter_dir>/Local/Ix86/Linux/ITGEN/httpd_axkit/htdocs/Ga/ch5o4v0001/</previous_chapter_dir>
    <volume_title>International Tables for Crystallography Volume G</volume_title>
    <doi_rfr_linking_springer_html>http://dx.doi.org/openurl?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:springer.com&amp;rft_id=doi:10.1107/97809553602060000755&amp;rfr_dat=cr%5FsetVer%3D01%26cr%5Fpub%3D10%2E1107%26cr%5Fwork%3DThe%20use%20of%20mmCIF%20architecture%20for%20PDB%20data%20management%26cr%5Fsrc%3D10%2E1007%26cr%5FsrvTyp%3Dhtml</doi_rfr_linking_springer_html>
    <editor>S. R. Hall and B. McMahon</editor>
    <chnum>5.5</chnum>
    <previous_chapter_durl>/Ga/ch5o4v0001/</previous_chapter_durl>
    <lpage>543</lpage>
    <shortch_title>The use of mmCIF architecture for PDB data management</shortch_title>
    <meta_kwds>ADIT; PDB exchange data dictionary; Protein Data Bank; Research Collaboratory for Structural Bioinformatics; data exchange standards; mmCIF software; macromolecular Crystallographic Information File software; computer programs; databases; ontologies</meta_kwds>
    <volume>G</volume>
    <doi_rfr_linking_springer_pdf>http://dx.doi.org/openurl?url_ver=Z39.88-2003&amp;rfr_id=ori:rid:springer.com&amp;rft_id=doi:10.1107/97809553602060000755&amp;rfr_dat=cr%5FsetVer%3D01%26cr%5Fpub%3D10%2E1107%26cr%5Fwork%3DThe%20use%20of%20mmCIF%20architecture%20for%20PDB%20data%20management%26cr%5Fsrc%3D10%2E1007%26cr%5FsrvTyp%3Dpdf</doi_rfr_linking_springer_pdf>
    <volrevision>a</volrevision>
    <eisbn>1-4020-5411-4</eisbn>
    <next_chapter_durl>/Ga/ch5o6v0001/</next_chapter_durl>
    <epubday/>
    <chvers>v0001</chvers>
    <chapter_durl>/Ga/ch5o5v0001/</chapter_durl>
    <volume_subtitle>Definition and exchange of crystallographic data</volume_subtitle>
<volumes>
<value subtitle="Space-group symmetry">A</value>
<value subtitle="Symmetry relations between space groups">A1</value>
<value subtitle="Reciprocal space">B</value>
<value subtitle="Mathematical, physical and chemical tables">C</value>
<value subtitle="Physical properties of crystals">D</value>
<value subtitle="Subperiodic group symmetry">E</value>
<value subtitle="Crystallography of biological macromolecules">F</value>
<value subtitle="Definition and exchange of crystallographic data">G</value>
</volumes>
  </variables>
<fm>

<aug><div class="aug">
<div class="au">
<b> <span class="au">J. D. Westbrook</span>,<a class="linkclass" href="#a"><sup>a</sup></a><a class="linkclass" href="#cor"><sup>*</sup></a> <span class="au">H. Yang</span>,<a class="linkclass" href="#a"><sup>a</sup></a> <span class="au">Z. Feng</span><a class="linkclass" href="#a"><sup>a</sup></a> and&#160;<span class="au">H. M. Berman</span><a class="linkclass" href="#a"><sup>a</sup></a></b>
</div>

<div class="aff">
<p><span class="small"><a class="linkclass" name="a"><sup><b>a</b></sup></a>Protein Data Bank, Research Collaboratory for Structural Bioinformatics, Rutgers, The State University of New Jersey, Department of Chemistry and Chemical Biology, 610 Taylor Road, Piscataway, NJ 08854&#8211;8087, <span class="cny">USA</span><br/><a name="cor">Correspondence e-mail:</a>&#160; <a class="linkclass" href="mailto:jwest@rcsb.rutgers.edu">jwest@rcsb.rutgers.edu</a></span></p>
</div>

</div>
</aug>

<authorlist>
<span class="au">J. D. Westbrook</span>
<span class="au">H. Yang</span>
<span class="au">Z. Feng</span>
<span class="au">H. M. Berman</span>
  <authorsearch>DC%2Ecreator%3D%22J%2E%22%20AND%20DC%2Ecreator%3D%22D%2E%22%20AND%20DC%2Ecreator%3D%22Westbrook%22</authorsearch>
  <authorsearch>DC%2Ecreator%3D%22H%2E%22%20AND%20DC%2Ecreator%3D%22Yang%22</authorsearch>
  <authorsearch>DC%2Ecreator%3D%22Z%2E%22%20AND%20DC%2Ecreator%3D%22Feng%22</authorsearch>
  <authorsearch>DC%2Ecreator%3D%22H%2E%22%20AND%20DC%2Ecreator%3D%22M%2E%22%20AND%20DC%2Ecreator%3D%22Berman%22</authorsearch>
</authorlist>
<contribaudata>
<aug>
<au snmindx="Westbrook, J. D."><span class="au">J. D. Westbrook</span></au>
<email>jwest@rcsb.rutgers.edu</email>
<aff id="a"><a class="linkclass" name="a"><sup><b>a</b></sup></a>Protein Data Bank, Research Collaboratory for Structural Bioinformatics, Rutgers, The State University of New Jersey, Department of Chemistry and Chemical Biology, 610 Taylor Road, Piscataway, NJ 08854&#8211;8087, <span class="cny">USA</span></aff>
</aug>
<aug>
<au snmindx="Yang, H."><span class="au">H. Yang</span></au>
<email/>
<aff id="a"><a class="linkclass" name="a"><sup><b>a</b></sup></a>Protein Data Bank, Research Collaboratory for Structural Bioinformatics, Rutgers, The State University of New Jersey, Department of Chemistry and Chemical Biology, 610 Taylor Road, Piscataway, NJ 08854&#8211;8087, <span class="cny">USA</span></aff>
</aug>
<aug>
<au snmindx="Feng, Z."><span class="au">Z. Feng</span></au>
<email/>
<aff id="a"><a class="linkclass" name="a"><sup><b>a</b></sup></a>Protein Data Bank, Research Collaboratory for Structural Bioinformatics, Rutgers, The State University of New Jersey, Department of Chemistry and Chemical Biology, 610 Taylor Road, Piscataway, NJ 08854&#8211;8087, <span class="cny">USA</span></aff>
</aug>
<aug>
<au snmindx="Berman, H. M."><span class="au">H. M. Berman</span></au>
<email/>
<aff id="a"><a class="linkclass" name="a"><sup><b>a</b></sup></a>Protein Data Bank, Research Collaboratory for Structural Bioinformatics, Rutgers, The State University of New Jersey, Department of Chemistry and Chemical Biology, 610 Taylor Road, Piscataway, NJ 08854&#8211;8087, <span class="cny">USA</span></aff>
</aug>
  <authorsearch>DC%2Ecreator%3D%22J%2E%22%20AND%20DC%2Ecreator%3D%22D%2E%22%20AND%20DC%2Ecreator%3D%22Westbrook%22</authorsearch>
  <authorsearch>DC%2Ecreator%3D%22H%2E%22%20AND%20DC%2Ecreator%3D%22Yang%22</authorsearch>
  <authorsearch>DC%2Ecreator%3D%22Z%2E%22%20AND%20DC%2Ecreator%3D%22Feng%22</authorsearch>
  <authorsearch>DC%2Ecreator%3D%22H%2E%22%20AND%20DC%2Ecreator%3D%22M%2E%22%20AND%20DC%2Ecreator%3D%22Berman%22</authorsearch>
</contribaudata>

<xrefauthorinfo>
<au>
<fnm>J. D.</fnm>
<snm>Westbrook</snm>
<nee/>
<jr/>
</au>
<au>
<fnm>H.</fnm>
<snm>Yang</snm>
<nee/>
<jr/>
</au>
<au>
<fnm>Z.</fnm>
<snm>Feng</snm>
<nee/>
<jr/>
</au>
<au>
<fnm>H. M.</fnm>
<snm>Berman</snm>
<nee/>
<jr/>
</au>
</xrefauthorinfo>

<abs><div id="abs"><p>The Protein Data Bank (PDB) has grown from an archive of seven entries in 1973 to a large and rapidly growing collection of more than 30&#160;000 structures as of May 2005. The PDB now provides a key information resource for the structural biology community. A software framework has been developed that supports automation and scalability, and that can adapt to changes in data content and delivery technology, to permit future development. The mmCIF data dictionary and an accompanying extended dictionary of local data items provide the core ontology which is the basis for software development. The associated software tools exchange and validate data, create and load databases, translate data formats, and serve application program interfaces. They are made available to the scientific community through an open-source licence.</p>
</div>
</abs>
</fm>
<bdy>
<subch>
<div id="divsec5o5o1" class="sec1" secnum="5.5.1" fpage="539" lpage="539">
<div class="sectionheaders">
<h3 class="sectionheaders"><a name="sec5o5o1"><tree level="1"/></a>5.5.1. Introduction<indexg><index id="gach5o5index00001" type="s" significance="standard">Protein Data Bank</index></indexg><indexg><index id="gach5o5index00002" type="s" significance="standard">mmCIF<index id="gach5o5index00003" type="s" significance="standard">use in PDB data management</index></index></indexg><indexg><index id="gach5o5index00004" type="s" significance="standard">PDB exchange data dictionary</index></indexg><indexg><index id="gach5o5index00005" type="s" significance="standard">Research Collaboratory for Structural Bioinformatics</index></indexg></h3>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/sec5o5o1.pdf">pdf</a> |</span>
</div>
<st secid="sec5o5o1" secnum="5.5.1">Introduction<indexg><index id="gach5o5index00001" type="s" significance="standard">Protein Data Bank</index></indexg><indexg><index id="gach5o5index00002" type="s" significance="standard">mmCIF<index id="gach5o5index00003" type="s" significance="standard">use in PDB data management</index></index></indexg><indexg><index id="gach5o5index00004" type="s" significance="standard">PDB exchange data dictionary</index></indexg><indexg><index id="gach5o5index00005" type="s" significance="standard">Research Collaboratory for Structural Bioinformatics</index></indexg></st>
<p>The Protein Data Bank (PDB) is an archive for macromolecular structures (Bernstein <span class="it"><i>et al.</i></span>, 1977<bbr id="bb3"/>; Berman <span class="it"><i>et al.</i></span>, 2000<bbr id="bb2"/>) and a major component of a global resource for macromolecular structural science (Berman <span class="it"><i>et al.</i></span>, 2003<bbr id="bb1"/>). The scale of its data handling operations is large, and depends on the effective exploitation of the latest developments in the science and technology of informatics. A significant component of its data storage and retrieval strategy is the management of structural data in mmCIF format with appropriate extensions.</p>
<p>Over its 30-year history, the PDB archive has grown from seven entries in 1973 to a collection of over 30&#160;000 structures as of May 2005. The growth in the size of the archive has been accompanied by increases in both data content and in the structural complexity of individual entries. As the PDB has grown, there has been a significant broadening of its user community. In response to this change, the role of the PDB has expanded from being simply a provider of structure data files to providing a key information resource for the structural biology community.</p>
<p>Looking forward, an acceleration in the growth of the PDB archive is anticipated owing to developments in high-throughput structural determination methodologies and worldwide structural genomics efforts. To support the continued growth and evolution of the PDB archive, a framework is required that supports automation and scalability, and that can adapt to changes in both data content and delivery technology.</p>
<p>At the core of the PDB informatics infrastructure is an <indexg><index id="gach5o5index00006" type="s" significance="standard">ontology</index></indexg>ontology of data definitions which electronically encode domain information in the form of precise definitions, examples and controlled vocabularies. In addition to domain information, data definitions also encode information such as data type, data relationships, range restrictions and presentation units.</p>
<p>The software-accessible PDB exchange data dictionary (<related volume="G" chnum="3.6" url="/Ga/ch3o6v0001/#app3o6o2"><relchtitle>Classification and use of macromolecular data</relchtitle><relau>P. M. D. Fitzgerald</relau><relau>J. D. Westbrook</relau><relau>P. E. Bourne</relau><relau>B. McMahon</relau><relau>K. D. Watenpaugh</relau><relau>H. M. Berman</relau></related>Appendix 3.6.2<a href="/Ga/ch3o6v0001/#app3o6o2"><img align="bottom" border="0" src="/graphics/greenarr.gif" alt="[link]"/></a>
) is the key part of the PDB informatics infrastructure. The exchange dictionary is an extension of the macromolecular Crystallographic Information File (mmCIF) data dictionary (Bourne <span class="it"><i>et al.</i></span>, 1997<bbr id="bb4"/>). The dictionary provides the foundation for software tools which exchange and validate data, create and load databases, translate data formats, and serve application program interfaces. The components of the informatics infrastructure developed by the PDB are being used to build a data pipeline to support high-throughput structure determination.</p>
</div>

<div id="divsec5o5o2" class="sec1" secnum="5.5.2" fpage="539" lpage="541">
<div class="sectionheaders">
<h3 class="sectionheaders"><a name="sec5o5o2"><tree level="1"/></a>5.5.2. Representing macromolecular structure data</h3>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/sec5o5o2.pdf">pdf</a> |</span>
</div>
<st secid="sec5o5o2" secnum="5.5.2">Representing macromolecular structure data</st>
<p>Macromolecular structure data have historically been represented in a simple record-oriented format developed by the PDB; this format has been widely used in structural and computational biology. While this PDB format has in general been adequate for representing coordinate data, it has proved less satisfactory for the description of related information such as chemical and biological features and experimental methodology. To provide a more rigorous data encoding that includes all of this related information, the Protein Data Bank has in recent years adopted a comprehensive <indexg><index id="gach5o5index00007" type="s" significance="standard">ontology</index></indexg>ontology of structure and experiment based on the content of the mmCIF data dictionary.</p>

<div id="divsec5o5o2o1" class="sec2" secnum="5.5.2.1" fpage="539" lpage="539">
<div class="sectionheaders">
<h4 class="sectionheaders"><a name="sec5o5o2o1"><tree level="2"/></a>5.5.2.1. PDB format<indexg><index id="gach5o5index00008" type="s" significance="standard">data exchange standards<index id="gach5o5index00009" type="s" significance="standard">PDB</index></index></indexg></h4>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/sec5o5o2o1.pdf">pdf</a> |</span>
</div>
<st secid="sec5o5o2o1" secnum="5.5.2.1">PDB format<indexg><index id="gach5o5index00008" type="s" significance="standard">data exchange standards<index id="gach5o5index00009" type="s" significance="standard">PDB</index></index></indexg></st>
<p>For the past 30 years, the PDB has served as the single central repository for macromolecular structure data. The data format used to store archival entries in the PDB is a column-oriented data format resembling many data formats developed to accommodate the limitations of paper punched-card technology (see Chapter <related volume="G" chnum="1.1" url="/Ga/ch1o1v0001/"><relchtitle>Genesis of the Crystallographic Information File</relchtitle><relau>S. R. Hall</relau><relau>B. McMahon</relau></related>1.1<a href="/Ga/ch1o1v0001/"><img align="bottom" border="0" src="/graphics/greenarr.gif" alt="[link]"/></a>). An example of the data format is shown in Fig. 5.5.2.1<figr id="fig5o5o2o1" loc="float"/>.</p>
<figplace id="fig5o5o2o1"/>
<p>Many of the data records in this format are prefixed with a record tag (<span class="it"><i>e.g.</i></span> CRYST1, ATOM) followed by individual items of data. The specifications for the records in this data format are described informally by Callaway <span class="it"><i>et al.</i></span> (1996<bbr id="bb5"/>). In addition to the labelled records as in Fig. 5.5.2.1<figr id="fig5o5o2o1" loc="float"/>, many data records in the PDB format are presented as unstructured or only semi-structured remark records.</p>
</div>

<div id="divsec5o5o2o2" class="sec2" secnum="5.5.2.2" fpage="539" lpage="540">
<div class="sectionheaders">
<h4 class="sectionheaders"><a name="sec5o5o2o2"><tree level="2"/></a>5.5.2.2. Ontology representation of macromolecular structure data</h4>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/sec5o5o2o2.pdf">pdf</a> |</span>
</div>
<st secid="sec5o5o2o2" secnum="5.5.2.2">Ontology representation of macromolecular structure data</st>
<p>In 1998, the Research Collaboratory for Structural Bioinformatics (RCSB) assumed the management responsibilities for the PDB. One important outcome was the change in the underlying data representation used to process PDB data. The PDB now collects and processes data using a data representation based on a comprehensive <indexg><index id="gach5o5index00010" type="s" significance="standard">ontology</index></indexg>ontology of macromolecular structure and experiment: the PDB exchange data dictionary. This representation is an extension of the mmCIF data dictionary, now the standard data representation for experimentally determined three-dimensional macromolecular structures. The dictionary and data files based on this data <indexg><index id="gach5o5index00011" type="s" significance="standard">STAR File<index id="gach5o5index00012" type="s" significance="standard">basis for macromolecular structure ontology</index></index></indexg>ontology (Westbrook &amp; Bourne, 2000<bbr id="bb10"/>) are expressed using Self-defining Text Archival and Retrieval (STAR) syntax (Chapter <related volume="G" chnum="2.1" url="/Ga/ch2o1v0001/"><relchtitle>Specification of the STAR File</relchtitle><relau>S. R. Hall</relau><relau>N. Spadaccini</relau></related>2.1<a href="/Ga/ch2o1v0001/"><img align="bottom" border="0" src="/graphics/greenarr.gif" alt="[link]"/></a>
).</p>
<p>Although the mmCIF dictionary was developed within the crystallographic community, the metadata model employed by mmCIF is quite general and has been adopted by other application domains including <indexg><index id="gach5o5index00013" type="s" significance="standard">NMR structures</index></indexg>NMR, molecular modelling and molecular recognition (dictionaries are available at <indexg><index id="gach5o5index00014" type="s" significance="standard">web sites<index id="gach5o5index00015" type="s" significance="standard">PDB mmCIF site: <a class="linkclass" href="http://mmcif.pdb.org/">http://mmcif.pdb.org/</a>
</index></index></indexg><a class="linkclass" href="http://mmcif.pdb.org/">http://mmcif.pdb.org/</a>
). Within the crystallographic community, <indexg><index id="gach5o5index00016" type="s" significance="standard">metadata</index></indexg>metadata dictionaries have also been developed for other types of diffraction experiments, electron-microscopy data and for the general description of image data. The <indexg><index id="gach5o5index00017" type="s" significance="standard">metadata</index></indexg>metadata concepts and tools that have been developed to support mmCIF are sufficiently general that they may be applied to the description of data in virtually any application.</p>
<p>The demands of <indexg><index id="gach5o5index00018" type="s" significance="standard">structural genomics</index></indexg>structural genomics projects have driven the development of extensions to capture an increased level of experimental detail. These are available at <indexg><index id="gach5o5index00019" type="s" significance="standard">web sites<index id="gach5o5index00020" type="s" significance="standard">PDB mmCIF site: <a class="linkclass" href="http://mmcif.pdb.org/">http://mmcif.pdb.org/</a>
</index></index></indexg><a class="linkclass" href="http://mmcif.pdb.org/">http://mmcif.pdb.org/</a>. Extensions have also been introduced to describe <indexg><index id="gach5o5index00021" type="s" significance="standard">NMR structures</index></indexg>NMR, cryo-electron microscopy and all aspects of protein production. The ability to rapidly add extensions and incorporate these into the PDB data-processing system is an important feature for supporting the rapidly evolving technologies associated with high-throughput structure determinations.</p>
<p>The mmCIF metadata architecture is built from three levels as illustrated in Fig. 5.5.2.2<figr id="fig5o5o2o2" loc="float"/> (see also Chapter <related volume="G" chnum="2.6" url="/Ga/ch2o6v0001/"><relchtitle>Specification of a relational dictionary definition language (DDL2)</relchtitle><relau>J. D. Westbrook</relau><relau>H. M. Berman</relau><relau>S. R. Hall</relau></related>2.6<a href="/Ga/ch2o6v0001/"><img align="bottom" border="0" src="/graphics/greenarr.gif" alt="[link]"/></a>
). Individual data files are described at the top level (<span class="it"><i>e.g.</i></span> Fig. 5.5.2.2<span class="it"><i>a</i></span><figr id="fig5o5o2o2" loc="float"/>). The contents of these data files are defined by a data dictionary (<span class="it"><i>e.g.</i></span> Fig. 5.5.2.2<span class="it"><i>b</i></span><figr id="fig5o5o2o2" loc="float"/>) in the next lower level (see Chapters <related volume="G" chnum="3.6" url="/Ga/ch3o6v0001/"><relchtitle>Classification and use of macromolecular data</relchtitle><relau>P. M. D. Fitzgerald</relau><relau>J. D. Westbrook</relau><relau>P. E. Bourne</relau><relau>B. McMahon</relau><relau>K. D. Watenpaugh</relau><relau>H. M. Berman</relau></related>3.6<a href="/Ga/ch3o6v0001/"><img align="bottom" border="0" src="/graphics/greenarr.gif" alt="[link]"/></a>
 and 4.5<a href="/Ga/ch4o5v0001/"><img align="bottom" border="0" src="/graphics/greenarr.gif" alt="[link]"/></a>
). The attributes used in this data dictionary to build data definitions are in turn defined in the dictionary description language (<indexg><index id="gach5o5index00022" type="s" significance="standard">DDL<index id="gach5o5index00023" type="s" significance="standard">role in mmCIF metadata architecture</index></index></indexg>DDL) (<span class="it"><i>e.g.</i></span> Fig. 5.5.2.2<span class="it"><i>c</i></span><figr id="fig5o5o2o2" loc="float"/>) in the lowest level (see Chapters <related volume="G" chnum="2.6" url="/Ga/ch2o6v0001/"><relchtitle>Specification of a relational dictionary definition language (DDL2)</relchtitle><relau>J. D. Westbrook</relau><relau>H. M. Berman</relau><relau>S. R. Hall</relau></related>2.6<a href="/Ga/ch2o6v0001/"><img align="bottom" border="0" src="/graphics/greenarr.gif" alt="[link]"/></a>
 and 4.10<a href="/Ga/ch4o10v0001/"><img align="bottom" border="0" src="/graphics/greenarr.gif" alt="[link]"/></a>).</p>
<figplace id="fig5o5o2o2"/>
<p>The major syntactical constructs used by mmCIF are illustrated in the data file example of Fig. 5.5.2.2(<span class="it"><i>a</i></span>)<figr id="fig5o5o2o2" loc="float"/>. Each data item or group of data items is preceded by an identifying keyword. Groups of related data items are organized into data categories. Two categories, <span class="scp"><span class="2">CELL</span></span> and <span class="scp"><span class="2">ENTITY_POLY_SEQ</span></span>, are shown in the example. <span class="scp"><span class="2">CELL</span></span> contains an individual instance describing a single set of crystallographic cell constants. <span class="scp"><span class="2">ENTITY_POLY_SEQ</span></span> contains a <indexg><index id="gach5o5index00024" sort="loop" type="s" significance="standard"> <span class="ty"><tt>loop_</tt></span> (STAR File keyword)</index></indexg><span class="b"><b><span class="ty"><tt>loop_</tt></span></b></span> (<span class="it"><i>i.e.</i></span> table) of instances describing a polymer residue sequence. Essentially all mmCIF data are described as a set of tabular data structures.</p>
<p>Each mmCIF data item is defined in a data dictionary. Data definitions are given between save-frame delimiters (<span class="it"><i>i.e.</i></span> <span class="b"><b><span class="ty"><tt>save_</tt></span></b></span>); apart from this, the data definitions share the same simple syntax as used in data files. An example definition for a crystallographic cell constant is shown in Fig. 5.5.2.2(<span class="it"><i>b</i></span>)<figr id="fig5o5o2o2" loc="float"/>. Many features of the cell constant are described in this definition, including data type, range restrictions, units of expression, dependent quantities, related definitions, necessity and related precision estimate. Although not shown in this example, dictionary definitions can also include parent&#8211;child relationships that have important consequences in maintaining data consistency.</p>
<p>The attributes of each data definition are defined in the DDL dictionary. Fig. 5.5.2.2(<span class="it"><i>c</i></span>)<figr id="fig5o5o2o2" loc="float"/> shows example DDL definitions describing data types. DDL definitions have the same syntax as definitions used in the data dictionary. Because the attributes of the DDL are also used in DDL definitions, this metadata architecture is described as self-defining.</p>
<p><indexg><index id="gach5o5index00025" type="s" significance="standard">web sites<index id="gach5o5index00026" type="s" significance="standard">PDB software tools: <a class="linkclass" href="http://sw-tools.pdb.org/">http://sw-tools.pdb.org/</a>
</index></index></indexg>The RCSB PDB distributes parsing tools that support all three levels of this metadata architecture (<a class="linkclass" href="http://sw-tools.pdb.org/">http://sw-tools.pdb.org/</a>
). The <indexg><index id="gach5o5index00027" sort="cifparseobj" type="s" significance="standard"><span class="it"><i>CIFPARSE-OBJ</i></span></index></indexg><indexg><index id="gach5o5index00028" type="s" significance="standard">computer programs<index id="gach5o5index00029" type="s" significance="standard"><span class="it"><i>CIFPARSE-OBJ</i></span></index></index></indexg><span class="it"><i>CIFPARSE-OBJ</i></span> package (Tosic &amp; Westbrook, 2000<bbr id="bb9"/>) provides high-level methods to read, write, validate and manage data from data files, dictionaries and DDLs. Data files can be validated relative to an input data dictionary, and dictionary files can be validated relative to an input DDL. <span class="it"><i>CIFPARSE-OBJ</i></span> stores information in a collection of table objects. Access <indexg><index id="gach5o5index00030" type="s" significance="standard">methods</index></indexg>methods are provided to search and manipulate the table <indexg><index id="gach5o5index00031" type="s" significance="standard">object-oriented CIF libraries</index></indexg>objects. A companion package, <span class="it"><i>CIFOBJ</i></span> (Schirripa &amp; Westbrook, 1996<bbr id="bb8"/>), provides an alternative representation of dictionary and DDL data. <indexg><index id="gach5o5index00032" sort="cifobj" type="s" significance="standard"><span class="it"><i>CIFOBJ</i></span></index></indexg><indexg><index id="gach5o5index00033" type="s" significance="standard">computer programs<index id="gach5o5index00034" type="s" significance="standard"><span class="it"><i>CIFOBJ</i></span></index></index></indexg><span class="it"><i>CIFOBJ</i></span> organizes dictionary information into a collection of category and item-level objects. Access methods are provided for all dictionary attributes.</p>
</div>

<div id="divsec5o5o2o3" class="sec2" secnum="5.5.2.3" fpage="541" lpage="541">
<div class="sectionheaders">
<h4 class="sectionheaders"><a name="sec5o5o2o3"><tree level="2"/></a>5.5.2.3. Supporting other data formats and data delivery methods</h4>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/sec5o5o2o3.pdf">pdf</a> |</span>
</div>
<st secid="sec5o5o2o3" secnum="5.5.2.3">Supporting other data formats and data delivery methods</st>
<p>One of the greatest benefits of a dictionary-based informatics infrastructure is the flexibility that it provides in supporting alternative data formats and delivery methods. Because the data and all of their defining attributes are electronically encoded, translation between data and dictionary formats can be achieved using light-weight software filters without loss of any information.</p>
<p><indexg><index id="gach5o5index00035" type="s" significance="standard">XML<index id="gach5o5index00036" type="s" significance="standard">mmCIF schema</index></index></indexg>XML provides a particularly good example of the ease with which data can be converted to and from the mmCIF format. XML translations of mmCIF data files are currently provided on the RCSB PDB beta ftp site (<indexg><index id="gach5o5index00037" type="s" significance="standard">web sites<index id="gach5o5index00038" type="s" significance="standard">Research Collaboratory for Structural Bioinformatics data standardisation: <a class="linkclass" href="ftp://beta.rcsb.org/">ftp://beta.rcsb.org/</a>
</index></index></indexg><a class="linkclass" href="ftp://beta.rcsb.org/pub/pdb/uniformity/data/XML/">ftp://beta.rcsb.org/pub/pdb/uniformity/data/XML/</a>
). These XML files use mmCIF dictionary data-item names as XML tags. These files were created by a translation tool (<a class="linkclass" href="http://sw-tools.pdb.org/apps/MMCIF-XML-UTIL/">http://sw-tools.pdb.org/apps/MMCIF-XML-UTIL/</a>
) that translates mmCIF data files to XML in compliance with an XML schema. The XML schema is similarly software-translated from the PDB exchange data dictionary.</p>
<p>Other delivery methods such as <indexg><index id="gach5o5index00039" type="s" significance="standard">Corba</index></indexg>Corba (<indexg><index id="gach5o5index00040" type="s" significance="standard">web sites<index id="gach5o5index00041" type="s" significance="standard">Object Management Group: <a class="linkclass" href="http://www.omg.org/">http://www.omg.org/</a>
</index></index></indexg><a class="linkclass" href="http://www.omg.org/cgi-bin/doc?lifesci/00-02-02">http://www.omg.org/cgi-bin/doc?lifesci/00-02-02</a>
) do not require a data format, as data are exchanged using an application program interface (API). A Corba <indexg><index id="gach5o5index00042" type="s" significance="standard">API</index></indexg>API for macromolecular structure (Greer <span class="it"><i>et al.</i></span>, 2002<bbr id="bb6"/>) based on the content of the mmCIF data dictionary has been approved by the <indexg><index id="gach5o5index00043" type="s" significance="standard">Object Management Group</index></indexg>Object Management Group (OMG). Software tools supporting this Corba API <indexg><index id="gach5o5index00044" sort="openmms" type="s" significance="standard"><span class="it"><i>OpenMMS</i></span></index></indexg><indexg><index id="gach5o5index00045" type="s" significance="standard">computer programs<index id="gach5o5index00046" type="s" significance="standard"><span class="it"><i>OpenMMS</i></span></index></index></indexg>(<span class="it"><i>OpenMMS</i></span>, <indexg><index id="gach5o5index00047" type="s" significance="standard">web sites<index id="gach5o5index00048" type="s" significance="standard">OpenMMS software: <a class="linkclass" href="http://openmms.sdsc.edu">http://openmms.sdsc.edu</a>
</index></index></indexg><a class="linkclass" href="http://openmms.sdsc.edu">http://openmms.sdsc.edu</a>
, and <indexg><index id="gach5o5index00049" sort="film" type="s" significance="standard"><span class="it"><i>FILM</i></span></index></indexg><indexg><index id="gach5o5index00050" type="s" significance="standard">computer programs<index id="gach5o5index00051" type="s" significance="standard"><span class="it"><i>FILM</i></span></index></index></indexg><span class="it"><i>FILM</i></span>, <a class="linkclass" href="http://sw-tools.pdb.org/apps/FILM">http://sw-tools.pdb.org/apps/FILM</a>
) take full advantage of the data dictionary in building the interface definitions and supporting server on which the API is based (see also Section <related volume="G" chnum="5.3" url="/Ga/ch5o3v0001/#sec5o3o8o2"><relchtitle>Syntactic utilities for CIF</relchtitle><relau>B. McMahon</relau></related>5.3.8.2<a href="/Ga/ch5o3v0001/#sec5o3o8o2"><img align="bottom" border="0" src="/graphics/greenarr.gif" alt="[link]"/></a>
).</p>
</div>
</div>

<div id="divsec5o5o3" class="sec1" secnum="5.5.3" fpage="541" lpage="543">
<div class="sectionheaders">
<h3 class="sectionheaders"><a name="sec5o5o3"><tree level="1"/></a>5.5.3. Integrated data-processing system: overview</h3>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/sec5o5o3.pdf">pdf</a> |</span>
</div>
<st secid="sec5o5o3" secnum="5.5.3">Integrated data-processing system: overview</st>
<p>The RCSB PDB data-processing system has been designed to take full advantage of the features of the mmCIF metadata framework. The AutoDep Input Tool <indexg><index id="gach5o5index00052" sort="adit" type="s" significance="standard"><span class="it"><i>ADIT</i></span></index></indexg><indexg><index id="gach5o5index00053" type="s" significance="standard">computer programs<index id="gach5o5index00054" type="s" significance="standard"><span class="it"><i>ADIT</i></span></index></index></indexg>(<span class="it"><i>ADIT</i></span>) is an integrated data-processing system developed to support deposition, data processing and annotation of three-dimensional macromolecular structure data.</p>
<p>This system, which is outlined in Fig. 5.5.3.1<figr id="fig5o5o3o1" loc="float"/>, accepts experimental and structural data from a user for deposition. Data are input in the form of data files or through a web-based form interface. The input data can be validated in a very basic sense for syntax compliance and internal consistency. Other computational <indexg><index id="gach5o5index00055" type="s" significance="standard">validation</index></indexg>validation can also be applied, including checking the input <indexg><index id="gach5o5index00056" type="s" significance="standard">structure checking</index></indexg>structure data against a variety of community standard geometrical criteria and comparing the input experimental data with the derived structure model. The suite of validation software used within <span class="it"><i>ADIT</i></span> is distributed separately (<a class="linkclass" href="http://sw-tools.pdb.org/apps/VAL/">http://sw-tools.pdb.org/apps/VAL/</a>
). All of this validation information is returned to the user as a collection of HTML reports.</p>
<figplace id="fig5o5o3o1"/>
<p><indexg><index id="gach5o5index00057" type="s" significance="standard">database<index id="gach5o5index00058" type="s" significance="standard">loader and support tools (mmCIF)</index></index></indexg>In addition to providing data-validation reports, <span class="it"><i>ADIT</i></span> also encodes data in archival data files and loads data into a relational database. The loading of data into the relational database is aided by an expert annotator. The <span class="it"><i>ADIT</i></span> system customizes its behaviour according to the user's requirements. One important distinction is between the behaviour of the interface provided for depositing data and that of the interface used for annotating the data. The depositor interface is focused only on data collection and provides the simplest possible presentation of the information to be input. The annotator sees the detail of all possible data items as well as the full functionality of the supporting data-processing software and database system.</p>
<p>Although the <span class="it"><i>ADIT</i></span> system was originally developed to support the centralized data deposition and annotation of macromolecular structure data, it is not limited to these particular applications. Because the architecture of the <span class="it"><i>ADIT</i></span> system derives the full scope of information to be processed from a data dictionary, the system can transparently provide data input and processing functionality for any content domain. This feature has been exploited in building a data-input tool for the <indexg><index id="gach5o5index00059" type="s" significance="standard">BioSync</index></indexg>BioSync project (Kuller <span class="it"><i>et al.</i></span>, 2002<bbr id="bb7"/>). The <span class="it"><i>ADIT</i></span> system can also be configured in workstation mode to provide single-user data collection and processing functionality. This version of the <span class="it"><i>ADIT</i></span> system as well as the supporting mmCIF parsing and data-management tools are currently distributed by the RCSB PDB under an open-source licence (<a class="linkclass" href="http://sw-tools.pdb.org/apps/ADIT">http://sw-tools.pdb.org/apps/ADIT</a>
).</p>

<div id="divsec5o5o3o1" class="sec2" secnum="5.5.3.1" fpage="541" lpage="542">
<div class="sectionheaders">
<h4 class="sectionheaders"><a name="sec5o5o3o1"><tree level="2"/></a>5.5.3.1. <span class="it"><i>ADIT</i></span>: functional description </h4>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/sec5o5o3o1.pdf">pdf</a> |</span>
</div>
<st secid="sec5o5o3o1" secnum="5.5.3.1"><span class="it"><i>ADIT</i></span>: functional description </st>
<p>The basic functions of the <span class="it"><i>ADIT</i></span> deposition system are shown in Fig. 5.5.3.2<figr id="fig5o5o3o2" loc="float"/>. Users interact with the <span class="it"><i>ADIT</i></span> system through a web server. The <indexg><index id="gach5o5index00060" type="s" significance="standard">CGI (Common Gateway Interface)</index></indexg>CGI components of the <span class="it"><i>ADIT</i></span> system (that is, functional software components interacting with web input data through the Common Gateway Interface protocol) dynamically build the HTML that provides the system user interface. These CGI components are currently implemented as compiled binaries from <indexg><index id="gach5o5index00061" type="s" significance="standard">C++</index></indexg>C++ source code.</p>
<figplace id="fig5o5o3o2"/>
<p>User data can be provided in the form of data files or as keyboard input. Input files can be accepted in a variety of formats. <span class="it"><i>ADIT</i></span> uses a collection of format filters to convert input data to the data specification defined in a persistent data dictionary. Data in the form of data files are typically loaded first. Any input data that are not included in uploaded files can be keyed in by the user. <span class="it"><i>ADIT</i></span> builds a set of HTML forms for each category of data to be input. At any point during an input session, a user may choose to view or deposit the input data. Users who are depositing data may also use the data-validation services through the <span class="it"><i>ADIT</i></span> interface.</p>
<p>Comprehensive data ontologies like the PDB exchange dictionary contain vast numbers of data definitions. A data-input application may only need to access a small fraction of these definitions at any point. To address the problem of selecting only the relevant set of input data items from a data dictionary, <span class="it"><i>ADIT</i></span> uses a view database. In addition to defining the <indexg><index id="gach5o5index00062" type="s" significance="standard">scope</index></indexg>scope of the data items to be edited by the <span class="it"><i>ADIT</i></span> application, an <span class="it"><i>ADIT</i></span> data view also stores presentation details that are used in building the HTML input forms. An important use of the data view is to provide a simple and intuitive presentation of information for novice users which disguises the complex details of a data dictionary.</p>
<p>Fig. 5.5.3.3<figr id="fig5o5o3o3" loc="float"/> shows an example <indexg><index id="gach5o5index00063" sort="adit" type="s" significance="standard"><span class="it"><i>ADIT</i></span></index></indexg><indexg><index id="gach5o5index00064" type="s" significance="standard">computer programs<index id="gach5o5index00065" type="s" significance="standard"><span class="it"><i>ADIT</i></span></index></index></indexg><span class="it"><i>ADIT</i></span> editing screen for the crystallographic unit cell. The data dictionary category containing this information is named <span class="scp"><span class="2">CELL</span></span>, and the length of the first cell axis is defined in the dictionary as <span class="b"><b><span class="ty"><tt>_cell.length_a</tt></span></b></span> (Fig. 5.5.2.2<span class="it"><i>b</i></span><figr id="fig5o5o2o2" loc="float"/>). In this case, the data view has substituted <span class="it"><i>Unit Cell</i></span> and <span class="it"><i>Length a</i></span> for the dictionary data names. Although this example is simple, some dictionary data names are as long as 75 characters, and in these instances the ability to display a simpler name is essential.</p>
<figplace id="fig5o5o3o3"/>
<p>Precise dictionary definitions and examples obtained from the data dictionary are accessible from the <span class="it"><i>ADIT</i></span> interface through buttons next to each data item. <span class="it"><i>ADIT</i></span> makes full use of the dictionary specification in data-input operations. Data items defined to assume only specific values have pulldown menus or selection boxes. Data type and range restrictions are checked when data are input and diagnostics are displayed to the user if errors are detected.</p>
<p>For performance reasons, the data dictionary is converted from its tabular text structure to an object representation using <indexg><index id="gach5o5index00066" sort="cifobj" type="s" significance="standard"><span class="it"><i>CIFOBJ</i></span></index></indexg><indexg><index id="gach5o5index00067" type="s" significance="standard">computer programs<index id="gach5o5index00068" type="s" significance="standard"><span class="it"><i>CIFOBJ</i></span></index></index></indexg><span class="it"><i>CIFOBJ</i></span>. The class supporting the object representation provides efficient access functions to all of the data dictionary attributes. A dictionary loader is used to check the consistency of the data dictionary and to load the object representation from the text form of the data dictionary.</p>
<p>Any dictionary that complies with the dictionary description language (<indexg><index id="gach5o5index00069" type="s" significance="standard">DDL2</index></indexg>DDL2) can be loaded and used by <span class="it"><i>ADIT</i></span>. All <span class="it"><i>ADIT</i></span> software components gain their knowledge of the input data from the data dictionary and any associated data views. Consequently, <span class="it"><i>ADIT</i></span> can be tailored for use in virtually any data-input and data-processing application.</p>
</div>

<div id="divsec5o5o3o2" class="sec2" secnum="5.5.3.2" fpage="542" lpage="542">
<div class="sectionheaders">
<h4 class="sectionheaders"><a name="sec5o5o3o2"><tree level="2"/></a>5.5.3.2. Generalized database support</h4>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/sec5o5o3o2.pdf">pdf</a> |</span>
</div>
<st secid="sec5o5o3o2" secnum="5.5.3.2">Generalized database support</st>
<p>In addition to the data editing and processing functions, <span class="it"><i>ADIT</i></span> also supports a versatile database loader <indexg><index id="gach5o5index00070" sort="mmcifloader" type="s" significance="standard"><span class="it"><i>mmCIF Loader</i></span></index></indexg><indexg><index id="gach5o5index00071" type="s" significance="standard">computer programs<index id="gach5o5index00072" type="s" significance="standard"><span class="it"><i>mmCIF Loader</i></span></index></index></indexg>(<span class="it"><i>mmCIF Loader</i></span>; <a class="linkclass" href="http://sw-tools.pdb.org/apps/MMCIF-LOADER">http://sw-tools.pdb.org/apps/MMCIF-LOADER</a>
) that builds data&#173;base schemata and extracts the processed data required to load database instances. The relation of the database loader to the central components of the <span class="it"><i>ADIT</i></span> system is shown in Fig. 5.5.3.4<figr id="fig5o5o3o4" loc="float"/>.</p>
<figplace id="fig5o5o3o4"/>
<p>Schemata are defined in a <indexg><index id="gach5o5index00073" type="s" significance="standard">metadata</index></indexg>metadata repository that is accessed by the loader application. In the simplest case, a schema can be constructed that is modelled directly from the data dictionary. Since the data model underlying the dictionary description language used to build <indexg><index id="gach5o5index00074" sort="adit" type="s" significance="standard"><span class="it"><i>ADIT</i></span></index></indexg><indexg><index id="gach5o5index00075" type="s" significance="standard">computer programs<index id="gach5o5index00076" type="s" significance="standard"><span class="it"><i>ADIT</i></span></index></index></indexg><span class="it"><i>ADIT</i></span> data dictionaries is essentially relational, mapping a data dictionary specification to a relational schema is straightforward.</p>
<p>In other cases, a mapping is required between the target schema and the data dictionary specification. This mapping is encoded in the schema metadata repository. The database loader uses this mapping information to extract items from data files and translate these data into a form that can be loaded into the target database schema. The definition of the mapping operation can include: selection operations with equijoin constraints (<span class="it"><i>e.g.</i></span> the value of <span class="b"><b><span class="ty"><tt>_entity.type</tt></span></b></span> where <span class="b"><b><span class="ty"><tt>_entity.id</tt></span></b></span> = 1), aggregation (<span class="it"><i>e.g.</i></span> count, sum, average), collapse (<span class="it"><i>e.g.</i></span> vector to string), type conversions and existence tests.</p>
<p>Schema definitions are converted by the database loader into <indexg><index id="gach5o5index00077" type="s" significance="standard">SQL</index></indexg>SQL instructions that create the defined tables and indices. Loadable data are produced either as SQL insert/update instructions or in the more efficient table copy formats used by popular database engines (<span class="it"><i>i.e.</i></span> DB2, Sybase, Oracle and MySQL). Loadable data can also be produced in <indexg><index id="gach5o5index00078" type="s" significance="standard">XML<index id="gach5o5index00079" type="s" significance="standard">mmCIF schema</index></index></indexg>XML.</p>
</div>

<div id="divsec5o5o3o3" class="sec2" secnum="5.5.3.3" fpage="542" lpage="543">
<div class="sectionheaders">
<h4 class="sectionheaders"><a name="sec5o5o3o3"><tree level="2"/></a>5.5.3.3. Building a structure-determination data pipeline</h4>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/sec5o5o3o3.pdf">pdf</a> |</span>
</div>
<st secid="sec5o5o3o3" secnum="5.5.3.3">Building a structure-determination data pipeline</st>
<p>One goal of high-throughput <indexg><index id="gach5o5index00080" type="s" significance="standard">structural genomics</index></indexg>structural genomics is the automatic capture of all the details of each step in the process of structure determination. Fig. 5.5.3.5<figr id="fig5o5o3o5" loc="float"/> shows a simplified structure-determination data pipeline. The essential details of each pipeline step are extracted and later assembled to make a data file for PDB deposition. The RCSB PDB data-processing infrastructure has been developed in anticipation of a data pipeline in which automated deposition would be the terminal step. The dictionary technology and software tools developed by the RCSB PDB to process and manage mmCIF data can be reused to provide the data-handling operations required to build the pipeline.</p>
<figplace id="fig5o5o3o5"/>
<p>Dictionary definitions have been carefully developed to describe the details of each step in the structure-determination pipeline. These data items are typically accessible in electronic form after each program step. The information is either exported directly in mmCIF format or is printed in a program output file. To deal with the latter case, a utility program, <indexg><index id="gach5o5index00081" sort="pdbextract" type="s" significance="standard"><span class="it"><i>PDB_EXTRACT</i></span></index></indexg><indexg><index id="gach5o5index00082" type="s" significance="standard">computer programs<index id="gach5o5index00083" type="s" significance="standard"><span class="it"><i>PDB_EXTRACT</i></span></index></index></indexg><span class="it"><i>PDB_EXTRACT</i></span> (<a class="linkclass" href="http://sw-tools.pdb.org/apps/PDB_EXTRACT">http://sw-tools.pdb.org/apps/PDB_EXTRACT</a>
), has been developed to parse program output files and extract key data values. In either case, the results of this incremental extraction of data from each program step must be merged to build a complete mmCIF data file ready for deposition. The <indexg><index id="gach5o5index00084" sort="pdbextract" type="s" significance="standard"><span class="it"><i>PDB_EXTRACT</i></span></index></indexg><indexg><index id="gach5o5index00085" type="s" significance="standard">computer programs<index id="gach5o5index00086" type="s" significance="standard"><span class="it"><i>PDB_EXTRACT</i></span></index></index></indexg><span class="it"><i>PDB_EXTRACT</i></span> program also carries out this merging operation.</p>
<p>Some steps in the structure-determination pipeline may not be driven by software. For instance, the details of protein production may be held in laboratory databases or within laboratory notebooks. A version of <span class="it"><i>ADIT</i></span> with a data view including all of the structural genomics data extensions has been created for entering these data. This <indexg><index id="gach5o5index00087" sort="adit" type="s" significance="standard"><span class="it"><i>ADIT</i></span></index></indexg><indexg><index id="gach5o5index00088" type="s" significance="standard">computer programs<index id="gach5o5index00089" type="s" significance="standard"><span class="it"><i>ADIT</i></span></index></index></indexg><span class="it"><i>ADIT</i></span> tool can also be used to <indexg><index id="gach5o5index00090" type="s" significance="standard">validation</index></indexg>validate and check the completeness of the final data file.</p>
</div>
</div>

<div id="divsec5o5o4" class="sec1" secnum="5.5.4" fpage="543" lpage="543">
<div class="sectionheaders">
<h3 class="sectionheaders"><a name="sec5o5o4"><tree level="1"/></a>5.5.4. Access</h3>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/sec5o5o4.pdf">pdf</a> |</span>
</div>
<st secid="sec5o5o4" secnum="5.5.4">Access</st>
<p>All of the software tools and libraries described in this chapter are distributed with full source under an open-source licence. Applications are also distributed in binary form for Intel/Linux, Sun/Solaris, SGI/IRIX and Dec Alpha platforms. Several are also included on the CD-ROM accompanying this volume.</p>
<p><figwrap id="fig5o5o2o1" fpage="539" lpage="539">
<div class="fig">
<table summary="Figure 5.5.2.1" bgcolor="#CCFFCC" border="0" cellpadding="2" width="98%" style="margin-left: auto; margin-right: auto; border: 1px solid green;">
<tbody>
<tr>
<td align="center" width="20%" style="border:solid 1px #000;">
<a class="linkclass" href="/Ga/ch5o5v0001/fig5o5o2o1/"><img src="/figures/Gafig5o5o2o1thm.gif" align="middle" alt="[Figure 5.5.2.1]"/>
<br/></a>
</td>
<td style="border:solid 1px #000;">
<p><span class="size3"><b><a name="fig5o5o2o1">Figure 5.5.2.1</a></b></span>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/fig5o5o2o1.pdf">pdf</a> |</span></p><p>Excerpt of records from a PDB data file.</p>
</td>
</tr>
</tbody>
</table>
</div>
<caption><p>Excerpt of records from a PDB data file.</p></caption>
<short-figcaption><p>Excerpt of records from a PDB data file</p></short-figcaption>
</figwrap>
<figwrap id="fig5o5o2o2" fpage="540" lpage="540">
<div class="fig">
<table summary="Figure 5.5.2.2" bgcolor="#CCFFCC" border="0" cellpadding="2" width="98%" style="margin-left: auto; margin-right: auto; border: 1px solid green;">
<tbody>
<tr>
<td align="center" width="20%" style="border:solid 1px #000;">
<a class="linkclass" href="/Ga/ch5o5v0001/fig5o5o2o2/"><img src="/figures/Gafig5o5o2o2thm.gif" align="middle" alt="[Figure 5.5.2.2]"/>
<br/></a>
</td>
<td style="border:solid 1px #000;">
<p><span class="size3"><b><a name="fig5o5o2o2">Figure 5.5.2.2</a></b></span>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/fig5o5o2o2.pdf">pdf</a> |</span></p><p>Files at different levels of the mmCIF metadata architecture. (<span class="it"><i>a</i></span>) mmCIF data file excerpt. (<span class="it"><i>b</i></span>) Example mmCIF data dictionary definition. (<span class="it"><i>c</i></span>) Example DDL dictionary attribute definition.</p>
</td>
</tr>
</tbody>
</table>
</div>
<caption><p>Files at different levels of the mmCIF metadata architecture. (<span class="it"><i>a</i></span>) mmCIF data file excerpt. (<span class="it"><i>b</i></span>) Example mmCIF data dictionary definition. (<span class="it"><i>c</i></span>) Example DDL dictionary attribute definition.</p></caption>
<short-figcaption><p>Files at different levels of the mmCIF metadata architecture</p></short-figcaption>
</figwrap>
<figwrap id="fig5o5o3o1" fpage="541" lpage="541">
<div class="fig">
<table summary="Figure 5.5.3.1" bgcolor="#CCFFCC" border="0" cellpadding="2" width="98%" style="margin-left: auto; margin-right: auto; border: 1px solid green;">
<tbody>
<tr>
<td align="center" width="20%" style="border:solid 1px #000;">
<a class="linkclass" href="/Ga/ch5o5v0001/fig5o5o3o1/"><img src="/figures/Gafig5o5o3o1thm.gif" align="middle" alt="[Figure 5.5.3.1]"/>
<br/></a>
</td>
<td style="border:solid 1px #000;">
<p><span class="size3"><b><a name="fig5o5o3o1">Figure 5.5.3.1</a></b></span>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/fig5o5o3o1.pdf">pdf</a> |</span></p><p>Functional diagram of the <span class="it"><i>ADIT</i></span> system.</p>
</td>
</tr>
</tbody>
</table>
</div>
<caption><p>Functional diagram of the <span class="it"><i>ADIT</i></span> system.</p></caption>
<short-figcaption><p>Functional diagram of the <span class="it"><i>ADIT</i></span> system</p></short-figcaption>
</figwrap>
<figwrap id="fig5o5o3o2" fpage="541" lpage="541">
<div class="fig">
<table summary="Figure 5.5.3.2" bgcolor="#CCFFCC" border="0" cellpadding="2" width="98%" style="margin-left: auto; margin-right: auto; border: 1px solid green;">
<tbody>
<tr>
<td align="center" width="20%" style="border:solid 1px #000;">
<a class="linkclass" href="/Ga/ch5o5v0001/fig5o5o3o2/"><img src="/figures/Gafig5o5o3o2thm.gif" align="middle" alt="[Figure 5.5.3.2]"/>
<br/></a>
</td>
<td style="border:solid 1px #000;">
<p><span class="size3"><b><a name="fig5o5o3o2">Figure 5.5.3.2</a></b></span>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/fig5o5o3o2.pdf">pdf</a> |</span></p><p>Schematic diagram of <span class="it"><i>ADIT</i></span> editing, format translation and validation functions.</p>
</td>
</tr>
</tbody>
</table>
</div>
<caption><p>Schematic diagram of <span class="it"><i>ADIT</i></span> editing, format translation and validation functions.</p></caption>
<short-figcaption><p>Schematic diagram of <span class="it"><i>ADIT</i></span> editing, format translation and validation functions</p></short-figcaption>
</figwrap>
<figwrap id="fig5o5o3o3" fpage="542" lpage="542">
<div class="fig">
<table summary="Figure 5.5.3.3" bgcolor="#CCFFCC" border="0" cellpadding="2" width="98%" style="margin-left: auto; margin-right: auto; border: 1px solid green;">
<tbody>
<tr>
<td align="center" width="20%" style="border:solid 1px #000;">
<a class="linkclass" href="/Ga/ch5o5v0001/fig5o5o3o3/"><img src="/figures/Gafig5o5o3o3thm.gif" align="middle" alt="[Figure 5.5.3.3]"/>
<br/></a>
</td>
<td style="border:solid 1px #000;">
<p><span class="size3"><b><a name="fig5o5o3o3">Figure 5.5.3.3</a></b></span>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/fig5o5o3o3.pdf">pdf</a> |</span></p><p>Example <span class="it"><i>ADIT</i></span> data-input screen.</p>
</td>
</tr>
</tbody>
</table>
</div>
<caption><p>Example <span class="it"><i>ADIT</i></span> data-input screen.</p></caption>
<short-figcaption><p>Example <span class="it"><i>ADIT</i></span> data-input screen</p></short-figcaption>
</figwrap>
<figwrap id="fig5o5o3o4" fpage="542" lpage="542">
<div class="fig">
<table summary="Figure 5.5.3.4" bgcolor="#CCFFCC" border="0" cellpadding="2" width="98%" style="margin-left: auto; margin-right: auto; border: 1px solid green;">
<tbody>
<tr>
<td align="center" width="20%" style="border:solid 1px #000;">
<a class="linkclass" href="/Ga/ch5o5v0001/fig5o5o3o4/"><img src="/figures/Gafig5o5o3o4thm.gif" align="middle" alt="[Figure 5.5.3.4]"/>
<br/></a>
</td>
<td style="border:solid 1px #000;">
<p><span class="size3"><b><a name="fig5o5o3o4">Figure 5.5.3.4</a></b></span>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/fig5o5o3o4.pdf">pdf</a> |</span></p><p>Schematic diagram of <span class="it"><i>ADIT</i></span> database loading functions.</p>
</td>
</tr>
</tbody>
</table>
</div>
<caption><p>Schematic diagram of <span class="it"><i>ADIT</i></span> database loading functions.</p></caption>
<short-figcaption><p>Schematic diagram of <span class="it"><i>ADIT</i></span> database loading functions</p></short-figcaption>
</figwrap>
<figwrap id="fig5o5o3o5" fpage="543" lpage="543">
<div class="fig">
<table summary="Figure 5.5.3.5" bgcolor="#CCFFCC" border="0" cellpadding="2" width="98%" style="margin-left: auto; margin-right: auto; border: 1px solid green;">
<tbody>
<tr>
<td align="center" width="20%" style="border:solid 1px #000;">
<a class="linkclass" href="/Ga/ch5o5v0001/fig5o5o3o5/"><img src="/figures/Gafig5o5o3o5thm.gif" align="middle" alt="[Figure 5.5.3.5]"/>
<br/></a>
</td>
<td style="border:solid 1px #000;">
<p><span class="size3"><b><a name="fig5o5o3o5">Figure 5.5.3.5</a></b></span>
<span class="navlinks"><span class="topnavlinks">| <a class="navlinks" href="#top">top</a></span> | <a class="navlinks" href="/Ga/ch5o5v0001/fig5o5o3o5.pdf">pdf</a> |</span></p><p>Schematic diagram of a structure-determination data pipeline.</p>
</td>
</tr>
</tbody>
</table>
</div>
<caption><p>Schematic diagram of a structure-determination data pipeline.</p></caption>
<short-figcaption><p>Schematic diagram of a structure-determination data pipeline</p></short-figcaption>
</figwrap>
</p>
</div>
</subch></bdy>
<bm>
<ack>

<h3>Acknowledgements</h3><p>The RCSB/PDB is operated by Rutgers, The State University of New Jersey; the San Diego Supercomputer Center at the University of California, San Diego; and the Center for Advanced Research in Biotechnology of the National Institute of Standards and Technology. RCSB/PDB is supported by funds from the National Science Foundation (NSF), the National Institute of General Medical Sciences (NIGMS), the Department of Energy (DOE), the National Library of Medicine (NLM), the National Cancer Institute (NCI), the National Center for Research Resources (NCRR), the National Institute of Biomedical Imaging and Bioengineering (NIBIB), and the National Institute of Neurological Disorders and Stroke (NINDS).</p>
</ack>
<bibl>
<bb id="bb1"><bbau>Berman, H. M.</bbau>, <bbau>Henrick, K.</bbau> &amp; <bbau>Nakamura, H.</bbau> (2003). <span class="it"><i>Announcing the worldwide Protein Data Bank.</i></span> <span class="it"><i>Nature Struct. Biol.</i></span> <span class="b"><b>10</b></span>, 980.</bb><bb id="bb2"><bbau>Berman, H. M.</bbau>, <bbau>Westbrook, J.</bbau>, <bbau>Feng, Z.</bbau>, <bbau>Gilliland, G.</bbau>, <bbau>Bhat, T. N.</bbau>, <bbau>Weissig, H.</bbau>, <bbau>Shindyalov, I. N.</bbau> &amp; <bbau>Bourne, P. E.</bbau> (2000). <span class="it"><i>The Protein Data Bank.</i></span> <span class="it"><i>Nucleic Acids Res.</i></span> <span class="b"><b>28</b></span>, 235&#8211;242.</bb><bb id="bb3"><bbau>Bernstein, F. C.</bbau>, <bbau>Koetzle, T. F.</bbau>, <bbau>Williams, G. J. B.</bbau>, <bbau>Meyer, E. F. Jr</bbau>, <bbau>Brice, M. D.</bbau>, <bbau>Rodgers, J. R.</bbau>, <bbau>Kennard, O.</bbau>, <bbau>Shimanouchi, T.</bbau> &amp; <bbau>Tasumi, M.</bbau> (1977). <span class="it"><i>The Protein Data Bank: a computer-based archival file for macromolecular structures.</i></span> <span class="it"><i>J. Mol. Biol.</i></span> <span class="b"><b>112</b></span>, 535&#8211;542. </bb><bb id="bb4"><bbau>Bourne, P. E.</bbau>, <bbau>Berman, H. M.</bbau>, <bbau>McMahon, B.</bbau>, <bbau>Watenpaugh, K. D.</bbau>, <bbau>Westbrook, J. D.</bbau> &amp; <bbau>Fitzgerald, P. M. D.</bbau> (1997). 
<span class="it"><i>Macromolecular Crystallographic Information File.</i></span> <span class="it"><i>Methods Enzymol.</i></span> <span class="b"><b>277</b></span>, 571&#8211;590.</bb><bb id="bb5"> <bbau>Callaway, J.</bbau>, <bbau>Cummings, M.</bbau>, <bbau>Deroski, B.</bbau>, <bbau>Esposito, P.</bbau>, <bbau>Forman, A.</bbau>, <bbau>Langdon, P.</bbau>, <bbau>Libeson, M.</bbau>, <bbau>McCarthy, J.</bbau>, <bbau>Sikora, J.</bbau>, <bbau>Xue, D.</bbau>, <bbau>Abola, E.</bbau>, <bbau>Bernstein, F.</bbau>, <bbau>Manning, N.</bbau>, <bbau>Shea, R.</bbau>, <bbau>Stampf, D.</bbau> &amp; <bbau>Sussman, J.</bbau> (1996). <span class="it"><i>Protein Data Bank contents guide: Atomic coordinate entry format description.</i></span> Brookhaven National Laboratory, New York, USA. Available from <a class="linkclass" href="http://www.rcsb.org/pdb/docs/format/pdbguide2.2/part_0.html">http://www.rcsb.org/pdb/docs/format/pdbguide2.2/part_0.html</a>.</bb>
<bb id="bb6"><bbau>Greer, D. S.</bbau>, <bbau>Westbrook, J. D.</bbau> &amp; <bbau>Bourne, P. E.</bbau> (2002). <span class="it"><i>An ontology driven architecture for derived representations of macromolecular structure.</i></span> <span class="it"><i>Bioinformatics</i></span>, <span class="b"><b>18</b></span>, 1280&#8211;1281.</bb><bb id="bb7"><bbau>Kuller, A.</bbau>, <bbau>Fleri, W.</bbau>, <bbau>Bluhm, W. F.</bbau>, <bbau>Smith, J. L.</bbau>, <bbau>Westbrook, J.</bbau> &amp; <bbau>Bourne, P. E.</bbau> (2002). <span class="it"><i>A biologist's guide to synchrotron facilities: the BioSync web resource.</i></span> <span class="it"><i>Trends Biochem. Sci.</i></span> <span class="b"><b>27</b></span>, 213&#8211;215.</bb><bb id="bb8"><bbau>Schirripa, S.</bbau> &amp; <bbau>Westbrook, J. D.</bbau> (1996). <span class="it"><i>CIFOBJ. A class library of mmCIF access tools.</i></span> Reference guide. <a class="linkclass" href="http://sw-tools.pdb.org/apps/CIFOBJ/cifobj/index.html">http://sw-tools.pdb.org/apps/CIFOBJ/cifobj/index.html</a>.</bb>
<bb id="bb9"><bbau>Tosic, O.</bbau> &amp; <bbau>Westbrook, J. D.</bbau> (2000). <span class="it"><i>CIFParse. A library of access tools for mmCIF.</i></span> Reference guide. <a class="linkclass" href="http://sw-tools.pdb.org/apps/CIFPARSE-OBJ/cifparse/index.html">http://sw-tools.pdb.org/apps/CIFPARSE-OBJ/cifparse/index.html</a>.</bb>
<bb id="bb10"><bbau>Westbrook, J.</bbau> &amp; <bbau>Bourne, P. E.</bbau> (2000). <span class="it"><i>STAR/mmCIF: an ontology for macromolecular structure.</i></span> <span class="it"><i>Bioinformatics</i></span>, <span class="b"><b>16</b></span>, 159&#8211;168.</bb></bibl>
</bm>
<figsection>
<bigfig id="fig5o5o2o1" fignum="5.5.2.1">
<div class="chfigure"><table summary="Figure 5.5.2.1" border="1" bgcolor="#CCFFCC" width="100%">
<tbody>
<tr>
<td align="center">
<img src="/figures/Gafig5o5o2o1.gif" alt="[Figure 5.5.2.1]"/>
<br/>
</td>
</tr>
<tr>
<td>
<span class="size3"><b><a name="fig5o5o2o1">Figure 5.5.2.1</a></b></span>
<p>Excerpt of records from a PDB data file.</p></td>
</tr>
</tbody>
</table>
<br/>
</div>
</bigfig>
<bigfig id="fig5o5o2o2" fignum="5.5.2.2">
<div class="chfigure"><table summary="Figure 5.5.2.2" border="1" bgcolor="#CCFFCC" width="100%">
<tbody>
<tr>
<td align="center">
<img src="/figures/Gafig5o5o2o2.gif" alt="[Figure 5.5.2.2]"/>
<br/>
</td>
</tr>
<tr>
<td>
<span class="size3"><b><a name="fig5o5o2o2">Figure 5.5.2.2</a></b></span>
<p>Files at different levels of the mmCIF metadata architecture. (<span class="it"><i>a</i></span>) mmCIF data file excerpt. (<span class="it"><i>b</i></span>) Example mmCIF data dictionary definition. (<span class="it"><i>c</i></span>) Example DDL dictionary attribute definition.</p></td>
</tr>
</tbody>
</table>
<br/>
</div>
</bigfig>
<bigfig id="fig5o5o3o1" fignum="5.5.3.1">
<div class="chfigure"><table summary="Figure 5.5.3.1" border="1" bgcolor="#CCFFCC" width="100%">
<tbody>
<tr>
<td align="center">
<img src="/figures/Gafig5o5o3o1.gif" alt="[Figure 5.5.3.1]"/>
<br/>
</td>
</tr>
<tr>
<td>
<span class="size3"><b><a name="fig5o5o3o1">Figure 5.5.3.1</a></b></span>
<p>Functional diagram of the <span class="it"><i>ADIT</i></span> system.</p></td>
</tr>
</tbody>
</table>
<br/>
</div>
</bigfig>
<bigfig id="fig5o5o3o2" fignum="5.5.3.2">
<div class="chfigure"><table summary="Figure 5.5.3.2" border="1" bgcolor="#CCFFCC" width="100%">
<tbody>
<tr>
<td align="center">
<img src="/figures/Gafig5o5o3o2.gif" alt="[Figure 5.5.3.2]"/>
<br/>
</td>
</tr>
<tr>
<td>
<span class="size3"><b><a name="fig5o5o3o2">Figure 5.5.3.2</a></b></span>
<p>Schematic diagram of <span class="it"><i>ADIT</i></span> editing, format translation and validation functions.</p></td>
</tr>
</tbody>
</table>
<br/>
</div>
</bigfig>
<bigfig id="fig5o5o3o3" fignum="5.5.3.3">
<div class="chfigure"><table summary="Figure 5.5.3.3" border="1" bgcolor="#CCFFCC" width="100%">
<tbody>
<tr>
<td align="center">
<img src="/figures/Gafig5o5o3o3.gif" alt="[Figure 5.5.3.3]"/>
<br/>
</td>
</tr>
<tr>
<td>
<span class="size3"><b><a name="fig5o5o3o3">Figure 5.5.3.3</a></b></span>
<p>Example <span class="it"><i>ADIT</i></span> data-input screen.</p></td>
</tr>
</tbody>
</table>
<br/>
</div>
</bigfig>
<bigfig id="fig5o5o3o4" fignum="5.5.3.4">
<div class="chfigure"><table summary="Figure 5.5.3.4" border="1" bgcolor="#CCFFCC" width="100%">
<tbody>
<tr>
<td align="center">
<img src="/figures/Gafig5o5o3o4.gif" alt="[Figure 5.5.3.4]"/>
<br/>
</td>
</tr>
<tr>
<td>
<span class="size3"><b><a name="fig5o5o3o4">Figure 5.5.3.4</a></b></span>
<p>Schematic diagram of <span class="it"><i>ADIT</i></span> database loading functions.</p></td>
</tr>
</tbody>
</table>
<br/>
</div>
</bigfig>
<bigfig id="fig5o5o3o5" fignum="5.5.3.5">
<div class="chfigure"><table summary="Figure 5.5.3.5" border="1" bgcolor="#CCFFCC" width="100%">
<tbody>
<tr>
<td align="center">
<img src="/figures/Gafig5o5o3o5.gif" alt="[Figure 5.5.3.5]"/>
<br/>
</td>
</tr>
<tr>
<td>
<span class="size3"><b><a name="fig5o5o3o5">Figure 5.5.3.5</a></b></span>
<p>Schematic diagram of a structure-determination data pipeline.</p></td>
</tr>
</tbody>
</table>
<br/>
</div>
</bigfig>
</figsection>
<fnsection>
</fnsection>
<indexes>
   <entry number="4">
      <term level="1">
         <level1>
            <span class="it">
               <i>ADIT</i>
            </span>
         </level1>
         <link indexid="index00052" significance="standard" section="1" sort="adit" chnumo="5o5" id="gach5o5index00052" type="s" secido="5o5o3" volid="Ga" secid="5.5.3"/>
         <link indexid="index00063" significance="standard" section="1" chnumo="5o5" sort="adit" type="s" id="gach5o5index00063" secido="5o5o3o1" volid="Ga" secid="5.5.3.1"/>
         <link indexid="index00074" significance="standard" section="1" chnumo="5o5" sort="adit" type="s" id="gach5o5index00074" secido="5o5o3o2" volid="Ga" secid="5.5.3.2"/>
         <link indexid="index00087" significance="standard" section="1" chnumo="5o5" sort="adit" type="s" id="gach5o5index00087" secido="5o5o3o3" volid="Ga" secid="5.5.3.3"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>API</level1>
         <link indexid="index00042" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00042" secido="5o5o2o3" volid="Ga" secid="5.5.2.3"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>BioSync</level1>
         <link indexid="index00059" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00059" secido="5o5o3" volid="Ga" secid="5.5.3"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>C++</level1>
         <link indexid="index00061" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00061" secido="5o5o3o1" volid="Ga" secid="5.5.3.1"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>CGI (Common Gateway Interface)</level1>
         <link indexid="index00060" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00060" secido="5o5o3o1" volid="Ga" secid="5.5.3.1"/>
      </term>
   </entry>
   <entry number="2">
      <term level="1">
         <level1>
            <span class="it">
               <i>CIFOBJ</i>
            </span>
         </level1>
         <link indexid="index00032" significance="standard" section="1" chnumo="5o5" sort="cifobj" type="s" id="gach5o5index00032" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
         <link indexid="index00066" significance="standard" section="1" chnumo="5o5" sort="cifobj" type="s" id="gach5o5index00066" secido="5o5o3o1" volid="Ga" secid="5.5.3.1"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>
            <span class="it">
               <i>CIFPARSE-OBJ</i>
            </span>
         </level1>
         <link indexid="index00027" significance="standard" section="1" chnumo="5o5" sort="cifparseobj" type="s" id="gach5o5index00027" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
      </term>
   </entry>
   <entry number="12">
      <term level="1">
         <level1>computer programs</level1>
      </term>
      <term level="2">
         <index id="gach5o5index00054" significance="standard" type="s">
            <span class="it">
               <i>ADIT</i>
            </span>
         </index>
         <link indexid="index00054" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00054" secido="5o5o3" volid="Ga" secid="5.5.3"/>
         <link indexid="index00065" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00065" secido="5o5o3o1" volid="Ga" secid="5.5.3.1"/>
         <link indexid="index00076" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00076" secido="5o5o3o2" volid="Ga" secid="5.5.3.2"/>
         <link indexid="index00089" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00089" secido="5o5o3o3" volid="Ga" secid="5.5.3.3"/>
      </term>
      <term level="2">
         <index id="gach5o5index00051" significance="standard" type="s">
            <span class="it">
               <i>CCP4</i>
            </span>
         </index>
         <link indexid="index00051" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00051" secido="5o5o2o3" volid="Ga" secid="5.5.2.3"/>
      </term>
      <term level="2">
         <index id="gach5o5index00034" significance="standard" type="s">
            <span class="it">
               <i>CIFOBJ</i>
            </span>
         </index>
         <link indexid="index00034" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00034" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
         <link indexid="index00068" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00068" secido="5o5o3o1" volid="Ga" secid="5.5.3.1"/>
      </term>
      <term level="2">
         <index id="gach5o5index00029" significance="standard" type="s">
            <span class="it">
               <i>CIFPARSE-OBJ</i>
            </span>
         </index>
         <link indexid="index00029" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00029" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
      </term>
      <term level="2">
         <index id="gach5o5index00072" significance="standard" type="s">
            <span class="it">
               <i>mmCIF Loader</i>
            </span>
         </index>
         <link indexid="index00072" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00072" secido="5o5o3o2" volid="Ga" secid="5.5.3.2"/>
      </term>
      <term level="2">
         <index id="gach5o5index00046" significance="standard" type="s">
            <span class="it">
               <i>OpenMMS</i>
            </span>
         </index>
         <link indexid="index00046" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00046" secido="5o5o2o3" volid="Ga" secid="5.5.2.3"/>
      </term>
      <term level="2">
         <index id="gach5o5index00083" significance="standard" type="s">
            <span class="it">
               <i>PDB_EXTRACT</i>
            </span>
         </index>
         <link indexid="index00083" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00083" secido="5o5o3o3" volid="Ga" secid="5.5.3.3"/>
         <link indexid="index00086" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00086" secido="5o5o3o3" volid="Ga" secid="5.5.3.3"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>Corba</level1>
         <link indexid="index00039" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00039" secido="5o5o2o3" volid="Ga" secid="5.5.2.3"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>data exchange standards</level1>
      </term>
      <term level="2">
         <index id="gach5o5index00009" significance="standard" type="s">PDB</index>
         <link indexid="index00009" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00009" secido="5o5o2o1" volid="Ga" secid="5.5.2.1"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>database</level1>
      </term>
      <term level="2">
         <index id="gach5o5index00058" significance="standard" type="s">loader and support tools (mmCIF)</index>
         <link indexid="index00058" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00058" secido="5o5o3" volid="Ga" secid="5.5.3"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>DDL2</level1>
         <link indexid="index00069" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00069" secido="5o5o3o1" volid="Ga" secid="5.5.3.1"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>DDL</level1>
      </term>
      <term level="2">
         <index id="gach5o5index00023" significance="standard" type="s">role in mmCIF metadata architecture</index>
         <link indexid="index00023" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00023" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>
            <span class="it">
               <i>FILM</i>
            </span>
         </level1>
         <link indexid="index00049" significance="standard" section="1" chnumo="5o5" sort="film" type="s" id="gach5o5index00049" secido="5o5o2o3" volid="Ga" secid="5.5.2.3"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>
            <span class="ty">
               <tt>loop_</tt>
            </span> (STAR File keyword)</level1>
         <link indexid="index00024" significance="standard" section="1" chnumo="5o5" sort="loop" type="s" id="gach5o5index00024" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
      </term>
   </entry>
   <entry number="3">
      <term level="1">
         <level1>metadata</level1>
         <link indexid="index00016" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00016" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
         <link indexid="index00017" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00017" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
         <link indexid="index00073" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00073" secido="5o5o3o2" volid="Ga" secid="5.5.3.2"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>methods</level1>
         <link indexid="index00030" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00030" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>
            <span class="it">
               <i>mmCIF Loader</i>
            </span>
         </level1>
         <link indexid="index00070" significance="standard" section="1" chnumo="5o5" sort="mmcifloader" type="s" id="gach5o5index00070" secido="5o5o3o2" volid="Ga" secid="5.5.3.2"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>mmCIF</level1>
      </term>
      <term level="2">
         <index id="gach5o5index00003" significance="standard" type="s">use in PDB data management</index>
         <link indexid="index00003" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00003" secido="5o5o1" volid="Ga" secid="5.5.1"/>
      </term>
   </entry>
   <entry number="2">
      <term level="1">
         <level1>NMR structures</level1>
         <link indexid="index00013" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00013" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
         <link indexid="index00021" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00021" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>Object Management Group</level1>
         <link indexid="index00043" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00043" secido="5o5o2o3" volid="Ga" secid="5.5.2.3"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>object-oriented CIF libraries</level1>
         <link indexid="index00031" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00031" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
      </term>
   </entry>
   <entry number="3">
      <term level="1">
         <level1>ontology</level1>
         <link indexid="index00006" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00006" secido="5o5o1" volid="Ga" secid="5.5.1"/>
         <link indexid="index00007" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00007" secido="5o5o2" volid="Ga" secid="5.5.2"/>
         <link indexid="index00010" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00010" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>
            <span class="it">
               <i>OpenMMS</i>
            </span>
         </level1>
         <link indexid="index00044" significance="standard" section="1" chnumo="5o5" sort="openmms" type="s" id="gach5o5index00044" secido="5o5o2o3" volid="Ga" secid="5.5.2.3"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>PDB exchange data dictionary</level1>
         <link indexid="index00004" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00004" secido="5o5o1" volid="Ga" secid="5.5.1"/>
      </term>
   </entry>
   <entry number="2">
      <term level="1">
         <level1>
            <span class="it">
               <i>PDB_EXTRACT</i>
            </span>
         </level1>
         <link indexid="index00081" significance="standard" section="1" chnumo="5o5" sort="pdbextract" type="s" id="gach5o5index00081" secido="5o5o3o3" volid="Ga" secid="5.5.3.3"/>
         <link indexid="index00084" significance="standard" section="1" chnumo="5o5" sort="pdbextract" type="s" id="gach5o5index00084" secido="5o5o3o3" volid="Ga" secid="5.5.3.3"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>Protein Data Bank</level1>
         <link indexid="index00001" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00001" secido="5o5o1" volid="Ga" secid="5.5.1"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>Research Collaboratory for Structural Bioinformatics</level1>
         <link indexid="index00005" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00005" secido="5o5o1" volid="Ga" secid="5.5.1"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>scope</level1>
         <link indexid="index00062" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00062" secido="5o5o3o1" volid="Ga" secid="5.5.3.1"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>SQL</level1>
         <link indexid="index00077" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00077" secido="5o5o3o2" volid="Ga" secid="5.5.3.2"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>STAR File</level1>
      </term>
      <term level="2">
         <index id="gach5o5index00012" significance="standard" type="s">basis for macromolecular structure ontology</index>
         <link indexid="index00012" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00012" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
      </term>
   </entry>
   <entry number="2">
      <term level="1">
         <level1>structural genomics</level1>
         <link indexid="index00018" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00018" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
         <link indexid="index00080" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00080" secido="5o5o3o3" volid="Ga" secid="5.5.3.3"/>
      </term>
   </entry>
   <entry number="1">
      <term level="1">
         <level1>structure checking</level1>
         <link indexid="index00056" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00056" secido="5o5o3" volid="Ga" secid="5.5.3"/>
      </term>
   </entry>
   <entry number="2">
      <term level="1">
         <level1>validation</level1>
         <link indexid="index00055" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00055" secido="5o5o3" volid="Ga" secid="5.5.3"/>
         <link indexid="index00090" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00090" secido="5o5o3o3" volid="Ga" secid="5.5.3.3"/>
      </term>
   </entry>
   <entry number="6">
      <term level="1">
         <level1>web sites</level1>
      </term>
      <term level="2">
         <index id="gach5o5index00041" significance="standard" type="s">Object Management Group: <a class="linkclass" href="http://www.omg.org/">http://www.omg.org/</a>
         </index>
         <link indexid="index00041" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00041" secido="5o5o2o3" volid="Ga" secid="5.5.2.3"/>
      </term>
      <term level="2">
         <index id="gach5o5index00048" significance="standard" type="s">OpenMMS software: <a class="linkclass" href="http://openmms.sdsc.edu/">http://openmms.sdsc.edu/</a>
         </index>
         <link indexid="index00048" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00048" secido="5o5o2o3" volid="Ga" secid="5.5.2.3"/>
      </term>
      <term level="2">
         <index id="gach5o5index00015" significance="standard" type="s">PDB mmCIF site: <a class="linkclass" href="http://mmcif.pdb.org/">http://mmcif.pdb.org/</a>
         </index>
         <link indexid="index00015" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00015" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
         <link indexid="index00020" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00020" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
      </term>
      <term level="2">
         <index id="gach5o5index00026" significance="standard" type="s">PDB software tools: <a class="linkclass" href="http://sw-tools.pdb.org/">http://sw-tools.pdb.org/</a>
         </index>
         <link indexid="index00026" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00026" secido="5o5o2o2" volid="Ga" secid="5.5.2.2"/>
      </term>
      <term level="2">
         <index id="gach5o5index00038" significance="standard" type="s">Research Collaboratory for Structural Bioinformatics data standardisation: <a class="linkclass" href="ftp://beta.rcsb.org/">ftp://beta.rcsb.org/</a>
         </index>
         <link indexid="index00038" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00038" secido="5o5o2o3" volid="Ga" secid="5.5.2.3"/>
      </term>
   </entry>
   <entry number="2">
      <term level="1">
         <level1>XML</level1>
      </term>
      <term level="2">
         <index id="gach5o5index00036" significance="standard" type="s">mmCIF schema</index>
         <link indexid="index00036" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00036" secido="5o5o2o3" volid="Ga" secid="5.5.2.3"/>
         <link indexid="index00079" significance="standard" section="1" chnumo="5o5" type="s" id="gach5o5index00079" secido="5o5o3o2" volid="Ga" secid="5.5.3.2"/>
      </term>
   </entry>
</indexes>
</wrap>