nih-gov/www.ncbi.nlm.nih.gov/books/n/handbook/ch13/index.html

701 lines
No EOL
79 KiB
HTML

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<!-- AppResources meta begin -->
<meta name="paf-app-resources" content="" />
<script type="text/javascript">var ncbi_startTime = new Date();</script>
<!-- AppResources meta end -->
<!-- TemplateResources meta begin -->
<meta name="paf_template" content="" />
<!-- TemplateResources meta end -->
<!-- Logger begin -->
<meta name="ncbi_db" content="books" /><meta name="ncbi_pdid" content="book-part" /><meta name="ncbi_acc" content="NBK21082" /><meta name="ncbi_domain" content="handbook" /><meta name="ncbi_report" content="record" /><meta name="ncbi_type" content="fulltext" /><meta name="ncbi_objectid" content="" /><meta name="ncbi_pcid" content="/NBK21082/" /><meta name="ncbi_pagename" content="The Processing of Biological Sequence Data at NCBI - The NCBI Handbook - NCBI Bookshelf" /><meta name="ncbi_bookparttype" content="chapter" /><meta name="ncbi_app" content="bookshelf" />
<!-- Logger end -->
<title>The Processing of Biological Sequence Data at NCBI - The NCBI Handbook - NCBI Bookshelf</title>
<!-- AppResources external_resources begin -->
<link rel="stylesheet" href="/core/jig/1.15.2/css/jig.min.css" /><script type="text/javascript" src="/core/jig/1.15.2/js/jig.min.js"></script>
<!-- AppResources external_resources end -->
<!-- Page meta begin -->
<meta name="robots" content="NOINDEX,NOFOLLOW,NOARCHIVE,NOIMAGEINDEX" /><meta name="citation_inbook_title" content="The NCBI Handbook [Internet]" /><meta name="citation_title" content="The Processing of Biological Sequence Data at NCBI" /><meta name="citation_publisher" content="National Center for Biotechnology Information (US)" /><meta name="citation_date" content="2006/03/14" /><meta name="citation_author" content="Karl Sirotkin" /><meta name="citation_author" content="Tatiana Tatusova" /><meta name="citation_author" content="Eugene Yaschenko" /><meta name="citation_author" content="Mark Cavanaugh" /><meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK21082/" /><link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" /><meta name="DC.Title" content="The Processing of Biological Sequence Data at NCBI" /><meta name="DC.Type" content="Text" /><meta name="DC.Publisher" content="National Center for Biotechnology Information (US)" /><meta name="DC.Contributor" content="Karl Sirotkin" /><meta name="DC.Contributor" content="Tatiana Tatusova" /><meta name="DC.Contributor" content="Eugene Yaschenko" /><meta name="DC.Contributor" content="Mark Cavanaugh" /><meta name="DC.Date" content="2006/03/14" /><meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK21082/" /><meta name="description" content="The biological sequence information that builds the foundation of NCBI's databases and curated resources comes from many sources. How are these data managed and processed once they reach NCBI? This chapter discusses the flow of sequence data, from the management of data submission to the generation of publicly available data products." /><meta name="og:title" content="The Processing of Biological Sequence Data at NCBI" /><meta name="og:type" content="book" /><meta name="og:description" content="The biological sequence information that builds the foundation of NCBI's databases and curated resources comes from many sources. 
How are these data managed and processed once they reach NCBI? This chapter discusses the flow of sequence data, from the management of data submission to the generation of publicly available data products." /><meta name="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK21082/" /><meta name="og:site_name" content="NCBI Bookshelf" /><meta name="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png" /><meta name="twitter:card" content="summary" /><meta name="twitter:site" content="@ncbibooks" /><meta name="warning" content="This publication is provided for historical reference only and the information may be out of date." /><meta name="bk-non-canon-loc" content="/books/n/handbook/ch13/" /><link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK21082/" /><link rel="stylesheet" href="/corehtml/pmc/css/figpopup.css" type="text/css" media="screen" /><link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css" type="text/css" /><link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_print.min.css" type="text/css" media="print" /><style type="text/css">.main-content {background:transparent repeat-y top left;background-image:url(/corehtml/pmc/css/bookshelf/2.26/img/archive.png);background-size: auto, contain; padding:0 0 0 3em }</style><style type="text/css">p a.figpopup{display:inline !important} .bk_tt {font-family: monospace} .first-line-outdent .bk_ref {display: inline} .body-content h2, .body-content .h2 {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a {text-decoration:none;border:0 !important} .temp-labeled-list .graphic {display:inline-block !important} .temp-labeled-list img{width:100%}</style><script type="text/javascript" src="/corehtml/pmc/js/jquery.hoverIntent.min.js"> </script><script type="text/javascript" src="/corehtml/pmc/js/common.min.js?_=3.18"> </script><script 
type="text/javascript" src="/corehtml/pmc/js/large-obj-scrollbars.min.js"> </script><script type="text/javascript">window.name="mainwindow";</script><script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/book-toc.min.js"> </script><script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/books.min.js"> </script><meta name="book-collection" content="NONE" />
<!-- Page meta end -->
<!-- Favicon uses an explicit https URL (same origin as the canonical link) rather than a protocol-relative one, so it resolves whatever scheme the page is loaded under. -->
<link rel="shortcut icon" href="https://www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8D5BC07C814FC100000000010A00CD.m_13" />
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3985586/3808861/4121862/3974050/3917732/251717/4216701/14534/45193/4113719/3849091/3984811/3751656/4033350/3840896/3577051/3852958/4008682/4207974/4206132/4062871/12930/3964959/3854974/36029/4128070/9685/3549676/3609192/3609193/3609213/3395586.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3411343/3882866.css" media="print" /></head>
<body class="book-part">
<div class="grid">
<div class="col twelve_col nomargin shadow">
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
<div class="sysmessages">
<noscript>
<!-- Fallback notice shown only when scripting is disabled. -->
<p class="nojs">
<strong>Warning:</strong>
The NCBI web site requires JavaScript to function.
<!-- rel="noopener" keeps the newly opened tab from reaching this page through window.opener. -->
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank" rel="noopener">more...</a>
</p>
</noscript>
</div>
<!--/.sysmessage-->
<div class="wrap">
<div class="page">
<div class="top">
<div id="universal_header">
<!-- U.S. government site banner: a header row (flag image, official-site statement, toggle button) followed by a guidance panel. -->
<section class="usa-banner">
<div class="usa-accordion">
<header class="usa-banner-header">
<div class="usa-grid usa-banner-inner">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
<p>An official website of the United States government</p>
<!-- Toggle button: aria-controls references the guidance panel (#gov-banner-top); aria-expanded is "false" in the served markup. -->
<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
<span class="usa-banner-button-text">Here's how you know</span>
</button>
</div>
</header>
<!-- Guidance panel: carries aria-hidden="true" in the served markup and holds two blocks classed usa-width-one-half (.gov explanation, https explanation). -->
<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
<!-- Block 1: what the .gov domain means. -->
<div class="usa-banner-guidance-gov usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
<div class="usa-media_block-body">
<p>
<strong>The .gov means it's official.</strong>
<br />
Federal government websites often end in .gov or .mil. Before
sharing sensitive information, make sure you're on a federal
government site.
</p>
</div>
</div>
<!-- Block 2: what the https:// prefix means. -->
<div class="usa-banner-guidance-ssl usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
<div class="usa-media_block-body">
<p>
<strong>The site is secure.</strong>
<br />
The <strong>https://</strong> ensures that you are connecting to the
official website and that any information you provide is encrypted
and transmitted securely.
</p>
</div>
</div>
</div>
</div>
</section>
<div class="usa-overlay"></div>
<header class="ncbi-header" role="banner" data-section="Header">
<div class="usa-grid">
<div class="usa-width-one-whole">
<div class="ncbi-header__logo">
<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
</a>
</div>
<!-- Account area: a "Log in" link and an account-info button. Both carry inline display:none in the served markup. NOTE(review): presumably a script reveals one of them depending on login state - confirm. -->
<div class="ncbi-header__account">
<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
<!-- aria-controls references the account popup (#account_popup). -->
<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
<!-- User icon; its wrapper is hidden from assistive technology via aria-hidden="true". -->
<span class="fa fa-user" aria-hidden="true">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
<g style="fill: #fff">
<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
</g>
</svg>
</span>
<!-- Empty in the served markup. NOTE(review): presumably filled with the user name by script - confirm. -->
<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
<!-- NOTE(review): this label uses "sr-only" while the close button in the account popup uses "usa-sr-only"; confirm both classes are defined in the stylesheets. -->
<span class="sr-only">Show account info</span>
</button>
</div>
<div class="ncbi-popup-anchor">
<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
<div class="ncbi-popup-head">
<!-- Close control for the account popup. The "x" glyph is decorative: the button is named by the usa-sr-only "Close" text, so the icon wrapper is marked aria-hidden like the user icon on the account button. -->
<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
<span class="fa fa-times" aria-hidden="true">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
</svg>
</span>
<span class="usa-sr-only">Close</span></button>
<h4>Account</h4>
</div>
<div class="account-user-info">
Logged in as:<br />
<b><span class="username" id="uname_long">username</span></b>
</div>
<div class="account-links">
<ul class="usa-unstyled-list">
<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
</ul>
</div>
</div>
</div>
</div>
</div>
</header>
<!-- Access-key shortcuts (accesskey 0 to 4). Every link carries tabindex="-1" and the usa-sr-only class. -->
<div role="navigation" aria-label="access keys">
<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
<!-- NOTE(review): href="#" gives this shortcut no target, unlike "Main Content" above, which points at #maincontent; confirm the intended anchor. -->
<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
</div>
<section data-section="Alerts">
<div class="ncbi-alerts-placeholder"></div>
</section>
</div>
<div class="header">
<div class="res_logo"><h1 class="res_name"><a href="/books/" title="Bookshelf home">Bookshelf</a></h1><h2 class="res_tagline"></h2></div>
<div class="search"><form method="get" action="/books/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="books" selected="selected" data-ac_dict="bookshelf-search">Books</option><option value="nuccore">Nucleotide</option><option value="gquery">All Databases</option><option value="sra" class="last">SRA</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books" data-ac_dict="bookshelf-search">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option 
value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search Books. Use up and down arrows to choose an item from the autocomplete." value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="dictionary:'bookshelf-search',disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'no'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form><ul class="searchlinks inline_list"><li>
<a href="/books/browse/">Browse Titles</a>
</li><li>
<a href="/books/advanced/">Advanced</a>
</li><li class="help">
<a href="/books/NBK3833/">Help</a>
</li><li class="disclaimer">
<!-- Opens in a new tab; rel="noopener" keeps that tab from reaching this page through window.opener. -->
<a target="_blank" rel="noopener" data-ga-category="literature_resources" data-ga-action="link_click" data-ga-label="disclaimer_link" href="https://www.ncbi.nlm.nih.gov/books/about/disclaimer/">Disclaimer</a>
</li></ul></div>
</div>
<!--<component id="Page" label="headcontent"/>-->
</div>
<div class="content">
<!-- site messages -->
<!-- Custom content 1 -->
<div class="col1">
</div>
<div class="container">
<div id="maincontent" class="content eight_col col">
<!-- Custom content in the left column above book nav -->
<div class="col2">
</div>
<!-- Book content -->
<!-- Custom content between navigation and content -->
<div class="col3">
</div>
<div class="document">
<div class="pre-content"><div><div class="bk_prnt"><p class="small">NCBI Bookshelf. A service of the National Library of Medicine, National Institutes of Health.</p><p>McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. </p></div><div class="bk_msg_box bk_bttm_mrgn clearfix bk_noprnt"><div class="iconblock clearfix"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/handbook2e/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook2e-lrg.png" alt="Cover" height="100px" width="80px" /></a><div class="icnblk_cntnt"><ul class="messages"><li class="info icon"><span class="icon"><a href="/books/n/handbook2e/">See "The NCBI Handbook, 2nd Edition"</a></span></li></ul></div></div></div><div class="messagearea bk_noprnt" style="margin-bottom:1.3846em "><ul class="messages"><li class="warn icon"><span class="icon">This publication is provided for historical reference only and the information may be out of date.</span></li></ul></div><div class="bk_prnt"><p style="color:red;"><strong>This publication is provided for historical reference only and the information may be out of date.</strong></p></div><div class="iconblock clearfix whole_rhythm no_top_margin bk_noprnt"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/handbook/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png" alt="Cover of The NCBI Handbook" height="100px" width="80px" /></a><div class="icnblk_cntnt eight_col"><h2>The NCBI Handbook [Internet].</h2><a data-jig="ncbitoggler" href="#__NBK21082_dtls__">Show details</a><div style="display:none" class="ui-widget" id="__NBK21082_dtls__"><div>McEntyre J, Ostell J, editors.</div><div>Bethesda (MD): <a href="https://www.ncbi.nlm.nih.gov/" ref="pagearea=page-banner&amp;targetsite=external&amp;targetcat=link&amp;targettype=publisher">National Center for Biotechnology 
Information (US)</a>; 2002-.</div></div><div class="half_rhythm"><ul class="inline_list"><li style="margin-right:1em"><a class="bk_cntns" href="/books/n/handbook/">Contents</a></li></ul></div></div><div class="icnblk_cntnt two_col"><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook/ch12/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook/ch14/" title="Next page in this title">Next &gt;</a></div></div></div></div></div>
<div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><h1 id="_NBK21082_"><span class="label">Chapter 13</span><span class="title" itemprop="name">The Processing of Biological Sequence Data at NCBI</span></h1><p class="contrib-group"><span itemprop="author">Karl Sirotkin</span>, <span itemprop="author">Tatiana Tatusova</span>, <span itemprop="author">Eugene Yaschenko</span>, and <span itemprop="author">Mark Cavanaugh</span>.</p><p class="small">Created: <span itemprop="datePublished">October 9, 2002</span>; Last Update: <span itemprop="dateModified">March 14, 2006</span>.</p><p><em>Estimated reading time: 13 minutes</em></p></div><div class="jig-ncbiinpagenav body-content whole_rhythm" data-jigconfig="allHeadingLevels: ['h2'],smoothScroll: false" itemprop="text"><p>The biological sequence information that builds the foundation of <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>'s databases and
curated resources comes from many sources. How are these data managed and processed once they
reach NCBI? This chapter discusses the flow of sequence data, from the management of data
submission to the generation of publicly available data products. </p><div id="ch13.Overview"><h2 id="_ch13_Overview_">Overview</h2><p>The central dogma of molecular biology asserts that sequences flow from <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> to <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a> to
protein. In <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a>, DNA and RNA sequences are retrieved together as nucleotides and then
integrated, along with proteins, into the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> system. Once in the system, nucleotides and
proteins are both available for public use in at least three ways: </p><dl class="temp-labeled-list"><dt>1.</dt><dd id="A2095"><p class="no_top_margin">The <a href="/Sitemap/index.html#Entrez" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez
system</a> (<a href="/books/n/handbook/ch15/">Chapter 15</a>) retrieves
nucleotide and protein sequences according to text queries that are entered into the
search box. Text queries can be followed by search fields, such as author, <a class="def" href="/books/n/handbook/A1237/def-item/app36/">definition line</a>, and organism (for example, "homo sapiens"[orgn]), and are used to further define
raw sequence data being used for retrieval.</p></dd><dt>2.</dt><dd id="A2096"><p class="no_top_margin">The sequences themselves can be searched directly by using <a href="/BLAST/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLAST</a> (<a href="/books/n/handbook/ch16/">Chapter 16</a>), which uses a sequence as a query
to find similar sequences. </p></dd><dt>3.</dt><dd id="A2097"><p class="no_top_margin">Large subsets of sequences can be downloaded by <a href="/Sitemap/index.html#FTPSite" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">FTP</a>.
</p></dd></dl><p>There are many sources for both nucleotide and protein sequences. Sequences submitted
directly to <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> (<a href="/books/n/handbook/ch1/">Chapter 1</a>) or replicated
from one of our two collaborating databases, the European Molecular Biology Laboratory
(<a class="def" href="/books/n/handbook/A1237/def-item/app44/">EMBL</a>) Data Library and the <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> Data Bank of Japan (<a class="def" href="/books/n/handbook/A1237/def-item/app35/">DDBJ</a>), are the major sources. The
Reference Sequence collection (<a href="/books/n/handbook/ch18/">Chapter 18</a>) and
the UniProt database, which incorporates data from <a class="def" href="/books/n/handbook/A1237/def-item/app175/">SWISS-PROT</a>, are yet additional sources. </p><p>An information management system that consists of two major components, the ID database and
the IQ database, underlies the submission, storage, and access of <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a>, and other
curated data resources (such as the Reference Sequences (<a href="/books/n/handbook/ch18/">Chapter 18</a>), the <a class="def" href="/books/n/handbook/A1237/def-item/app99/">Map Viewer</a> (<a href="/books/n/handbook/ch20/">Chapter 20</a>), or <a class="def" href="/books/n/handbook/A1237/def-item/app97/">Entrez Gene</a> (<a href="/books/n/handbook/ch19/">Chapter
19</a>)). Whereas ID handles incoming sequences and feeds other databases with subsets
to suit different needs, IQ holds links between sequences stored in ID and between these
sequences and other resources. </p><div id="ch13.Abstract_Syntax_Nota"><h3>Abstract Syntax Notation 1 (ASN.1) Is the Data Format Used by the ID System</h3><p><a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> is the data description language in which all sequence data at <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> are structured.
ASN.1 allows a detailed description of both the sequences and the information associated
with them, such as author names, source organism, and biological features (known as
&#x0201c;features&#x0201d;). The image below shows <span class="bk_pgobj">FEATURES</span> as displayed in <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> format. </p><p>
<span class="graphic"><img src="/books/NBK21082/bin/ch13.chapt13a.jpg" alt="FEATURES section of a sequence record as displayed in GenBank format" /></span>
</p><p>In the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> format, the organism information is presented as shown below. You can also
<a href="/entrez/viewer.fcgi?db=nucleotide&#x00026;qty=1&#x00026;c_start=1&#x00026;list_uids=71106260&#x00026;dopt=asn&#x00026;dispmax=5&#x00026;sendto=&#x00026;from=begin&#x00026;to=end&#x00026;extrafeatpresent=1&#x00026;ef_MGC=16" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">see a complete ASN.1 record</a>.</p><pre>orgname { name binomial { genus "Macaca" , species "mulatta" } , </pre><p>Maintaining all data in the same structured format simplifies data parsing, manipulation,
and quality assurance, and eases the task of data integration and software development for
sequence analysis. All of the various divisions of <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> can be downloaded in <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> from
the <a href="ftp://ftp.ncbi.nih.gov" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">NCBI FTP site</a>. In the ID data management
system, data are stored as ASN.1 blobs, minimizing the amount of biological information
that is captured and updated in the relational database schema.</p><p>Similar to an <a class="def" href="/books/n/handbook/A1237/def-item/app198/">XML</a> <a class="def" href="/books/n/handbook/A1237/def-item/app40/">DTD</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> has an associated file that contains the description of the
legal data structure. This file is called asn.all and is available as part of the
&#x0201c;C&#x0201d; toolkit in an archive named
&#x0201c;ncbi.tar.gz&#x0201d; located in the <a href="ftp://ftp.ncbi.nih.gov/toolbox/ncbi_tools" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">FTP directory</a>. When
unpacked, the directory &#x0201c;/demo&#x0201d;, found in the
&#x0201c;ncbi.tar.gz&#x0201d; archive, contains the asn.all file. In the same
&#x0201c;/demo&#x0201d; directory is testval.c, a tool that validates the data
against asn.all. Additionally, a set of utilities for producing ASN.1 while programming in
&#x0201c;C&#x0201d; is found in the subutil.c file of the
&#x0201c;/api&#x0201d; directory, which is unpacked from the same
&#x0201c;ncbi.tar.gz&#x0201d; archive.</p></div><div id="ch13.Sources_of_Sequence_"><h3>Sources of Sequence Data</h3><p>The sequence data available at <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> comes from many different sources (<a class="figpopup" href="/books/NBK21082/figure/ch13.F1/?report=objectonly" target="object" rid-figpopup="figch13F1" rid-ob="figobch13F1">Figure 1</a>). In
summary, the data consist of:</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch13F1" co-legend-rid="figlgndch13F1"><a href="/books/NBK21082/figure/ch13.F1/?report=objectonly" target="object" title="Figure" class="img_link icnblk_img figpopup" rid-figpopup="figch13F1" rid-ob="figobch13F1"><img class="small-thumb" src="/books/NBK21082/bin/ch13.ch13f1Karl.gif" src-large="/books/NBK21082/bin/ch13.ch13f1Karl.jpg" alt="Figure 1" /></a><div class="icnblk_cntnt" id="figlgndch13F1"><h4 id="ch13.F1"><a href="/books/NBK21082/figure/ch13.F1/?report=objectonly" target="object" rid-ob="figobch13F1">Figure</a></h4><p class="float-caption no_bottom_margin">Figure 1. Sources of sequence data available at NCBI. </p></div></div><ul><li id="A2098" class="half_rhythm"><div><a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> sequences (<a href="/books/n/handbook/ch1/">Chapter 1</a>)</div></li><li id="A2099" class="half_rhythm"><div>Reference sequences (<a href="/books/n/handbook/ch18/">Chapter 18</a>)</div></li><li id="A2100" class="half_rhythm"><div>sequences from other databases, such as <a class="def" href="/books/n/handbook/A1237/def-item/app175/">SWISS-PROT</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app137/">PIR</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app143/">PRF</a>, and <a class="def" href="/books/n/handbook/A1237/def-item/app130/">PDB</a></div></li><li id="A2101" class="half_rhythm"><div>sequences from the United States patents</div></li></ul><p>The submission pathway depends on the data source (see <a class="figpopup" href="/books/NBK21082/figure/ch13.F1/?report=objectonly" target="object" rid-figpopup="figch13F1" rid-ob="figobch13F1">Figure 1</a>) and volume. <a class="def" href="/books/n/handbook/A1237/def-item/app74/">HTGS</a> and other large-volume submitters use <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a>, usually
after converting their data to <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> with tools such as tbl2asn. Small-volume submitters
typically use either <a class="def" href="/books/n/handbook/A1237/def-item/app7/">BankIt</a> (<a href="/books/n/handbook/ch1/">Chapter 1</a>) or
<a class="def" href="/books/n/handbook/A1237/def-item/app161/">Sequin</a> (<a href="/books/n/handbook/ch12/">Chapter 12</a>) to prepare the ASN.1 for
submission.</p><p>The data received are then subjected to some quality control by the submission tools
<a class="def" href="/books/n/handbook/A1237/def-item/app7/">BankIt</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app161/">Sequin</a>, and fa2htgs. These tools have built-in validation mechanisms to check if
the data submitted have the correct structure and contain the essential information. The
work of the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> indexing staff, who uses Sequin, adds one more layer of quality
control and provides assistance to submitters. The staff also helps with the use of Sequin
for complex submissions. </p></div></div><div id="ch13.Data_Flow_Components"><h2 id="_ch13_Data_Flow_Components_">Data Flow Components</h2><div id="ch13.The_ID_Database"><h3>The ID Database</h3><p>The ID database is a group of standard relational databases that holds both <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> objects
and sequence identifier-related information. ASN.1 objects follow the specifications in
the asn.all file for <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> sequence data objects. ID holds data for <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> and the many
databases in the <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> system. Details of the architecture of relational ID databases and
the software associated with them are described <a href="#ch13.Data_Flow_Architectu">later in this chapter</a>. All of the sequences from
the International Nucleotide Sequence Database Collaboration (INSDC) are in GenBank, and
they all have Accession numbers assigned to them. Accession numbers point to sequences and
their associated biological information and annotation. </p><p>In the ID database, blobs are added into a single column of a relational database.
Although the columns behave as in a relational database, the information that makes up each
blob, such as biological features, raw sequence data, and author information, is neither
parsed nor split out. In this sense, the ID database can be considered as a hybrid
database that stores complex objects. </p><p>Note: Blob stands for Binary Large Object (or binary data object) and refers to a large
piece of data, a large structured data object that can be stored as a unit and processed
by software that knows the structure. For more information, check the <a href="/books/n/handbook/A1237/">Glossary</a>.</p></div><div id="ch13.Versions__GIs__Annot"><h3>Versions, GIs, Annotation Changes, and Takeovers</h3><p>Every time a change is made to a sequence, a new version of the sequence is produced.
This new version has a new <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> number (GI or GenInfo Identifier is a sequence
identification number for a nucleotide sequence) assigned to it (<b>A</b> and
<b>B</b> in the image below). When a change is made to the annotation associated
with a sequence, a new blob is produced, but no new version or GI is assigned. This series
of events marks the history of the sequence since its first days in <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a>.</p><p>You can track annotation and sequence changes, as well as the
&#x0201c;takeover&#x0201d; of one record by another by using the Sequence Revision
History tool. The tool can be accessed from the side blue bar in <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> Nucleotide and
Entrez Protein and is used to highlight differences in sequence versions and annotations.
To understand how the History tool works, let&#x02019;s examine the <a href="/entrez/sutils/girevhist.cgi?val=AF123456" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">history of the Gallus gallus doublesex and mab-3 related transcription factor 1
mRNA</a> (Accession <a href="/entrez/viewer.fcgi?db=nucleotide&#x00026;val=6633795" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">AF123456</a>), which was first added to <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> March 20, 1999. </p><p>Click on <span class="bk_pgobj">Check sequence revision
history</span> in the blue side bar of <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> Nucleotide or Entrez Protein to
be directed to the <span class="bk_pgobj">Sequence Revision
History</span> page. Enter the Accession or <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> numbers or the <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a>-style
Sequence IDs (<span class="bk_pgobj">SeqIds</span>) into the
<span class="bk_pgobj">Find</span> box. The <span class="bk_pgobj">Revision history</span> for <a href="/nuccore/6633795" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">AF123456</a> is
displayed.</p><p>
<span class="graphic"><img src="/books/NBK21082/bin/ch13.chapt13f3.jpg" alt="Revision history for AF123456 as displayed on the Sequence Revision History page" /></span>
</p><p>The Update Date column (<b>C</b> in the image above) contains the date of every
update to <a href="/nuccore/6633795" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">AF123456</a>. Some involve sequence changes, others involve only annotation changes.
Click on a date in the column to retrieve <a href="/nuccore/6633795" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">AF123456</a> as it existed at that point in time.
The status column (<b>D</b>) reports which version is live and which ones are dead.
Columns I and II (<b>E</b>) are used to compare two different sequences. </p><p>Notice that on <span class="bk_pgobj">Mar 23 1999</span>, at
1:24 PM, a new <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> blob was produced for Accession AF123456. However, no new <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> number
(<b>A</b>) or version (<b>B</b>) was assigned because the changes were
limited to the annotation and biological features of the sequence, with no changes made to
the sequence data. On December 23, 1999, Accession <a href="/nuccore/6633795" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">AF123456</a> gained a new GI
(<span class="bk_pgobj">6633795</span>) and version
(<span class="bk_pgobj">Version 2</span>) because in this
case a change was made to the sequence data. </p><p>Compare the two blobs produced on March 23, 1999 and December 23, 1999 to see the
difference between them. </p><ul><li id="A2102" class="half_rhythm"><div>Start by <a href="/entrez/sutils/girevhist.cgi?val=AF123456" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">accessing the Revision history for AF123456</a>. </div></li><li id="A2103" class="half_rhythm"><div>Select one sequence in each column (I or II) as shown in the image above
(<b>E</b>). </div></li><li id="A2104" class="half_rhythm"><div>Push the <span class="bk_pgobj">Show</span> button at the
upper left of the page to display the two blobs (<b>G</b>).</div></li></ul><p>The differences between blobs are highlighted, with each blob displaying a different
color. Compare <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> blobs produced on March 20, 1999 and March 23, 1999 and you will see
that the differences between the two are limited to the annotation and biological features
described in the blobs, whereas the sequence data remain the same. </p><p>The understanding of the biological features related to a sequence can change with or
without a change in the underlying genetic sequence. For example, <a href="/entrez/sutils/girevhist.cgi?val=J00179" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">the sequence revision history of J00179</a> reveals that although the annotation
changed four times, there has been only one sequence version (<span class="bk_pgobj"><a href="/nuccore/183807" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">J00179</a></span>) with one <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> (<span class="bk_pgobj">183807</span>). <a href="/nuccore/183807" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">J00179</a> can still be retrieved in
<a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> by searching its Accession or GI number, but this record has been replaced by
<a href="/entrez/sutils/girevhist.cgi?val=U01317" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Accession U01317</a> and therefore is no longer indexed. The version number
assigned to the &#x0201c;take over&#x0201d; record <a href="/nuccore/455025" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">U01317</a> is 1, whereas the
replaced version of this record (<a href="/nuccore/183807" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">J00179</a>) remains as <span class="bk_pgobj">Version 0</span>. All sequences deposited before
February 1999 received no sequence version, which is why <a href="/nuccore/183807" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">J00179</a> is version zero.
In February 1999, the use of a sequence version was implemented, and all sequences
deposited in <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> at that time received a version number 1. Since then, ordinals
assigned to sequence versions have increased every time a change is made to the sequence
data. </p><p>The use of both systems, Version and <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a>, leads to two parallel ways of tracking sequence
versions for an object. In the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> flatfile, the Accession Version provides the
ordinal instance (version) of the sequence. Within ID, each unique sequence is assigned a
GI number; and therefore the instances of an Accession can be tracked by checking its
chain of GI numbers. Note that Accession and Accession Version are different things, with
the former being used to designate a <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> sequence of some molecule or piece of some
molecule deposited in GenBank and the latter to indicate the version of that sequence. A
single Accession can have many GIs that are assigned every time the sequence changes,
whereas an Accession Version has only one GI.</p><p>Within the ID relational databases, there is a chain identifier that can be used to link
these <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> numbers. Not all sequences within ID are in <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> and not all have sequence
versions, but all sequences have a chain of GI numbers. For this reason, internally, the
GI number is the universal pointer to a particular sequence, as opposed to the Accession
Version, which would work only for versioned sequences. The ID database is also the
controller for allowed &#x0201c;takeovers&#x0201d; of one Accession by another. In
the example above, GI 4454562 is taken over by GI 6633795. A takeover can also occur when
the sequences of two clones are merged into a single clone. One or several of the
Accessions of older clones can be taken over by a new Accession. </p></div><div id="ch13.Output_of_Data_from_"><h3>Output of Data from the ID System</h3><p>Once all incoming data have been converted to <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> format and entered into ID, the data
are then replicated into several different servers and transformed into several different
formats (<a class="figpopup" href="/books/NBK21082/figure/ch13.F2/?report=objectonly" target="object" rid-figpopup="figch13F2" rid-ob="figobch13F2">Figure
2</a>). The replication is necessary for a number of reasons: (i) it separates the
&#x0201c;incoming&#x0201d; data system (ID) from the
&#x0201c;outgoing&#x0201d; data which is the data used in response to scientific
queries by users; (ii) it helps balance the load of queries, thus providing quicker
response times and allowing different servers to specialize in different functions; and
(iii) it protects against data loss should one server fail. The details of the internal
structure of the ID system and how the structure is replicated are discussed in the <a href="#ch13.Data_Flow_Architectu">Data Flow Architecture</a> section. </p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch13F2" co-legend-rid="figlgndch13F2"><a href="/books/NBK21082/figure/ch13.F2/?report=objectonly" target="object" title="Figure" class="img_link icnblk_img figpopup" rid-figpopup="figch13F2" rid-ob="figobch13F2"><img class="small-thumb" src="/books/NBK21082/bin/ch13.ch13f2Karl.gif" src-large="/books/NBK21082/bin/ch13.ch13f2Karl.jpg" alt="Figure 2" /></a><div class="icnblk_cntnt" id="figlgndch13F2"><h4 id="ch13.F2"><a href="/books/NBK21082/figure/ch13.F2/?report=objectonly" target="object" rid-ob="figobch13F2">Figure</a></h4><p class="float-caption no_bottom_margin"> Figure 2. Products of the ID system. </p></div></div></div><div id="ch13.The_IQ_Database"><h3>The IQ Database</h3><p>The IQ database is a <a class="def" href="/books/n/handbook/A1237/def-item/app176/">Sybase</a> data-warehousing product that preserves its SQL language
interface but which inverts its data by storing it by column, not by row. Its strength is
in its ability to speed up results from queries based on the anticipated indexing. This
non-relational database holds links between many different objects.</p><p>For example, as part of the processing of incoming sequences, each protein and nucleotide
sequence is searched for similar sequences (<a href="/books/n/handbook/ch16/">Chapter
16</a>) against the rest of the database. Users can then select the <span class="bk_pgobj">Related Sequences</span> link that is displayed next
to each record in <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> Nucleotide and Entrez Protein (<a href="/books/n/handbook/ch15/">Chapter 15</a>) to see a set of similar sequences, sometimes known as
&#x0201c;neighbors&#x0201d;. The IQ database keeps track of the neighbors for any
given sequence. These relationships are all pre-computed to save users&#x02019; time. </p><p>IQ stores the relationships between similar nucleotide sequences and between similar
protein sequences and which proteins are coded for by which nucleotides and also holds
information on the links between entries in different <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> databases. This might
include, for example, information on the publications cited within sequence records, which
links to <a class="def" href="/books/n/handbook/A1237/def-item/app150/">PubMed</a> or to an organism in the Taxonomy database. Some of this information comes
from the analysis of the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> in ID by e2index, a tool that extracts terms from <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>
sequence ASN.1 during &#x0201c;indexing&#x0201d; for Entrez. </p></div><div id="ch13.The_BLAST_Control_Da"><h3>The BLAST Control Database</h3><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> Control database receives information from ID that is used to generate BLAST
databases (<a href="/books/n/handbook/ch16/">Chapter 16</a>) for the BLAST query
service and for stand-alone BLAST users. The information is used internally to generate
the sequence neighbors stored in IQ.</p></div><div id="ch13.The_GenBank_Flatfile"><h3>The GenBank Flatfile and Error Capture Databases</h3><p>Many <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> users think of the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> flatfile as the archetypal sequence data format (see
an <a href="/entrez/viewer.fcgi?db=nucleotide&#x00026;val=21536375" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">example of a GenBank flatfile</a>). However, within NCBI and especially within
the ID internal data flow system, <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> is considered the original format from which
reports such as the GenBank flatfile can be generated (see an <a href="/entrez/viewer.fcgi?db=nucleotide&#x00026;qty=1&#x00026;c_start=1&#x00026;list_uids=21536375&#x00026;dopt=asn&#x00026;dispmax=5&#x00026;sendto=&#x00026;from=begin&#x00026;to=end&#x00026;extrafeatpresent=1&#x00026;ef_MGC=16" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">example of an ASN.1 file</a>). </p><p>Although the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> flatfile is usually generated on demand from the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>, for certain
products such as complete GenBank releases, a GenBank flatfile image is made for each
active sequence. This flatfile is stored in a database called FF4Release, which consists
of the latest transformation of ASN.1 to the GenBank flatfile format.</p><p>The FF4Release database is also a place where internal error reports are captured. The
reports can be analyzed and displayed for different time points in the data processing
pathway: </p><ul><li id="A2105" class="half_rhythm"><div><a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> itself can be validated using the testval (or its replacement, asnval)
tool&#x02014;syntax checking is not necessary, because the underlying ASN.1
libraries enforce proper syntax according to the definition file. </div></li><li id="A2106" class="half_rhythm"><div>Errors can be discovered during conversion to the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> flatfile format. </div></li><li id="A2107" class="half_rhythm"><div>Through a reparse from the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> flatfile format to <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>. This is done as a
further check on the legality of the ASN.1 and on our current software for producing
GenBank format reports from it.</div></li></ul></div><div id="ch13.Entrez_Postings_File"><h3>Entrez Postings Files</h3><p>When sequences are submitted to <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> or one of our collaborating databases, additional
information about the sequence is often included. This might be a brief description of a
gene in the <a class="def" href="/books/n/handbook/A1237/def-item/app36/">definition line</a>, along with annotated sequence features such as the source
organism name. To make this information searchable via <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a>, these words have to be
indexed. They are extracted from the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> using e2index and then stored in the Entrez
posting files, which are optimized for <a class="def" href="/books/n/handbook/A1237/def-item/app16/">Boolean</a> queries by the Entrez system (see <a href="/books/n/handbook/ch15/">Chapter 15</a>).</p><p>All of these products from the ID system are listed in <a class="figpopup" href="/books/NBK21082/table/ch13.T1/?report=objectonly" target="object" rid-figpopup="figch13T1" rid-ob="figobch13T1">Table 1</a>. <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> also generates weekly
&#x0201c;LiveLists&#x0201d; for public, collaborator, and in-house use. LiveLists
show all Accession numbers currently in use. Accession numbers that have been replaced or
otherwise removed from circulation because of error or submitter request are not in the
LiveList.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch13T1"><a href="/books/NBK21082/table/ch13.T1/?report=objectonly" target="object" title="Table" class="img_link icnblk_img figpopup" rid-figpopup="figch13T1" rid-ob="figobch13T1"><img class="small-thumb" src="/books/NBK21082/table/ch13.T1/?report=thumb" src-large="/books/NBK21082/table/ch13.T1/?report=previmg" alt="Table 1. Products of the ID system." /></a><div class="icnblk_cntnt"><h4 id="ch13.T1"><a href="/books/NBK21082/table/ch13.T1/?report=objectonly" target="object" rid-ob="figobch13T1">Table</a></h4><p class="float-caption no_bottom_margin">Table 1. Products of the ID system. </p></div></div></div></div><div id="ch13.Data_Flow_Architectu"><h2 id="_ch13_Data_Flow_Architectu_">Data Flow Architecture</h2><p>Sequences enter ID when a client (internal to <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>) loads data into the system. The <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>
data can be loaded either through a stand-alone program or a client <a class="def" href="/books/n/handbook/A1237/def-item/app4/">API</a>. In both cases, the
data are submitted to ID through IDProdOS, an open server (commonly called
&#x0201c;middleware&#x0201d;) that sits between the clients and the database system.
An overview of the flow of sequence data through the ID architecture with its multiple
components is shown in <a class="figpopup" href="/books/NBK21082/figure/ch13.F3/?report=objectonly" target="object" rid-figpopup="figch13F3" rid-ob="figobch13F3">Figure 3</a> and discussed below.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch13F3" co-legend-rid="figlgndch13F3"><a href="/books/NBK21082/figure/ch13.F3/?report=objectonly" target="object" title="Figure" class="img_link icnblk_img figpopup" rid-figpopup="figch13F3" rid-ob="figobch13F3"><img class="small-thumb" src="/books/NBK21082/bin/ch13.ch13f3Karl.gif" src-large="/books/NBK21082/bin/ch13.ch13f3Karl.jpg" alt="Figure 3" /></a><div class="icnblk_cntnt" id="figlgndch13F3"><h4 id="ch13.F3"><a href="/books/NBK21082/figure/ch13.F3/?report=objectonly" target="object" rid-ob="figobch13F3">Figure</a></h4><p class="float-caption no_bottom_margin">Figure 3. The ID system architecture. </p></div></div><p>IDProdOS hides details of the underlying complexity from the client <a class="def" href="/books/n/handbook/A1237/def-item/app4/">API</a>, which was shown to
be useful when the previous version of the ID system (a single database and an open server)
was converted to the current system without requiring any changes to the clients. </p><p>IDProdOS does an initial check of the actions required by the load. For example, in a
record that has <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> and protein sequences, including annotation and sequence identifiers,
the identifier on the protein has to be unique. The same identifier should not be given to
an outdated DNA sequence and a current sequence, unless the current sequence has replaced
the old one. That&#x02019;s because proteins, generally, are not allowed to move between
<a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> records, although proteins moving between segments of a complete genome submission
are sometimes allowed. </p><p>Additional checking is performed by stored procedures in the IdMain database. The details
of what is allowed vary according to the source of the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>, which includes direct
submissions from collaborators and the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> project. These procedures check (i) which
sequence identifiers may be used, (ii) which sequences may be replaced by which other
sequences, and (iii) which sequence version may be used in a record. </p><p>If the sequences pass all these checks, three things happen: (i) IDProdOS changes the SeqId
pointers in the blob to <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> numbers, which are now used as sequence-specific pointers, (ii)
IdMain retains the sequence identifier information that was also used for the checking, and
(iii) IDProdOS loads the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> blobs to the blob satellites. </p><p>The IdMain database contains the sequence identifiers for each of the sequence records,
including all those for <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> blobs that contain multiple sequences. It enforces sequence
version rules, among other rules.</p><p>Relational satellite databases are fully normalized databases that hold records for which
there is only one sequence per intended <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> blob. Few, if any, features are allowed on
records intended for relational satellite databases (the PubSeqOS produces the ASN.1 by
converting the data extracted from relational tables). This contrasts with the Blob
satellite databases, from which ASN.1 is retrieved as-is. Blob satellite databases,
unlike relational satellite databases, contain ASN.1 objects as unnormalized data objects.</p><p>Recently, annotation-only satellite databases have been added to the ID system. These
satellites contain annotation to be added to Bioseqs, linked by <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> number. Because there are
multiple such annotation satellite databases, more than one set of additional annotation may
be added to a Bioseq.</p><p>The SnpAnnot database contains feature information that is limited to simple <a class="def" href="/books/n/handbook/A1237/def-item/app115/">mutation</a>
information from dbSNP (<a href="/books/n/handbook/ch5/">Chapter 5</a>). The <a class="def" href="/books/n/handbook/A1237/def-item/app20/">CDD</a>
Annotation database contains feature information that is limited to protein domains for the
protein sequences known to ID. In both cases, these features might be added to <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>-curated
records by the PubSeqOS when the records are requested.</p><p>To visualize the role of replication, the rectangle in the middle of <a class="figpopup" href="/books/NBK21082/figure/ch13.F3/?report=objectonly" target="object" rid-figpopup="figch13F3" rid-ob="figobch13F3">Figure 3</a> represents the use of the <a class="def" href="/books/n/handbook/A1237/def-item/app176/">Sybase</a> Replication Server to copy
information from the loading side of the system to the query side.</p><p>Similar to IDProdOS, PubSeqOS is an open server (also called
&#x0201c;middleware&#x0201d;) that sits between the clients and the database system.
It hides details of the underlying complexity from the client <a class="def" href="/books/n/handbook/A1237/def-item/app4/">API</a>. It actually has an almost
identical code base to IDProdOS because they both serve similar functions. When a record is
requested in a format other than <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>, psansconvert is called to do the conversion. This
distinct <i>child</i> process both provides insulation from any possible instability
and allows for use of multiple central processing units (CPUs) in a natural way.</p><p>Note: The <i>child</i> process is a technical term used to describe a process
that is owned by and completely dependent on a parent process that initiated it.</p><p>At the query side are all records in <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a>, plus graveyards and EntrezControl, a special
database that is not queried by the public. EntrezControl is used to control the indexing of
blobs for Entrez. Its rows are initiated by a trigger that fires when rows are added by
replication to the IdMain database. A trigger is a special, database-stored procedure that
responds to changes in a database table. </p><p>The graveyards are databases that contain blobs that were replaced or taken over and
are therefore no longer indexed in <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a>. Once replaced or taken over, blobs do not
change&#x02014;which is the reason why they are limited to the query
side&#x02014;but they are still retrievable by <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> or other sequence identifier. </p></div><div id="bk_toc_contnr"></div></div></div>
<div class="post-content"><div><div class="half_rhythm"><a href="/books/about/copyright/">Copyright Notice</a></div><div class="small"><span class="label">Bookshelf ID: NBK21082</span></div><div style="margin-top:2em" class="bk_noprnt"><a class="bk_cntns" href="/books/n/handbook/">Contents</a><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook/ch12/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook/ch14/" title="Next page in this title">Next &gt;</a></div></div></div></div>
</div>
<!-- Custom content below content -->
<div class="col4">
</div>
<!-- Book content -->
<!-- Custom content below bottom nav -->
<div class="col5">
</div>
</div>
<div id="rightcolumn" class="four_col col last">
<!-- Custom content above discovery portlets -->
<div class="col6">
<div id="ncbi_share_book"><a href="#" class="ncbi_share" data-ncbi_share_config="popup:false,shorten:true" ref="id=NBK21082&amp;db=books">Share</a></div>
</div>
<div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Views</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="PDF_download" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="/books/NBK21082/?report=reader">PubReader</a></li><li><a href="/books/NBK21082/?report=printable">Print View</a></li><li><a data-jig="ncbidialog" href="#_ncbi_dlg_citbx_NBK21082" data-jigconfig="width:400,modal:true">Cite this Page</a><div id="_ncbi_dlg_citbx_NBK21082" style="display:none" title="Cite this Page"><div class="bk_tt">Sirotkin K, Tatusova T, Yaschenko E, et al. The Processing of Biological Sequence Data at NCBI. 2002 Oct 9 [Updated 2006 Mar 14]. In: McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. 
Chapter 13.<span class="bk_cite_avail"></span></div></div></li><li><a href="/books/NBK21082/pdf/Bookshelf_NBK21082.pdf">PDF version of this page</a> (317K)</li><li><a href="/books/n/handbook/pdf/">PDF version of this title</a> (7.2M)</li><li><a href="#" class="toggle-glossary-link" title="Enable/disable links to the glossary">Disable Glossary Links</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>In this Page</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="page-toc" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="#ch13.Overview" ref="log$=inpage&amp;link_id=inpage">Overview</a></li><li><a href="#ch13.Data_Flow_Components" ref="log$=inpage&amp;link_id=inpage">Data Flow Components</a></li><li><a href="#ch13.Data_Flow_Architectu" ref="log$=inpage&amp;link_id=inpage">Data Flow Architecture</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Recent Activity</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="recent_activity" id="Shutter"></a></div><div class="portlet_content"><div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="HTDisplay" class=""><div class="action"><a href="javascript:historyDisplayState('ClearHT')">Clear</a><a href="javascript:historyDisplayState('HTOff')" class="HTOn">Turn Off</a><a href="javascript:historyDisplayState('HTOn')" class="HTOff">Turn On</a></div><ul id="activity"><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=1" href="/portal/utils/pageresolver.fcgi?recordid=67c825c6d5edb449bf4326fc">The 
Processing of Biological Sequence Data at NCBI - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">The Processing of Biological Sequence Data at NCBI - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=2" href="/portal/utils/pageresolver.fcgi?recordid=67c825c5d5edb449bf432137">Sequin: A Sequence Submission and Editing Tool - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">Sequin: A Sequence Submission and Editing Tool - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=3" href="/portal/utils/pageresolver.fcgi?recordid=67c825c4d5edb449bf431d15">Data Flow and Processing - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">Data Flow and Processing - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=4" href="/portal/utils/pageresolver.fcgi?recordid=67c825c3d5edb449bf4315a7">The Major Histocompatibility Complex Database, dbMHC - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">The Major Histocompatibility Complex Database, dbMHC - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=5" href="/portal/utils/pageresolver.fcgi?recordid=67c825c26d1ec11b6f5f12c9">The SKY/CGH Database for Spectral Karyotyping and Comparative Genomic Hybridizat...</a><div class="ralinkpop offscreen_noflow">The SKY/CGH Database for Spectral Karyotyping and Comparative Genomic Hybridization Data - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div 
class="tertiary"></div></li></ul><p class="HTOn">Your browsing activity is empty.</p><p class="HTOff">Activity recording is turned off.</p><p id="turnOn" class="HTOff"><a href="javascript:historyDisplayState('HTOn')">Turn recording back on</a></p><a class="seemore" href="/sites/myncbi/recentactivity">See more...</a></div></div></div>
<!-- Custom content below discovery portlets -->
<div class="col7">
</div>
</div>
</div>
<!-- Custom content after all -->
<div class="col8">
</div>
<div class="col9">
</div>
<script type="text/javascript" src="/corehtml/pmc/js/jquery.scrollTo-1.4.2.js"></script>
<script type="text/javascript">
(function (jq) {
  // For every skip link on the page: make the element named by its href
  // focusable, and on click scroll to that element and hand it focus
  // rather than letting the browser follow the link.
  jq('.skiplink').each(function () {
    var link = jq(this);
    var target = jq(link.attr('href'));

    // Invoked by the scrollTo plugin once scrolling has finished.
    function focusTarget() {
      target.focus();
    }

    // ensure the target can receive focus
    target.attr('tabindex', '-1');
    target.addClass('skiptarget');

    link.on('click', function (evt) {
      evt.preventDefault();
      jq.scrollTo(target, 0, { onAfter: focusTarget });
    });
  });
})(jQuery);
</script>
</div>
<div class="bottom">
<div id="NCBIFooter_dynamic">
<!--<component id="Breadcrumbs" label="breadcrumbs"/>
<component id="Breadcrumbs" label="helpdesk"/>-->
</div>
<div class="footer" id="footer">
<section class="icon-section">
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
<div class="grid-container container">
<div class="icon-section_container">
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<defs>
<style>
.cls-11 {
fill: #737373;
}
</style>
</defs>
<title>Twitter</title>
<path class="cls-11" d="M250.11,105.48c-7,3.14-13,3.25-19.27.14,8.12-4.86,8.49-8.27,11.43-17.46a78.8,78.8,0,0,1-25,9.55,39.35,39.35,0,0,0-67,35.85,111.6,111.6,0,0,1-81-41.08A39.37,39.37,0,0,0,81.47,145a39.08,39.08,0,0,1-17.8-4.92c0,.17,0,.33,0,.5a39.32,39.32,0,0,0,31.53,38.54,39.26,39.26,0,0,1-17.75.68,39.37,39.37,0,0,0,36.72,27.3A79.07,79.07,0,0,1,56,223.34,111.31,111.31,0,0,0,116.22,241c72.3,0,111.83-59.9,111.83-111.84,0-1.71,0-3.4-.1-5.09C235.62,118.54,244.84,113.37,250.11,105.48Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>Facebook</title>
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>LinkedIn</title>
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<defs>
<style>
.cls-11,
.cls-12 {
fill: #737373;
}
.cls-11 {
fill-rule: evenodd;
}
</style>
</defs>
<title>GitHub</title>
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
</path>
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
</path>
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
</path>
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
</path>
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
</path>
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
</path>
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
</svg></a>
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
<defs><style>.cls-1{fill:#737373;}</style></defs>
<title>NCBI Insights Blog</title>
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
</svg>
</a>
</div>
</div>
</section>
<section class="container-fluid bg-primary">
<div class="container pt-5">
<div class="row mt-3">
<div class="col-lg-3 col-12">
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
<ul class="list-inline social_media">
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
<style type="text/css">
.st20 {
fill: #FFFFFF;
}
.st30 {
fill: none;
stroke: #FFFFFF;
stroke-width: 8;
stroke-miterlimit: 10;
}
</style>
<title>Twitter</title>
<g>
<g>
<g>
<path class="st20" d="M192.9,88.1c-5,2.2-9.2,2.3-13.6,0.1c5.7-3.4,6-5.8,8.1-12.3c-5.4,3.2-11.4,5.5-17.6,6.7 c-10.5-11.2-28.1-11.7-39.2-1.2c-7.2,6.8-10.2,16.9-8,26.5c-22.3-1.1-43.1-11.7-57.2-29C58,91.6,61.8,107.9,74,116 c-4.4-0.1-8.7-1.3-12.6-3.4c0,0.1,0,0.2,0,0.4c0,13.2,9.3,24.6,22.3,27.2c-4.1,1.1-8.4,1.3-12.5,0.5c3.6,11.3,14,19,25.9,19.3 c-11.6,9.1-26.4,13.2-41.1,11.5c12.7,8.1,27.4,12.5,42.5,12.5c51,0,78.9-42.2,78.9-78.9c0-1.2,0-2.4-0.1-3.6 C182.7,97.4,189.2,93.7,192.9,88.1z"></path>
</g>
</g>
<circle class="st30" cx="124.4" cy="128.8" r="108.2"></circle>
</g>
</svg></a></li>
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
<style type="text/css">
.st10 {
fill: #FFFFFF;
}
.st110 {
fill: none;
stroke: #FFFFFF;
stroke-width: 8;
stroke-miterlimit: 10;
}
</style>
<title>Facebook</title>
<g>
<g>
<path class="st10" d="M159,99.1h-24V88.4c0-5,3.3-6.2,5.7-6.2h16.8V60l-24.4-0.1c-22.1,0-26.2,16.5-26.2,27.1v12.1H90v22.5h16.9 v67.5H135v-67.5h21.7L159,99.1z"></path>
</g>
</g>
<circle class="st110" cx="123.6" cy="123.2" r="108.2"></circle>
</svg>
</a></li>
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="Youtube" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
<title>Youtube</title>
<style type="text/css">
.st4 {
fill: none;
stroke: #FFFFFF;
stroke-width: 8;
stroke-miterlimit: 10;
}
.st5 {
fill: #FFFFFF;
}
</style>
<circle class="st4" cx="124.2" cy="123.4" r="108.2"></circle>
<g transform="translate(0,-952.36218)">
<path class="st5" d="M88.4,1037.4c-10.4,0-18.7,8.3-18.7,18.7v40.1c0,10.4,8.3,18.7,18.7,18.7h72.1c10.4,0,18.7-8.3,18.7-18.7 v-40.1c0-10.4-8.3-18.7-18.7-18.7H88.4z M115.2,1058.8l29.4,17.4l-29.4,17.4V1058.8z"></path>
</g>
</svg></a></li>
</ul>
</div>
<div class="col-lg-3 col-12">
<p class="address_footer text-white">National Library of Medicine<br />
<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
Bethesda, MD 20894</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<p><a href="https://www.nlm.nih.gov/web_policies.html" class="text-white">Web Policies</a><br />
<a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white">FOIA</a><br />
<a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white" id="vdp">HHS Vulnerability Disclosure</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
<a href="https://www.nlm.nih.gov/accessibility.html" class="text-white">Accessibility</a><br />
<a href="https://www.nlm.nih.gov/careers/careers.html" class="text-white">Careers</a></p>
</div>
</div>
<div class="row">
<div class="col-lg-12 centered-lg">
<nav class="bottom-links">
<ul class="mt-3">
<li>
<a class="text-white" href="//www.nlm.nih.gov/">NLM</a>
</li>
<li>
<a class="text-white" href="https://www.nih.gov/">NIH</a>
</li>
<li>
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
</li>
<li>
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
</section>
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
</div>
</div>
</div>
<!--/.page-->
</div>
<!--/.wrap-->
</div><!-- /.twelve_col -->
</div>
<!-- /.grid -->
<span class="PAFAppResources"></span>
<!-- BESelector tab -->
<noscript><img alt="statistics" src="/stat?jsdisabled=true&amp;ncbi_db=books&amp;ncbi_pdid=book-part&amp;ncbi_acc=NBK21082&amp;ncbi_domain=handbook&amp;ncbi_report=record&amp;ncbi_type=fulltext&amp;ncbi_objectid=&amp;ncbi_pcid=/NBK21082/&amp;ncbi_pagename=The Processing of Biological Sequence Data at NCBI - The NCBI Handbook - NCBI Bookshelf&amp;ncbi_bookparttype=chapter&amp;ncbi_app=bookshelf" /></noscript>
<!-- usually for JS scripts at page bottom -->
<!--<component id="PageFixtures" label="styles"></component>-->
<!-- CE8B5AF87C7FFCB1_0191SID /projects/books/PBooks@9.11 portal106 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3879255/4121861/3501987/4008961/3893018/3821238/4062932/4209313/4212053/4076480/3921943/3400083/3426610.js" snapshot="books"></script></body>
</html>