701 lines
No EOL
79 KiB
HTML
701 lines
No EOL
79 KiB
HTML
<?xml version="1.0" encoding="utf-8"?>
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|
|
|
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
|
<!-- AppResources meta begin -->
|
|
<meta name="paf-app-resources" content="" />
|
|
<script type="text/javascript">var ncbi_startTime = new Date();</script>
|
|
|
|
<!-- AppResources meta end -->
|
|
|
|
<!-- TemplateResources meta begin -->
|
|
<meta name="paf_template" content="" />
|
|
|
|
<!-- TemplateResources meta end -->
|
|
|
|
<!-- Logger begin -->
|
|
<meta name="ncbi_db" content="books" /><meta name="ncbi_pdid" content="book-part" /><meta name="ncbi_acc" content="NBK21082" /><meta name="ncbi_domain" content="handbook" /><meta name="ncbi_report" content="record" /><meta name="ncbi_type" content="fulltext" /><meta name="ncbi_objectid" content="" /><meta name="ncbi_pcid" content="/NBK21082/" /><meta name="ncbi_pagename" content="The Processing of Biological Sequence Data at NCBI - The NCBI Handbook - NCBI Bookshelf" /><meta name="ncbi_bookparttype" content="chapter" /><meta name="ncbi_app" content="bookshelf" />
|
|
<!-- Logger end -->
|
|
|
|
<title>The Processing of Biological Sequence Data at NCBI - The NCBI Handbook - NCBI Bookshelf</title>
|
|
|
|
<!-- AppResources external_resources begin -->
|
|
<link rel="stylesheet" href="/core/jig/1.15.2/css/jig.min.css" /><script type="text/javascript" src="/core/jig/1.15.2/js/jig.min.js"></script>
|
|
|
|
<!-- AppResources external_resources end -->
|
|
|
|
<!-- Page meta begin -->
|
|
<meta name="robots" content="NOINDEX,NOFOLLOW,NOARCHIVE,NOIMAGEINDEX" /><meta name="citation_inbook_title" content="The NCBI Handbook [Internet]" /><meta name="citation_title" content="The Processing of Biological Sequence Data at NCBI" /><meta name="citation_publisher" content="National Center for Biotechnology Information (US)" /><meta name="citation_date" content="2006/03/14" /><meta name="citation_author" content="Karl Sirotkin" /><meta name="citation_author" content="Tatiana Tatusova" /><meta name="citation_author" content="Eugene Yaschenko" /><meta name="citation_author" content="Mark Cavanaugh" /><meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK21082/" /><link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" /><meta name="DC.Title" content="The Processing of Biological Sequence Data at NCBI" /><meta name="DC.Type" content="Text" /><meta name="DC.Publisher" content="National Center for Biotechnology Information (US)" /><meta name="DC.Contributor" content="Karl Sirotkin" /><meta name="DC.Contributor" content="Tatiana Tatusova" /><meta name="DC.Contributor" content="Eugene Yaschenko" /><meta name="DC.Contributor" content="Mark Cavanaugh" /><meta name="DC.Date" content="2006/03/14" /><meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK21082/" /><meta name="description" content="The biological sequence information that builds the foundation of NCBI's databases and curated resources comes from many sources. How are these data managed and processed once they reach NCBI? This chapter discusses the flow of sequence data, from the management of data submission to the generation of publicly available data products." /><meta name="og:title" content="The Processing of Biological Sequence Data at NCBI" /><meta name="og:type" content="book" /><meta name="og:description" content="The biological sequence information that builds the foundation of NCBI's databases and curated resources comes from many sources. 
How are these data managed and processed once they reach NCBI? This chapter discusses the flow of sequence data, from the management of data submission to the generation of publicly available data products." /><meta name="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK21082/" /><meta name="og:site_name" content="NCBI Bookshelf" /><meta name="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png" /><meta name="twitter:card" content="summary" /><meta name="twitter:site" content="@ncbibooks" /><meta name="warning" content="This publication is provided for historical reference only and the information may be out of date." /><meta name="bk-non-canon-loc" content="/books/n/handbook/ch13/" /><link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK21082/" /><link rel="stylesheet" href="/corehtml/pmc/css/figpopup.css" type="text/css" media="screen" /><link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css" type="text/css" /><link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_print.min.css" type="text/css" media="print" /><style type="text/css">.main-content {background:transparent repeat-y top left;background-image:url(/corehtml/pmc/css/bookshelf/2.26/img/archive.png);background-size: auto, contain; padding:0 0 0 3em }</style><style type="text/css">p a.figpopup{display:inline !important} .bk_tt {font-family: monospace} .first-line-outdent .bk_ref {display: inline} .body-content h2, .body-content .h2 {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a {text-decoration:none;border:0 !important} .temp-labeled-list .graphic {display:inline-block !important} .temp-labeled-list img{width:100%}</style><script type="text/javascript" src="/corehtml/pmc/js/jquery.hoverIntent.min.js"> </script><script type="text/javascript" src="/corehtml/pmc/js/common.min.js?_=3.18"> </script><script 
type="text/javascript" src="/corehtml/pmc/js/large-obj-scrollbars.min.js"> </script><script type="text/javascript">window.name="mainwindow";</script><script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/book-toc.min.js"> </script><script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/books.min.js"> </script><meta name="book-collection" content="NONE" />
|
|
|
|
<!-- Page meta end -->
|
|
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8D5BC07C814FC100000000010A00CD.m_13" />
|
|
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3985586/3808861/4121862/3974050/3917732/251717/4216701/14534/45193/4113719/3849091/3984811/3751656/4033350/3840896/3577051/3852958/4008682/4207974/4206132/4062871/12930/3964959/3854974/36029/4128070/9685/3549676/3609192/3609193/3609213/3395586.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3411343/3882866.css" media="print" /></head>
|
|
<body class="book-part">
|
|
<div class="grid">
|
|
<div class="col twelve_col nomargin shadow">
|
|
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
|
|
<div class="sysmessages">
|
|
<noscript>
|
|
<p class="nojs">
|
|
<strong>Warning:</strong>
|
|
The NCBI web site requires JavaScript to function.
|
|
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">more...</a>
|
|
</p>
|
|
</noscript>
|
|
</div>
|
|
<!--/.sysmessage-->
|
|
<div class="wrap">
|
|
<div class="page">
|
|
<div class="top">
|
|
<div id="universal_header">
|
|
<section class="usa-banner">
|
|
<div class="usa-accordion">
|
|
<header class="usa-banner-header">
|
|
<div class="usa-grid usa-banner-inner">
|
|
<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
|
|
<p>An official website of the United States government</p>
|
|
<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
|
|
<span class="usa-banner-button-text">Here's how you know</span>
|
|
</button>
|
|
</div>
|
|
</header>
|
|
<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
|
|
<div class="usa-banner-guidance-gov usa-width-one-half">
|
|
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
|
|
<div class="usa-media_block-body">
|
|
<p>
|
|
<strong>The .gov means it's official.</strong>
|
|
<br />
|
|
Federal government websites often end in .gov or .mil. Before
|
|
sharing sensitive information, make sure you're on a federal
|
|
government site.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
<div class="usa-banner-guidance-ssl usa-width-one-half">
|
|
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
|
|
<div class="usa-media_block-body">
|
|
<p>
|
|
<strong>The site is secure.</strong>
|
|
<br />
|
|
The <strong>https://</strong> ensures that you are connecting to the
|
|
official website and that any information you provide is encrypted
|
|
and transmitted securely.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
<div class="usa-overlay"></div>
|
|
<header class="ncbi-header" role="banner" data-section="Header">
|
|
|
|
<div class="usa-grid">
|
|
<div class="usa-width-one-whole">
|
|
|
|
<div class="ncbi-header__logo">
|
|
<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
|
|
<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
|
|
</a>
|
|
</div>
|
|
|
|
<div class="ncbi-header__account">
|
|
<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
|
|
<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
|
|
<span class="fa fa-user" aria-hidden="true">
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
|
|
<g style="fill: #fff">
|
|
<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
|
|
<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
|
|
</g>
|
|
</svg>
|
|
</span>
|
|
<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
|
|
<span class="sr-only">Show account info</span>
|
|
</button>
|
|
</div>
|
|
|
|
<div class="ncbi-popup-anchor">
|
|
<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
|
|
<div class="ncbi-popup-head">
|
|
<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
|
|
<span class="fa fa-times">
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
|
|
<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
|
|
</svg>
|
|
</span>
|
|
<span class="usa-sr-only">Close</span></button>
|
|
<h4>Account</h4>
|
|
</div>
|
|
<div class="account-user-info">
|
|
Logged in as:<br />
|
|
<b><span class="username" id="uname_long">username</span></b>
|
|
</div>
|
|
<div class="account-links">
|
|
<ul class="usa-unstyled-list">
|
|
<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
|
|
<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
|
|
<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
|
|
<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</header>
|
|
<div role="navigation" aria-label="access keys">
|
|
<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
|
|
<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
|
|
<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
|
|
<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
|
|
<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
|
|
</div>
|
|
<section data-section="Alerts">
|
|
<div class="ncbi-alerts-placeholder"></div>
|
|
</section>
|
|
</div>
|
|
<div class="header">
|
|
<div class="res_logo"><h1 class="res_name"><a href="/books/" title="Bookshelf home">Bookshelf</a></h1><h2 class="res_tagline"></h2></div>
|
|
<div class="search"><form method="get" action="/books/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="books" selected="selected" data-ac_dict="bookshelf-search">Books</option><option value="nuccore">Nucleotide</option><option value="gquery">All Databases</option><option value="sra" class="last">SRA</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books" data-ac_dict="bookshelf-search">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option 
value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search Books. Use up and down arrows to choose an item from the autocomplete." value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="dictionary:'bookshelf-search',disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'no'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form><ul class="searchlinks inline_list"><li>
|
|
<a href="/books/browse/">Browse Titles</a>
|
|
</li><li>
|
|
<a href="/books/advanced/">Advanced</a>
|
|
</li><li class="help">
|
|
<a href="/books/NBK3833/">Help</a>
|
|
</li><li class="disclaimer">
|
|
<a target="_blank" data-ga-category="literature_resources" data-ga-action="link_click" data-ga-label="disclaimer_link" href="https://www.ncbi.nlm.nih.gov/books/about/disclaimer/">Disclaimer</a>
|
|
</li></ul></div>
|
|
</div>
|
|
|
|
|
|
|
|
<!--<component id="Page" label="headcontent"/>-->
|
|
|
|
</div>
|
|
<div class="content">
|
|
<!-- site messages -->
|
|
<!-- Custom content 1 -->
|
|
<div class="col1">
|
|
|
|
</div>
|
|
|
|
<div class="container">
|
|
<div id="maincontent" class="content eight_col col">
|
|
<!-- Custom content in the left column above book nav -->
|
|
<div class="col2">
|
|
|
|
</div>
|
|
|
|
<!-- Book content -->
|
|
|
|
|
|
<!-- Custom content between navigation and content -->
|
|
<div class="col3">
|
|
|
|
</div>
|
|
|
|
<div class="document">
|
|
<div class="pre-content"><div><div class="bk_prnt"><p class="small">NCBI Bookshelf. A service of the National Library of Medicine, National Institutes of Health.</p><p>McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. </p></div><div class="bk_msg_box bk_bttm_mrgn clearfix bk_noprnt"><div class="iconblock clearfix"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/handbook2e/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook2e-lrg.png" alt="Cover" height="100px" width="80px" /></a><div class="icnblk_cntnt"><ul class="messages"><li class="info icon"><span class="icon"><a href="/books/n/handbook2e/">See "The NCBI Handbook, 2nd Edition"</a></span></li></ul></div></div></div><div class="messagearea bk_noprnt" style="margin-bottom:1.3846em "><ul class="messages"><li class="warn icon"><span class="icon">This publication is provided for historical reference only and the information may be out of date.</span></li></ul></div><div class="bk_prnt"><p style="color:red;"><strong>This publication is provided for historical reference only and the information may be out of date.</strong></p></div><div class="iconblock clearfix whole_rhythm no_top_margin bk_noprnt"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/handbook/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png" alt="Cover of The NCBI Handbook" height="100px" width="80px" /></a><div class="icnblk_cntnt eight_col"><h2>The NCBI Handbook [Internet].</h2><a data-jig="ncbitoggler" href="#__NBK21082_dtls__">Show details</a><div style="display:none" class="ui-widget" id="__NBK21082_dtls__"><div>McEntyre J, Ostell J, editors.</div><div>Bethesda (MD): <a href="https://www.ncbi.nlm.nih.gov/" ref="pagearea=page-banner&targetsite=external&targetcat=link&targettype=publisher">National Center for Biotechnology Information 
(US)</a>; 2002-.</div></div><div class="half_rhythm"><ul class="inline_list"><li style="margin-right:1em"><a class="bk_cntns" href="/books/n/handbook/">Contents</a></li></ul></div></div><div class="icnblk_cntnt two_col"><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook/ch12/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook/ch14/" title="Next page in this title">Next &gt;</a></div></div></div></div></div>
|
|
<div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><h1 id="_NBK21082_"><span class="label">Chapter 13</span><span class="title" itemprop="name">The Processing of Biological Sequence Data at NCBI</span></h1><p class="contrib-group"><span itemprop="author">Karl Sirotkin</span>, <span itemprop="author">Tatiana Tatusova</span>, <span itemprop="author">Eugene Yaschenko</span>, and <span itemprop="author">Mark Cavanaugh</span>.</p><p class="small">Created: <span itemprop="datePublished">October 9, 2002</span>; Last Update: <span itemprop="dateModified">March 14, 2006</span>.</p><p><em>Estimated reading time: 13 minutes</em></p></div><div class="jig-ncbiinpagenav body-content whole_rhythm" data-jigconfig="allHeadingLevels: ['h2'],smoothScroll: false" itemprop="text"><p>The biological sequence information that builds the foundation of <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>'s databases and
|
|
curated resources comes from many sources. How are these data managed and processed once they
|
|
reach NCBI? This chapter discusses the flow of sequence data, from the management of data
|
|
submission to the generation of publicly available data products. </p><div id="ch13.Overview"><h2 id="_ch13_Overview_">Overview</h2><p>The central dogma of molecular biology asserts that sequences flow from <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> to <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a> to
|
|
protein. In <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a>, DNA and RNA sequences are retrieved together as nucleotides and then
|
|
integrated, along with proteins, into the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> system. Once in the system, nucleotides and
|
|
proteins are both available for public use in at least three ways: </p><dl class="temp-labeled-list"><dt>1.</dt><dd id="A2095"><p class="no_top_margin">The <a href="/Sitemap/index.html#Entrez" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez
|
|
system</a> (<a href="/books/n/handbook/ch15/">Chapter 15</a>) retrieves
|
|
nucleotide and protein sequences according to text queries that are entered into the
|
|
search box. Text queries can be followed by search fields, such as author, <a class="def" href="/books/n/handbook/A1237/def-item/app36/">definition line</a>, and organism (for example, "homo sapiens"[orgn]), and are used to further define
|
|
raw sequence data being used for retrieval.</p></dd><dt>2.</dt><dd id="A2096"><p class="no_top_margin">The sequences themselves can be searched directly by using <a href="/BLAST/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLAST</a> (<a href="/books/n/handbook/ch16/">Chapter 16</a>), which uses a sequence as a query
|
|
to find similar sequences. </p></dd><dt>3.</dt><dd id="A2097"><p class="no_top_margin">Large subsets of sequences can be downloaded by <a href="/Sitemap/index.html#FTPSite" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">FTP</a>.
|
|
</p></dd></dl><p>There are many sources for both nucleotide and protein sequences. Sequences submitted
|
|
directly to <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> (<a href="/books/n/handbook/ch1/">Chapter 1</a>) or replicated
|
|
from one of our two collaborating databases, the European Molecular Biology Laboratory
|
|
(<a class="def" href="/books/n/handbook/A1237/def-item/app44/">EMBL</a>) Data Library and the <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> Data Bank of Japan (<a class="def" href="/books/n/handbook/A1237/def-item/app35/">DDBJ</a>), are the major sources. The
|
|
Reference Sequence collection (<a href="/books/n/handbook/ch18/">Chapter 18</a>) and
|
|
the UniProt database, which incorporates data from <a class="def" href="/books/n/handbook/A1237/def-item/app175/">SWISS-PROT</a>, are yet additional sources. </p><p>An information management system that consists of two major components, the ID database and
|
|
the IQ database, underlies the submission, storage, and access of <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a>, and other
|
|
curated data resources (such as the Reference Sequences (<a href="/books/n/handbook/ch18/">Chapter 18</a>), the <a class="def" href="/books/n/handbook/A1237/def-item/app99/">Map Viewer</a> (<a href="/books/n/handbook/ch20/">Chapter 20</a>), or <a class="def" href="/books/n/handbook/A1237/def-item/app97/">Entrez Gene</a> (<a href="/books/n/handbook/ch19/">Chapter
|
|
19</a>)). Whereas ID handles incoming sequences and feeds other databases with subsets
|
|
to suit different needs, IQ holds links between sequences stored in ID and between these
|
|
sequences and other resources. </p><div id="ch13.Abstract_Syntax_Nota"><h3>Abstract Syntax Notation 1 (ASN.1) Is the Data Format Used by the ID System</h3><p><a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> is the data description language in which all sequence data at <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> are structured.
|
|
ASN.1 allows a detailed description of both the sequences and the information associated
|
|
with them, such as author names, source organism, and biological features (known as
|
|
“features”). The image below shows <span class="bk_pgobj">FEATURES</span> as displayed in <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> format. </p><p>
|
|
<span class="graphic"><img src="/books/NBK21082/bin/ch13.chapt13a.jpg" alt="Image ch13.chapt13a.jpg" /></span>
|
|
</p><p>In the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> format, the organism information is presented as shown below. You can also
|
|
<a href="/entrez/viewer.fcgi?db=nucleotide&qty=1&c_start=1&list_uids=71106260&dopt=asn&dispmax=5&sendto=&from=begin&to=end&extrafeatpresent=1&ef_MGC=16" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">see a complete ASN.1 record</a>.</p><pre>orgname { name binomial { genus "Macaca" , species "mulatta" } , </pre><p>Maintaining all data in the same structured format simplifies data parsing, manipulation,
|
|
and quality assurance, and eases the task of data integration and software development for
|
|
sequence analysis. All of the various divisions of <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> can be downloaded in <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> from
|
|
the <a href="ftp://ftp.ncbi.nih.gov" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">NCBI FTP site</a>. In the ID data management
|
|
system, data are stored as ASN.1 blobs, minimizing the amount of biological information
|
|
that is captured and updated in the relational database schema.</p><p>Similar to an <a class="def" href="/books/n/handbook/A1237/def-item/app198/">XML</a> <a class="def" href="/books/n/handbook/A1237/def-item/app40/">DTD</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> has an associated file that contains the description of the
|
|
legal data structure. This file is called asn.all and is available as part of the
|
|
“C” toolkit in an archive named
|
|
“ncbi.tar.gz” located in the <a href="ftp://ftp.ncbi.nih.gov/toolbox/ncbi_tools" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">FTP directory</a>. When
|
|
unpacked, the directory “/demo”, found in the
|
|
“ncbi.tar.gz” archive, contains the asn.all file. In the same
|
|
“/demo” directory is testval.c, a tool that validates the data
|
|
against asn.all. Additionally, a set of utilities for producing ASN.1 while programming in
|
|
“C” is found in the subutil.c file of the
|
|
“/api” directory, which is unpacked from the same
|
|
“ncbi.tar.gz” archive.</p></div><div id="ch13.Sources_of_Sequence_"><h3>Sources of Sequence Data</h3><p>The sequence data available at <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> comes from many different sources (<a class="figpopup" href="/books/NBK21082/figure/ch13.F1/?report=objectonly" target="object" rid-figpopup="figch13F1" rid-ob="figobch13F1">Figure 1</a>). In
|
|
summary, the data consist of:</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch13F1" co-legend-rid="figlgndch13F1"><a href="/books/NBK21082/figure/ch13.F1/?report=objectonly" target="object" title="Figure" class="img_link icnblk_img figpopup" rid-figpopup="figch13F1" rid-ob="figobch13F1"><img class="small-thumb" src="/books/NBK21082/bin/ch13.ch13f1Karl.gif" src-large="/books/NBK21082/bin/ch13.ch13f1Karl.jpg" alt="Figure 1" /></a><div class="icnblk_cntnt" id="figlgndch13F1"><h4 id="ch13.F1"><a href="/books/NBK21082/figure/ch13.F1/?report=objectonly" target="object" rid-ob="figobch13F1">Figure</a></h4><p class="float-caption no_bottom_margin">Figure 1. Sources of sequence data available at NCBI. </p></div></div><ul><li id="A2098" class="half_rhythm"><div><a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> sequences (<a href="/books/n/handbook/ch1/">Chapter 1</a>)</div></li><li id="A2099" class="half_rhythm"><div>Reference sequences (<a href="/books/n/handbook/ch18/">Chapter 18</a>)</div></li><li id="A2100" class="half_rhythm"><div>sequences from other databases, such as <a class="def" href="/books/n/handbook/A1237/def-item/app175/">SWISS-PROT</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app137/">PIR</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app143/">PRF</a>, and <a class="def" href="/books/n/handbook/A1237/def-item/app130/">PDB</a></div></li><li id="A2101" class="half_rhythm"><div>sequences from the United States patents</div></li></ul><p>The submission pathway depends on the data source (see <a class="figpopup" href="/books/NBK21082/figure/ch13.F1/?report=objectonly" target="object" rid-figpopup="figch13F1" rid-ob="figobch13F1">Figure 1</a>) and volume. <a class="def" href="/books/n/handbook/A1237/def-item/app74/">HTGS</a> and other large-volume submitters use <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a>, usually
|
|
after converting their data to <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> with tools such as tbl2asn. Small-volume submitters
|
|
typically use either <a class="def" href="/books/n/handbook/A1237/def-item/app7/">BankIt</a> (<a href="/books/n/handbook/ch1/">Chapter 1</a>) or
|
|
<a class="def" href="/books/n/handbook/A1237/def-item/app161/">Sequin</a> (<a href="/books/n/handbook/ch12/">Chapter 12</a>) to prepare the ASN.1 for
|
|
submission.</p><p>The data received are then subjected to some quality control by the submission tools
|
|
<a class="def" href="/books/n/handbook/A1237/def-item/app7/">BankIt</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app161/">Sequin</a>, and fa2htgs. These tools have built-in validation mechanisms to check if
|
|
the data submitted have the correct structure and contain the essential information. The
|
|
work of the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> indexing staff, who uses Sequin, adds one more layer of quality
|
|
control and provides assistance to submitters. The staff also helps with the use of Sequin
|
|
for complex submissions. </p></div></div><div id="ch13.Data_Flow_Components"><h2 id="_ch13_Data_Flow_Components_">Data Flow Components</h2><div id="ch13.The_ID_Database"><h3>The ID Database</h3><p>The ID database is a group of standard relational databases that holds both <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> objects
|
|
and sequence identifier-related information. ASN.1 objects follow the specifications in
|
|
the asn.all file for <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> sequence data objects. ID holds data for <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> and the many
|
|
databases in the <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> system. Details of the architecture of relational ID databases and
|
|
the software associated with them are described <a href="#ch13.Data_Flow_Architectu">later in this chapter</a>. All of the sequences from
|
|
the International Nucleotide Sequence Database Collaboration (INSDC) are in GenBank, and
|
|
they all have Accession numbers assigned to them. Accession numbers point to sequences and
|
|
their associated biological information and annotation. </p><p>In the ID database, blobs are added into a single column of a relational database.
|
|
Although the columns behave as in a relational database, the information that makes each
|
|
blob, such as biological features, raw sequence data, and author information, is neither
|
|
parsed nor split out. In this sense, the ID database can be considered as a hybrid
|
|
database that stores complex objects. </p><p>Note: Blob stands for Binary Large Object (or binary data object) and refers to a large
|
|
piece of data, a large structured data object that can be stored as a unit and processed
|
|
by software that knows the structure. For more information, check the <a href="/books/n/handbook/A1237/">Glossary</a>.</p></div><div id="ch13.Versions__GIs__Annot"><h3>Versions, GIs, Annotation Changes, and Takeovers</h3><p>Every time a change is made to a sequence, a new version of the sequence is produced.
|
|
This new version has a new <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> number (GI or GenInfo Identifier is a sequence
|
|
identification number for a nucleotide sequence) assigned to it (<b>A</b> and
|
|
<b>B</b> in the image below). When a change is made to the annotation associated
|
|
with a sequence, a new blob is produced, but no new version or GI is assigned. This series
|
|
of events marks the history of the sequence since its first days in <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a>.</p><p>You can track annotation and sequence changes, as well as the
|
|
“takeover” of one record by another by using the Sequence Revision
|
|
History tool. The tool can be accessed from the side blue bar in <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> Nucleotide and
|
|
Entrez Protein and is used to highlight differences in sequence versions and annotations.
|
|
To understand how the History tool works, let’s examine the <a href="/entrez/sutils/girevhist.cgi?val=AF123456" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">history of the Gallus gallus doublesex and mab-3 related transcription factor 1
|
|
mRNA</a> (Accession <a href="/entrez/viewer.fcgi?db=nucleotide&val=6633795" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">AF123456</a>), which was first added to <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> on March 20, 1999. </p><p>Click on <span class="bk_pgobj">Check sequence revision
|
|
history</span> in the blue side bar of <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> Nucleotide or Entrez Protein to
|
|
be directed to the <span class="bk_pgobj">Sequence Revision
|
|
History</span> page. Enter the Accession or <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> numbers or the <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a>-style
|
|
Sequence IDs (<span class="bk_pgobj">SeqIds</span>) into the
|
|
<span class="bk_pgobj">Find</span> box. The <span class="bk_pgobj">Revision history</span> for <a href="/nuccore/6633795" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">AF123456</a> is
|
|
displayed.</p><p>
|
|
<span class="graphic"><img src="/books/NBK21082/bin/ch13.chapt13f3.jpg" alt="Image ch13.chapt13f3.jpg" /></span>
|
|
</p><p>The Update Date column (<b>C</b> in the image above) contains the date of every
|
|
update to <a href="/nuccore/6633795" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">AF123456</a>. Some involve sequence changes, others involve only annotation changes.
|
|
Click on a date in the column to retrieve <a href="/nuccore/6633795" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">AF123456</a> as it existed at that point in time.
|
|
The status column (<b>D</b>) reports which version is live and which ones are dead.
|
|
Columns I and II (<b>E</b>) are used to compare two different sequences. </p><p>Notice that on <span class="bk_pgobj">Mar 23 1999</span>, at
|
|
1:24 PM, a new <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> blob was produced for Accession AF123456. However, no new <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> number
|
|
(<b>A</b>) or version (<b>B</b>) was assigned because the changes were
|
|
limited to the annotation and biological features of the sequence, with no changes made to
|
|
the sequence data. On December 23, 1999, Accession <a href="/nuccore/6633795" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">AF123456</a> gained a new GI
|
|
(<span class="bk_pgobj">6633795</span>) and version
|
|
(<span class="bk_pgobj">Version 2</span>) because in this
|
|
case a change was made to the sequence data. </p><p>Compare the two blobs produced on March 23, 1999 and December 23, 1999 to see the
|
|
difference between them. </p><ul><li id="A2102" class="half_rhythm"><div>Start by <a href="/entrez/sutils/girevhist.cgi?val=AF123456" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">accessing the Revision history for AF123456</a>. </div></li><li id="A2103" class="half_rhythm"><div>Select one sequence in each column (I or II) as shown in the image above
|
|
(<b>E</b>). </div></li><li id="A2104" class="half_rhythm"><div>Push the <span class="bk_pgobj">Show</span> button at the
|
|
upper left of the page to display the two blobs (<b>G</b>).</div></li></ul><p>The differences between blobs are highlighted, with each blob displaying a different
|
|
color. Compare <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> blobs produced on March 20, 1999 and March 23, 1999 and you will see
|
|
that the differences between the two are limited to the annotation and biological features
|
|
described in the blobs, whereas the sequence data remain the same. </p><p>The understanding of the biological features related to a sequence can change with or
|
|
without a change in the underlying genetic sequence. For example, <a href="/entrez/sutils/girevhist.cgi?val=J00179" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">the sequence revision history of J00179</a> reveals that although the annotation
|
|
changed four times, there has been only one sequence version (<span class="bk_pgobj"><a href="/nuccore/183807" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">J00179</a></span>) with one <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> (<span class="bk_pgobj">183807</span>). <a href="/nuccore/183807" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">J00179</a> can still be retrieved in
|
|
<a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> by searching its Accession or GI number, but this record has been replaced by
|
|
<a href="/entrez/sutils/girevhist.cgi?val=U01317" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Accession U01317</a> and therefore is no longer indexed. The version number
|
|
assigned to the “takeover” record <a href="/nuccore/455025" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">U01317</a> is 1, whereas the
|
|
replaced version of this record (<a href="/nuccore/183807" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">J00179</a>) remains as <span class="bk_pgobj">Version 0</span>. All sequences deposited before
|
|
February 1999 received no sequence version, which is why <a href="/nuccore/183807" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">J00179</a> is version zero.
|
|
In February 1999, the use of a sequence version was implemented, and all sequences
|
|
deposited in <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> at that time received a version number 1. Since then, ordinals
|
|
assigned to sequence versions have increased every time a change is made to the sequence
|
|
data. </p><p>The use of both systems, Version and <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a>, leads to two parallel ways of tracking sequence
|
|
versions for an object. In the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> flatfile, the Accession Version provides the
|
|
ordinal instance (version) of the sequence. Within ID, each unique sequence is assigned a
|
|
GI number; and therefore the instances of an Accession can be tracked by checking its
|
|
chain of GI numbers. Note that Accession and Accession Version are different things, with
|
|
the former being used to designate a <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> sequence of some molecule or piece of some
|
|
molecule deposited in GenBank and the latter to indicate the version of that sequence. A
|
|
single Accession can have many GIs that are assigned every time the sequence changes,
|
|
whereas an Accession Version has only one GI.</p><p>Within the ID relational databases, there is a chain identifier that can be used to link
|
|
these <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> numbers. Not all sequences within ID are in <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> and not all have sequence
|
|
versions, but all sequences have a chain of GI numbers. For this reason, internally, the
|
|
GI number is the universal pointer to a particular sequence, as opposed to the Accession
|
|
Version, which would work only for versioned sequences. The ID database is also the
|
|
controller for allowed “takeovers” of one Accession by another. In
|
|
the example above, GI 4454562 is taken over by GI 6633795. A takeover can also occur when
|
|
the sequences of two clones are merged into a single clone. One or several of the
|
|
Accessions of older clones can be taken over by a new Accession. </p></div><div id="ch13.Output_of_Data_from_"><h3>Output of Data from the ID System</h3><p>Once all incoming data have been converted to <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> format and entered into ID, the data
|
|
are then replicated into several different servers and transformed into several different
|
|
formats (<a class="figpopup" href="/books/NBK21082/figure/ch13.F2/?report=objectonly" target="object" rid-figpopup="figch13F2" rid-ob="figobch13F2">Figure
|
|
2</a>). The replication is necessary for a number of reasons: (i) it separates the
|
|
“incoming” data system (ID) from the
|
|
“outgoing” data, which is the data used in response to scientific
|
|
queries by users; (ii) it helps balance the load of queries, thus providing quicker
|
|
response times and allowing different servers to specialize in different functions; and
|
|
(iii) it protects against data loss should one server fail. The details of the internal
|
|
structure of the ID system and how the structure is replicated are discussed in the <a href="#ch13.Data_Flow_Architectu">Data Flow Architecture</a> section. </p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch13F2" co-legend-rid="figlgndch13F2"><a href="/books/NBK21082/figure/ch13.F2/?report=objectonly" target="object" title="Figure" class="img_link icnblk_img figpopup" rid-figpopup="figch13F2" rid-ob="figobch13F2"><img class="small-thumb" src="/books/NBK21082/bin/ch13.ch13f2Karl.gif" src-large="/books/NBK21082/bin/ch13.ch13f2Karl.jpg" alt="Figure 2" /></a><div class="icnblk_cntnt" id="figlgndch13F2"><h4 id="ch13.F2"><a href="/books/NBK21082/figure/ch13.F2/?report=objectonly" target="object" rid-ob="figobch13F2">Figure</a></h4><p class="float-caption no_bottom_margin"> Figure 2. Products of the ID system. </p></div></div></div><div id="ch13.The_IQ_Database"><h3>The IQ Database</h3><p>The IQ database is a <a class="def" href="/books/n/handbook/A1237/def-item/app176/">Sybase</a> data-warehousing product that preserves its SQL language
|
|
interface but which inverts its data by storing it by column, not by row. Its strength is
|
|
in its ability to speed up results from queries based on the anticipated indexing. This
|
|
non-relational database holds links between many different objects.</p><p>For example, as part of the processing of incoming sequences, each protein and nucleotide
|
|
sequence is searched for similar sequences (<a href="/books/n/handbook/ch16/">Chapter
|
|
16</a>) against the rest of the database. Users can then select the <span class="bk_pgobj">Related Sequences</span> link that is displayed next
|
|
to each record in <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> Nucleotide and Entrez Protein (<a href="/books/n/handbook/ch15/">Chapter 15</a>) to see a set of similar sequences, sometimes known as
|
|
“neighbors”. The IQ database keeps track of the neighbors for any
|
|
given sequence. These relationships are all pre-computed to save users’ time. </p><p>IQ stores the relationships between similar nucleotide sequences and between similar
|
|
protein sequences and which proteins are coded for by which nucleotides and also holds
|
|
information on the links between entries in different <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> databases. This might
|
|
include, for example, information on the publications cited within sequence records, which
|
|
links to <a class="def" href="/books/n/handbook/A1237/def-item/app150/">PubMed</a> or to an organism in the Taxonomy database. Some of this information comes
|
|
from the analysis of the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> in ID by e2index, a tool that extracts terms from <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>
|
|
sequence ASN.1 during “indexing” for Entrez. </p></div><div id="ch13.The_BLAST_Control_Da"><h3>The BLAST Control Database</h3><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> Control database receives information from ID that is used to generate BLAST
|
|
databases (<a href="/books/n/handbook/ch16/">Chapter 16</a>) for the BLAST query
|
|
service and for stand-alone BLAST users. The information is used internally to generate
|
|
the sequence neighbors stored in IQ.</p></div><div id="ch13.The_GenBank_Flatfile"><h3>The GenBank Flatfile and Error Capture Databases</h3><p>Many <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> users think of the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> flatfile as the archetypal sequence data format (see
|
|
an <a href="/entrez/viewer.fcgi?db=nucleotide&val=21536375" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">example of a GenBank flatfile</a>). However, within NCBI and especially within
|
|
the ID internal data flow system, <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> is considered the original format from which
|
|
reports such as the GenBank flatfile can be generated (see an <a href="/entrez/viewer.fcgi?db=nucleotide&qty=1&c_start=1&list_uids=21536375&dopt=asn&dispmax=5&sendto=&from=begin&to=end&extrafeatpresent=1&ef_MGC=16" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">example of an ASN.1 file</a>). </p><p>Although the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> flatfile is usually generated on demand from the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>, for certain
|
|
products such as complete GenBank releases, a GenBank flatfile image is made for each
|
|
active sequence. This flatfile is stored in a database called FF4Release, which consists
|
|
of the latest transformation of ASN.1 to the GenBank flatfile format.</p><p>The FF4Release database is also a place where internal error reports are captured. The
|
|
reports can be analyzed and displayed for different time points in the data processing
|
|
pathway: </p><ul><li id="A2105" class="half_rhythm"><div><a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> itself can be validated using the testval (or its replacement, asnval)
|
|
tool—syntax checking is not necessary, because the underlying ASN.1
|
|
libraries enforce proper syntax according to the definition file. </div></li><li id="A2106" class="half_rhythm"><div>Errors can be discovered during conversion to the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> flatfile format. </div></li><li id="A2107" class="half_rhythm"><div>Through a reparse from the <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> flatfile format to <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>. This is done as a
|
|
further check of the legality of the ASN.1 and of our current software for producing
|
|
GenBank format reports from it.</div></li></ul></div><div id="ch13.Entrez_Postings_File"><h3>Entrez Postings Files</h3><p>When sequences are submitted to <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> or one of our collaborating databases, additional
|
|
information about the sequence is often included. This might be a brief description of a
|
|
gene in the <a class="def" href="/books/n/handbook/A1237/def-item/app36/">definition line</a>, along with annotated sequence features such as the source
|
|
organism name. To make this information searchable via <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a>, these words have to be
|
|
indexed. They are extracted from the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> using e2index and then stored in the Entrez
|
|
posting files, which are optimized for <a class="def" href="/books/n/handbook/A1237/def-item/app16/">Boolean</a> queries by the Entrez system (see <a href="/books/n/handbook/ch15/">Chapter 15</a>).</p><p>All of these products from the ID system are listed in <a class="figpopup" href="/books/NBK21082/table/ch13.T1/?report=objectonly" target="object" rid-figpopup="figch13T1" rid-ob="figobch13T1">Table 1</a>. <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> also generates weekly
|
|
“LiveLists” for public, collaborator, and in-house use. LiveLists
|
|
show all Accession numbers currently in use. Accession numbers that have been replaced or
|
|
otherwise removed from circulation because of error or submitter request are not in the
|
|
LiveList.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch13T1"><a href="/books/NBK21082/table/ch13.T1/?report=objectonly" target="object" title="Table" class="img_link icnblk_img figpopup" rid-figpopup="figch13T1" rid-ob="figobch13T1"><img class="small-thumb" src="/books/NBK21082/table/ch13.T1/?report=thumb" src-large="/books/NBK21082/table/ch13.T1/?report=previmg" alt="Table 1. Products of the ID system." /></a><div class="icnblk_cntnt"><h4 id="ch13.T1"><a href="/books/NBK21082/table/ch13.T1/?report=objectonly" target="object" rid-ob="figobch13T1">Table</a></h4><p class="float-caption no_bottom_margin">Table 1. Products of the ID system. </p></div></div></div></div><div id="ch13.Data_Flow_Architectu"><h2 id="_ch13_Data_Flow_Architectu_">Data Flow Architecture</h2><p>Sequences enter ID when a client (internal to <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>) loads data into the system. The <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>
|
|
data can be loaded either through a stand-alone program or a client <a class="def" href="/books/n/handbook/A1237/def-item/app4/">API</a>. In both cases, the
|
|
data are submitted to ID through IDProdOS, an open server (commonly called
|
|
“middleware”) that sits between the clients and the database system.
|
|
An overview of the flow of sequence data through the ID architecture with its multiple
|
|
components is shown in <a class="figpopup" href="/books/NBK21082/figure/ch13.F3/?report=objectonly" target="object" rid-figpopup="figch13F3" rid-ob="figobch13F3">Figure 3</a> and discussed below.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch13F3" co-legend-rid="figlgndch13F3"><a href="/books/NBK21082/figure/ch13.F3/?report=objectonly" target="object" title="Figure" class="img_link icnblk_img figpopup" rid-figpopup="figch13F3" rid-ob="figobch13F3"><img class="small-thumb" src="/books/NBK21082/bin/ch13.ch13f3Karl.gif" src-large="/books/NBK21082/bin/ch13.ch13f3Karl.jpg" alt="Figure 3" /></a><div class="icnblk_cntnt" id="figlgndch13F3"><h4 id="ch13.F3"><a href="/books/NBK21082/figure/ch13.F3/?report=objectonly" target="object" rid-ob="figobch13F3">Figure</a></h4><p class="float-caption no_bottom_margin">Figure 3. The ID system architecture. </p></div></div><p>IDProdOS hides details of the underlying complexity from the client <a class="def" href="/books/n/handbook/A1237/def-item/app4/">API</a>, which was shown to
|
|
be useful when the previous version of the ID system (a single database and an open server)
|
|
was converted to the current system without requiring any changes to the clients. </p><p>IDProdOS does an initial check of the actions required by the load. For example, in a
|
|
record that has <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> and protein sequences, including annotation and sequence identifiers,
|
|
the identifier on the protein has to be unique. The same identifier should not be given to
|
|
an outdated DNA sequence and a current sequence, unless the current sequence has replaced
|
|
the old one. That’s because proteins, generally, are not allowed to move between
|
|
<a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> records, although proteins moving between segments of a complete genome submission
|
|
are sometimes allowed. </p><p>Additional checking is performed by stored procedures in the IdMain database. The details
|
|
of what is allowed vary according to the source of the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>, which includes direct
|
|
submissions from collaborators and the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> project. These procedures check (i) which
|
|
sequence identifiers may be used, (ii) which sequences may be replaced by which other
|
|
sequences, and (iii) which sequence version may be used in a record. </p><p>If the sequences pass all these checks, three things happen: (i) IDProdOS changes the SeqId
|
|
pointers in the blob to <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> numbers, which are now used as sequence-specific pointers, (ii)
|
|
IdMain retains the sequence identifier information that was also used for the checking, and
|
|
(iii) IDProdOS loads the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> blobs to the blob satellites. </p><p>The IdMain database contains the sequence identifiers for each of the sequence records,
|
|
including all those for <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> blobs that contain multiple sequences. It enforces sequence
|
|
version rules, among other rules.</p><p>Relational satellite databases are fully normalized databases that hold records for which
|
|
there is only one sequence per intended <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> blob. Few, if any, features are allowed on
|
|
records intended for relational satellite databases (the PubSeqOS produces the ASN.1 by
|
|
converting the data extracted from relational tables). This contrasts with the Blob
|
|
satellite databases, from which ASN.1 is retrieved as-is. Blob satellite databases,
|
|
unlike relational databases, contain ASN.1 objects as unnormalized data objects.</p><p>Recently, annotation-only satellite databases have been added to the ID system. These
|
|
satellites contain annotation to be added to Bioseqs, linked by <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> number. Because there are
|
|
multiple such annotation satellite databases, more than one set of additional annotation may
|
|
be added to a Bioseq.</p><p>The SnpAnnot database contains feature information that is limited to simple <a class="def" href="/books/n/handbook/A1237/def-item/app115/">mutation</a>
|
|
information from dbSNP (<a href="/books/n/handbook/ch5/">Chapter 5</a>). The <a class="def" href="/books/n/handbook/A1237/def-item/app20/">CDD</a>
|
|
Annotation database contains feature information that is limited to protein domains for the
|
|
protein sequences known to ID. In both cases, these features might be added to <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>-curated
|
|
records by the PubSeqOS when the records are requested.</p><p>To visualize the role of replication, the rectangle in the middle of <a class="figpopup" href="/books/NBK21082/figure/ch13.F3/?report=objectonly" target="object" rid-figpopup="figch13F3" rid-ob="figobch13F3">Figure 3</a> represents the use of the <a class="def" href="/books/n/handbook/A1237/def-item/app176/">Sybase</a> Replication Server to copy
|
|
information from the loading side of the system to the query side.</p><p>Similar to IDProdOS, PubSeqOS is an open server (also called
|
|
“middleware”) that sits between the clients and the database system.
|
|
It hides details of the underlying complexity from the client <a class="def" href="/books/n/handbook/A1237/def-item/app4/">API</a>. It actually has an almost
|
|
identical code base as IDProdOS because they both serve similar functions. When a record is
|
|
requested in a format other than <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>, psansconvert is called to do the conversion. This
|
|
distinct <i>child</i> process allows both insulation from any possible instability
|
|
and the use of multiple central processing units (CPUs) in a natural way.</p><p>Note: The <i>child</i> process is a technical term used to describe a process
|
|
that is owned by and completely dependent on a parent process that initiated it.</p><p>At the query side are all records in <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a>, plus graveyards and EntrezControl, a special
|
|
database that is not queried by the public. EntrezControl is used to control the indexing of
|
|
blobs for Entrez. Its rows are initiated by a trigger that fires when rows are added by
|
|
replication to the IdMain database. A trigger is a special, database-stored procedure that
|
|
responds to changes in a database table. </p><p>The graveyards are databases that contain blobs that were replaced or taken over and
|
|
are therefore no longer indexed in <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a>. Once replaced or taken over, blobs do not
|
|
change—which is the reason why they are limited to the query
|
|
side—but they are still retrievable by <a class="def" href="/books/n/handbook/A1237/def-item/app67/">GI</a> or other sequence identifier. </p></div><div id="bk_toc_contnr"></div></div></div>
|
|
<div class="post-content"><div><div class="half_rhythm"><a href="/books/about/copyright/">Copyright Notice</a></div><div class="small"><span class="label">Bookshelf ID: NBK21082</span></div><div style="margin-top:2em" class="bk_noprnt"><a class="bk_cntns" href="/books/n/handbook/">Contents</a><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook/ch12/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook/ch14/" title="Next page in this title">Next &gt;</a></div></div></div></div>
|
|
|
|
</div>
|
|
|
|
<!-- Custom content below content -->
|
|
<div class="col4">
|
|
|
|
</div>
|
|
|
|
|
|
<!-- Book content -->
|
|
|
|
<!-- Custom content below bottom nav -->
|
|
<div class="col5">
|
|
|
|
</div>
|
|
</div>
|
|
|
|
<div id="rightcolumn" class="four_col col last">
|
|
<!-- Custom content above discovery portlets -->
|
|
<div class="col6">
|
|
<div id="ncbi_share_book"><a href="#" class="ncbi_share" data-ncbi_share_config="popup:false,shorten:true" ref="id=NBK21082&db=books">Share</a></div>
|
|
|
|
</div>
|
|
<div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Views</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="PDF_download" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="/books/NBK21082/?report=reader">PubReader</a></li><li><a href="/books/NBK21082/?report=printable">Print View</a></li><li><a data-jig="ncbidialog" href="#_ncbi_dlg_citbx_NBK21082" data-jigconfig="width:400,modal:true">Cite this Page</a><div id="_ncbi_dlg_citbx_NBK21082" style="display:none" title="Cite this Page"><div class="bk_tt">Sirotkin K, Tatusova T, Yaschenko E, et al. The Processing of Biological Sequence Data at NCBI. 2002 Oct 9 [Updated 2006 Mar 14]. In: McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. 
Chapter 13.<span class="bk_cite_avail"></span></div></div></li><li><a href="/books/NBK21082/pdf/Bookshelf_NBK21082.pdf">PDF version of this page</a> (317K)</li><li><a href="/books/n/handbook/pdf/">PDF version of this title</a> (7.2M)</li><li><a href="#" class="toggle-glossary-link" title="Enable/disable links to the glossary">Disable Glossary Links</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>In this Page</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="page-toc" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="#ch13.Overview" ref="log$=inpage&link_id=inpage">Overview</a></li><li><a href="#ch13.Data_Flow_Components" ref="log$=inpage&link_id=inpage">Data Flow Components</a></li><li><a href="#ch13.Data_Flow_Architectu" ref="log$=inpage&link_id=inpage">Data Flow Architecture</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Recent Activity</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="recent_activity" id="Shutter"></a></div><div class="portlet_content"><div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="HTDisplay" class=""><div class="action"><a href="javascript:historyDisplayState('ClearHT')">Clear</a><a href="javascript:historyDisplayState('HTOff')" class="HTOn">Turn Off</a><a href="javascript:historyDisplayState('HTOn')" class="HTOff">Turn On</a></div><ul id="activity"><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=1" href="/portal/utils/pageresolver.fcgi?recordid=67c825c6d5edb449bf4326fc">The Processing of 
Biological Sequence Data at NCBI - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">The Processing of Biological Sequence Data at NCBI - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=2" href="/portal/utils/pageresolver.fcgi?recordid=67c825c5d5edb449bf432137">Sequin: A Sequence Submission and Editing Tool - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">Sequin: A Sequence Submission and Editing Tool - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=3" href="/portal/utils/pageresolver.fcgi?recordid=67c825c4d5edb449bf431d15">Data Flow and Processing - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">Data Flow and Processing - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=4" href="/portal/utils/pageresolver.fcgi?recordid=67c825c3d5edb449bf4315a7">The Major Histocompatibility Complex Database, dbMHC - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">The Major Histocompatibility Complex Database, dbMHC - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=5" href="/portal/utils/pageresolver.fcgi?recordid=67c825c26d1ec11b6f5f12c9">The SKY/CGH Database for Spectral Karyotyping and Comparative Genomic Hybridizat...</a><div class="ralinkpop offscreen_noflow">The SKY/CGH Database for Spectral Karyotyping and Comparative Genomic Hybridization Data - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li></ul><p 
class="HTOn">Your browsing activity is empty.</p><p class="HTOff">Activity recording is turned off.</p><p id="turnOn" class="HTOff"><a href="javascript:historyDisplayState('HTOn')">Turn recording back on</a></p><a class="seemore" href="/sites/myncbi/recentactivity">See more...</a></div></div></div>
|
|
|
|
<!-- Custom content below discovery portlets -->
|
|
<div class="col7">
|
|
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Custom content after all -->
|
|
<div class="col8">
|
|
|
|
</div>
|
|
<div class="col9">
|
|
|
|
</div>
|
|
|
|
<script type="text/javascript" src="/corehtml/pmc/js/jquery.scrollTo-1.4.2.js"></script>
|
|
<script type="text/javascript">
|
|
/*
 * Skip-link wiring.
 *
 * For every element with class "skiplink" whose href is an in-page fragment
 * ("#some-id"), make the referenced element programmatically focusable and,
 * on click, scroll to it and move keyboard focus there.
 *
 * Links whose href is missing, is a bare "#", is not a fragment, or points at
 * an id that does not exist are left alone so the browser's native behaviour
 * still applies and the remaining skip links are still wired up.
 */
(function($){
    $('.skiplink').each(function(i, item){
        var link = $(item);
        var hash = link.attr('href');

        // Only "#id" style hrefs can be resolved to a target on this page.
        // Returning undefined from an .each() callback continues the loop.
        if (typeof hash !== 'string' || hash.charAt(0) !== '#' || hash.length < 2) {
            return;
        }

        // Resolve by id instead of passing the href to $() as a selector:
        // ids on this page may contain dots, which a selector would read as
        // class names, and a malformed selector makes jQuery throw.
        var target = $(document.getElementById(hash.slice(1)));
        if (!target.length) {
            return;
        }

        // tabindex="-1" lets the target receive focus from script without
        // adding it to the tab order.
        target.attr('tabindex', '-1').addClass('skiptarget');

        link.on('click', function(event){
            event.preventDefault();
            // Duration 0: jump immediately, then hand focus to the target.
            $.scrollTo(target, 0, {
                onAfter: function(){
                    target.focus();
                }
            });
        });
    });
})(jQuery);
|
|
</script>
|
|
</div>
|
|
<div class="bottom">
|
|
|
|
<div id="NCBIFooter_dynamic">
|
|
<!--<component id="Breadcrumbs" label="breadcrumbs"/>
|
|
<component id="Breadcrumbs" label="helpdesk"/>-->
|
|
|
|
</div>
|
|
|
|
<div class="footer" id="footer">
|
|
<section class="icon-section">
|
|
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
|
|
<div class="grid-container container">
|
|
<div class="icon-section_container">
|
|
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
|
<defs>
|
|
<style>
|
|
.cls-11 {
|
|
fill: #737373;
|
|
}
|
|
</style>
|
|
</defs>
|
|
<title>Twitter</title>
|
|
<path class="cls-11" d="M250.11,105.48c-7,3.14-13,3.25-19.27.14,8.12-4.86,8.49-8.27,11.43-17.46a78.8,78.8,0,0,1-25,9.55,39.35,39.35,0,0,0-67,35.85,111.6,111.6,0,0,1-81-41.08A39.37,39.37,0,0,0,81.47,145a39.08,39.08,0,0,1-17.8-4.92c0,.17,0,.33,0,.5a39.32,39.32,0,0,0,31.53,38.54,39.26,39.26,0,0,1-17.75.68,39.37,39.37,0,0,0,36.72,27.3A79.07,79.07,0,0,1,56,223.34,111.31,111.31,0,0,0,116.22,241c72.3,0,111.83-59.9,111.83-111.84,0-1.71,0-3.4-.1-5.09C235.62,118.54,244.84,113.37,250.11,105.48Z">
|
|
</path>
|
|
</svg></a>
|
|
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
|
<title>Facebook</title>
|
|
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
|
|
</path>
|
|
</svg></a>
|
|
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
|
<title>LinkedIn</title>
|
|
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
|
|
</path>
|
|
</svg></a>
|
|
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
|
<defs>
|
|
<style>
|
|
.cls-11,
|
|
.cls-12 {
|
|
fill: #737373;
|
|
}
|
|
|
|
.cls-11 {
|
|
fill-rule: evenodd;
|
|
}
|
|
</style>
|
|
</defs>
|
|
<title>GitHub</title>
|
|
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
|
|
</path>
|
|
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
|
|
</path>
|
|
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
|
|
</path>
|
|
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
|
|
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
|
|
</path>
|
|
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
|
|
</path>
|
|
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
|
|
</path>
|
|
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
|
|
</svg></a>
|
|
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
|
|
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
|
|
<defs><style>.cls-1{fill:#737373;}</style></defs>
|
|
<title>NCBI Insights Blog</title>
|
|
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
|
|
</svg>
|
|
</a>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
|
|
<section class="container-fluid bg-primary">
|
|
<div class="container pt-5">
|
|
<div class="row mt-3">
|
|
<div class="col-lg-3 col-12">
|
|
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
|
|
<ul class="list-inline social_media">
|
|
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
|
<style type="text/css">
|
|
.st20 {
|
|
fill: #FFFFFF;
|
|
}
|
|
|
|
.st30 {
|
|
fill: none;
|
|
stroke: #FFFFFF;
|
|
stroke-width: 8;
|
|
stroke-miterlimit: 10;
|
|
}
|
|
</style>
|
|
<title>Twitter</title>
|
|
<g>
|
|
<g>
|
|
<g>
|
|
<path class="st20" d="M192.9,88.1c-5,2.2-9.2,2.3-13.6,0.1c5.7-3.4,6-5.8,8.1-12.3c-5.4,3.2-11.4,5.5-17.6,6.7 c-10.5-11.2-28.1-11.7-39.2-1.2c-7.2,6.8-10.2,16.9-8,26.5c-22.3-1.1-43.1-11.7-57.2-29C58,91.6,61.8,107.9,74,116 c-4.4-0.1-8.7-1.3-12.6-3.4c0,0.1,0,0.2,0,0.4c0,13.2,9.3,24.6,22.3,27.2c-4.1,1.1-8.4,1.3-12.5,0.5c3.6,11.3,14,19,25.9,19.3 c-11.6,9.1-26.4,13.2-41.1,11.5c12.7,8.1,27.4,12.5,42.5,12.5c51,0,78.9-42.2,78.9-78.9c0-1.2,0-2.4-0.1-3.6 C182.7,97.4,189.2,93.7,192.9,88.1z"></path>
|
|
</g>
|
|
</g>
|
|
<circle class="st30" cx="124.4" cy="128.8" r="108.2"></circle>
|
|
</g>
|
|
</svg></a></li>
|
|
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
|
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
|
<style type="text/css">
|
|
.st10 {
|
|
fill: #FFFFFF;
|
|
}
|
|
|
|
.st110 {
|
|
fill: none;
|
|
stroke: #FFFFFF;
|
|
stroke-width: 8;
|
|
stroke-miterlimit: 10;
|
|
}
|
|
</style>
|
|
<title>Facebook</title>
|
|
<g>
|
|
<g>
|
|
<path class="st10" d="M159,99.1h-24V88.4c0-5,3.3-6.2,5.7-6.2h16.8V60l-24.4-0.1c-22.1,0-26.2,16.5-26.2,27.1v12.1H90v22.5h16.9 v67.5H135v-67.5h21.7L159,99.1z"></path>
|
|
</g>
|
|
</g>
|
|
<circle class="st110" cx="123.6" cy="123.2" r="108.2"></circle>
|
|
</svg>
|
|
</a></li>
|
|
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="Youtube" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
|
<title>Youtube</title>
|
|
<style type="text/css">
|
|
.st4 {
|
|
fill: none;
|
|
stroke: #FFFFFF;
|
|
stroke-width: 8;
|
|
stroke-miterlimit: 10;
|
|
}
|
|
|
|
.st5 {
|
|
fill: #FFFFFF;
|
|
}
|
|
</style>
|
|
<circle class="st4" cx="124.2" cy="123.4" r="108.2"></circle>
|
|
<g transform="translate(0,-952.36218)">
|
|
<path class="st5" d="M88.4,1037.4c-10.4,0-18.7,8.3-18.7,18.7v40.1c0,10.4,8.3,18.7,18.7,18.7h72.1c10.4,0,18.7-8.3,18.7-18.7 v-40.1c0-10.4-8.3-18.7-18.7-18.7H88.4z M115.2,1058.8l29.4,17.4l-29.4,17.4V1058.8z"></path>
|
|
</g>
|
|
</svg></a></li>
|
|
</ul>
|
|
</div>
|
|
<div class="col-lg-3 col-12">
|
|
<p class="address_footer text-white">National Library of Medicine<br />
|
|
<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
|
|
Bethesda, MD 20894</a></p>
|
|
</div>
|
|
<div class="col-lg-3 col-12 centered-lg">
|
|
<p><a href="https://www.nlm.nih.gov/web_policies.html" class="text-white">Web Policies</a><br />
|
|
<a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white">FOIA</a><br />
|
|
<a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white" id="vdp">HHS Vulnerability Disclosure</a></p>
|
|
</div>
|
|
<div class="col-lg-3 col-12 centered-lg">
|
|
<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
|
|
<a href="https://www.nlm.nih.gov/accessibility.html" class="text-white">Accessibility</a><br />
|
|
<a href="https://www.nlm.nih.gov/careers/careers.html" class="text-white">Careers</a></p>
|
|
</div>
|
|
</div>
|
|
<div class="row">
|
|
<div class="col-lg-12 centered-lg">
|
|
<nav class="bottom-links">
|
|
<ul class="mt-3">
|
|
<li>
|
|
<a class="text-white" href="https://www.nlm.nih.gov/">NLM</a>
|
|
</li>
|
|
<li>
|
|
<a class="text-white" href="https://www.nih.gov/">NIH</a>
|
|
</li>
|
|
<li>
|
|
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
|
|
</li>
|
|
<li>
|
|
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
|
|
</li>
|
|
</ul>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
|
|
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<!--/.page-->
|
|
</div>
|
|
<!--/.wrap-->
|
|
</div><!-- /.twelve_col -->
|
|
</div>
|
|
<!-- /.grid -->
|
|
|
|
<span class="PAFAppResources"></span>
|
|
|
|
<!-- BESelector tab -->
|
|
|
|
|
|
|
|
<noscript><img alt="statistics" src="/stat?jsdisabled=true&ncbi_db=books&ncbi_pdid=book-part&ncbi_acc=NBK21082&ncbi_domain=handbook&ncbi_report=record&ncbi_type=fulltext&ncbi_objectid=&ncbi_pcid=/NBK21082/&ncbi_pagename=The Processing of Biological Sequence Data at NCBI - The NCBI Handbook - NCBI Bookshelf&ncbi_bookparttype=chapter&ncbi_app=bookshelf" /></noscript>
|
|
|
|
|
|
<!-- usually for JS scripts at page bottom -->
|
|
<!--<component id="PageFixtures" label="styles"></component>-->
|
|
|
|
|
|
<!-- CE8B5AF87C7FFCB1_0191SID /projects/books/PBooks@9.11 portal106 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
|
|
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
|
|
|
|
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3879255/4121861/3501987/4008961/3893018/3821238/4062932/4209313/4212053/4076480/3921943/3400083/3426610.js" snapshot="books"></script></body>
|
|
</html> |