<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!-- Prolog: XHTML 1.0 Transitional, UTF-8. The XML declaration must stay on the first line with nothing in front of it. -->
|
||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||
|
||
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <!-- Document head: portal/logger metadata, citation + Dublin Core + Open Graph tags, then stylesheets and scripts.
       Only elements and comments may appear here: any character data makes an HTML parser end the head early. -->

  <!-- AppResources meta begin -->
  <meta name="paf-app-resources" content="" />
  <script type="text/javascript">var ncbi_startTime = new Date();</script>
  <!-- AppResources meta end -->

  <!-- TemplateResources meta begin -->
  <meta name="paf_template" content="" />
  <!-- TemplateResources meta end -->

  <!-- Logger begin -->
  <meta name="ncbi_db" content="books" />
  <meta name="ncbi_pdid" content="book-part" />
  <meta name="ncbi_acc" content="NBK21091" />
  <meta name="ncbi_domain" content="handbook" />
  <meta name="ncbi_report" content="record" />
  <meta name="ncbi_type" content="fulltext" />
  <meta name="ncbi_objectid" content="" />
  <meta name="ncbi_pcid" content="/NBK21091/" />
  <meta name="ncbi_pagename" content="The Reference Sequence (RefSeq) Database - The NCBI Handbook - NCBI Bookshelf" />
  <meta name="ncbi_bookparttype" content="chapter" />
  <meta name="ncbi_app" content="bookshelf" />
  <!-- Logger end -->

  <title>The Reference Sequence (RefSeq) Database - The NCBI Handbook - NCBI Bookshelf</title>

  <!-- AppResources external_resources begin -->
  <link rel="stylesheet" href="/core/jig/1.15.2/css/jig.min.css" />
  <script type="text/javascript" src="/core/jig/1.15.2/js/jig.min.js"></script>
  <!-- AppResources external_resources end -->

  <!-- Page meta begin -->
  <meta name="robots" content="NOINDEX,NOFOLLOW,NOARCHIVE,NOIMAGEINDEX" />
  <meta name="citation_inbook_title" content="The NCBI Handbook [Internet]" />
  <meta name="citation_title" content="The Reference Sequence (RefSeq) Database" />
  <meta name="citation_publisher" content="National Center for Biotechnology Information (US)" />
  <meta name="citation_date" content="2012/04/06" />
  <meta name="citation_author" content="Kim Pruitt" />
  <meta name="citation_author" content="Garth Brown" />
  <meta name="citation_author" content="Tatiana Tatusova" />
  <meta name="citation_author" content="Donna Maglott" />
  <meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK21091/" />
  <link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" />
  <meta name="DC.Title" content="The Reference Sequence (RefSeq) Database" />
  <meta name="DC.Type" content="Text" />
  <meta name="DC.Publisher" content="National Center for Biotechnology Information (US)" />
  <meta name="DC.Contributor" content="Kim Pruitt" />
  <meta name="DC.Contributor" content="Garth Brown" />
  <meta name="DC.Contributor" content="Tatiana Tatusova" />
  <meta name="DC.Contributor" content="Donna Maglott" />
  <meta name="DC.Date" content="2012/04/06" />
  <meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK21091/" />
  <!-- The description and og:description values are meant to be the same text. -->
  <meta name="description" content="NCBI’s Reference Sequence (RefSeq) database is a collection of taxonomically diverse, non-redundant and richly annotated sequences representing naturally occurring molecules of DNA, RNA, and protein. Included are sequences from plasmids, organelles, viruses, archaea, bacteria, and eukaryotes. Each RefSeq is constructed wholly from sequence data submitted to the International Nucleotide Sequence Database Collaboration (INSDC). Similar to a review article, a RefSeq is a synthesis of information integrated across multiple sources at a given time. RefSeqs provide a foundation for uniting sequence data with genetic and functional information. They are generated to provide reference standards for multiple purposes ranging from genome annotation to reporting locations of sequence variation in medical records. The RefSeq collection is available without restriction and can be retrieved in several different ways, such as by searching or by available links in NCBI resources, including PubMed, Nucleotide, Protein, Gene, and Map Viewer, searching with a sequence via BLAST, and downloading from the RefSeq FTP site." />
  <meta name="og:title" content="The Reference Sequence (RefSeq) Database" />
  <meta name="og:type" content="book" />
  <meta name="og:description" content="NCBI’s Reference Sequence (RefSeq) database is a collection of taxonomically diverse, non-redundant and richly annotated sequences representing naturally occurring molecules of DNA, RNA, and protein. Included are sequences from plasmids, organelles, viruses, archaea, bacteria, and eukaryotes. Each RefSeq is constructed wholly from sequence data submitted to the International Nucleotide Sequence Database Collaboration (INSDC). Similar to a review article, a RefSeq is a synthesis of information integrated across multiple sources at a given time. RefSeqs provide a foundation for uniting sequence data with genetic and functional information. They are generated to provide reference standards for multiple purposes ranging from genome annotation to reporting locations of sequence variation in medical records. The RefSeq collection is available without restriction and can be retrieved in several different ways, such as by searching or by available links in NCBI resources, including PubMed, Nucleotide, Protein, Gene, and Map Viewer, searching with a sequence via BLAST, and downloading from the RefSeq FTP site." />
  <meta name="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK21091/" />
  <meta name="og:site_name" content="NCBI Bookshelf" />
  <meta name="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png" />
  <meta name="twitter:card" content="summary" />
  <meta name="twitter:site" content="@ncbibooks" />
  <meta name="warning" content="This publication is provided for historical reference only and the information may be out of date." />
  <meta name="bk-non-canon-loc" content="/books/n/handbook/ch18/" />
  <link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK21091/" />
  <link rel="stylesheet" href="/corehtml/pmc/css/figpopup.css" type="text/css" media="screen" />
  <link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css" type="text/css" />
  <link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_print.min.css" type="text/css" media="print" />
  <style type="text/css">.main-content {background:transparent repeat-y top left;background-image:url(/corehtml/pmc/css/bookshelf/2.26/img/archive.png);background-size: auto, contain; padding:0 0 0 3em }</style>
  <style type="text/css">p a.figpopup{display:inline !important} .bk_tt {font-family: monospace} .first-line-outdent .bk_ref {display: inline} .body-content h2, .body-content .h2 {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a {text-decoration:none;border:0 !important} .temp-labeled-list .graphic {display:inline-block !important} .temp-labeled-list img{width:100%}</style>
  <script type="text/javascript" src="/corehtml/pmc/js/jquery.hoverIntent.min.js"> </script>
  <script type="text/javascript" src="/corehtml/pmc/js/common.min.js?_=3.18"> </script>
  <script type="text/javascript" src="/corehtml/pmc/js/large-obj-scrollbars.min.js"> </script>
  <script type="text/javascript">window.name="mainwindow";</script>
  <script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/book-toc.min.js"> </script>
  <script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/books.min.js"> </script>
  <meta name="book-collection" content="NONE" />
  <!-- Page meta end -->

  <link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" />
  <meta name="ncbi_phid" content="CE8D877F7C8055C10000000000CB0098.m_13" />
  <meta name="referrer" content="origin-when-cross-origin" />
  <link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3985586/3808861/4121862/3974050/3917732/251717/4216701/14534/45193/4113719/3849091/3984811/3751656/4033350/3840896/3577051/3852958/4008682/4207974/4206132/4062871/12930/3964959/3854974/36029/4128070/9685/3549676/3609192/3609193/3609213/3395586.css" />
  <link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3411343/3882866.css" media="print" />
</head>
|
||
<body class="book-part">
|
||
<div class="grid">
|
||
<div class="col twelve_col nomargin shadow">
|
||
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
<div class="sysmessages">
  <noscript>
    <!-- Shown only when scripting is disabled. The help link opens in a new window, so it carries rel="noopener". -->
    <p class="nojs">
      <strong>Warning:</strong>
      The NCBI web site requires JavaScript to function.
      <a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank" rel="noopener">more...</a>
    </p>
  </noscript>
</div>
<!--/.sysmessage-->
|
||
<div class="wrap">
|
||
<div class="page">
|
||
<div class="top">
|
||
<div id="universal_header">
|
||
<!-- U.S. government banner. The button controls #gov-banner-top, which starts collapsed
     (aria-expanded="false" on the button, aria-hidden="true" on the panel). -->
<section class="usa-banner">
  <div class="usa-accordion">
    <header class="usa-banner-header">
      <div class="usa-grid usa-banner-inner">
        <img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
        <p>An official website of the United States government</p>
        <button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
          <span class="usa-banner-button-text">Here's how you know</span>
        </button>
      </div>
    </header>
    <div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
      <div class="usa-banner-guidance-gov usa-width-one-half">
        <img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
        <div class="usa-media_block-body">
          <p>
            <strong>The .gov means it's official.</strong>
            <br />
            Federal government websites often end in .gov or .mil. Before
            sharing sensitive information, make sure you're on a federal
            government site.
          </p>
        </div>
      </div>
      <div class="usa-banner-guidance-ssl usa-width-one-half">
        <img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
        <div class="usa-media_block-body">
          <p>
            <strong>The site is secure.</strong>
            <br />
            The <strong>https://</strong> ensures that you are connecting to the
            official website and that any information you provide is encrypted
            and transmitted securely.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
|
||
<div class="usa-overlay"></div>
|
||
<!-- NCBI site header: agency logo, log-in link / account button (both start with display:none),
     and the account popup referenced by the button's aria-controls. -->
<header class="ncbi-header" role="banner" data-section="Header">

  <div class="usa-grid">
    <div class="usa-width-one-whole">

      <div class="ncbi-header__logo">
        <a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
          <img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
        </a>
      </div>

      <div class="ncbi-header__account">
        <a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
        <button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
          <!-- Decorative icon; the accessible name comes from the "Show account info" text below. -->
          <span class="fa fa-user" aria-hidden="true">
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
              <g style="fill: #fff">
                <ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
                <path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
              </g>
            </svg>
          </span>
          <span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
          <span class="sr-only">Show account info</span>
        </button>
      </div>

      <div class="ncbi-popup-anchor">
        <div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
          <div class="ncbi-popup-head">
            <button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
              <!-- Decorative icon, hidden from assistive technology like the user icon; the name is the "Close" text. -->
              <span class="fa fa-times" aria-hidden="true">
                <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
                  <path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
                </svg>
              </span>
              <span class="usa-sr-only">Close</span></button>
            <h4>Account</h4>
          </div>
          <div class="account-user-info">
            Logged in as:<br />
            <b><span class="username" id="uname_long">username</span></b>
          </div>
          <div class="account-links">
            <ul class="usa-unstyled-list">
              <li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
              <li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
              <li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
              <li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
            </ul>
          </div>
        </div>
      </div>

    </div>
  </div>
</header>
|
||
<!-- Access-key shortcuts (accesskey 0-4). Every link has tabindex="-1", so none is in the tab order. -->
<div role="navigation" aria-label="access keys">
  <a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
  <a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
  <a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
  <a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
  <!-- NOTE(review): "Main Navigation" points at "#", not at a navigation element id; confirm the intended target. -->
  <a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
</div>
|
||
<!-- Alerts region: holds only an empty placeholder element in the static markup. -->
<section data-section="Alerts">
  <div class="ncbi-alerts-placeholder"></div>
</section>
|
||
</div>
|
||
<!-- Resource header: Bookshelf logo plus the database search form (GET /books/) and its helper links. -->
<div class="header">
<div class="res_logo"><h1 class="res_name"><a href="/books/" title="Bookshelf home">Bookshelf</a></h1><h2 class="res_tagline"></h2></div>
<div class="search"><form method="get" action="/books/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database">
<optgroup label="Recent">
<option value="books" selected="selected" data-ac_dict="bookshelf-search">Books</option>
<option value="gene">Gene</option>
<option value="snp">SNP</option>
<option value="refseq" class="last">RefSeq</option>
</optgroup>
<optgroup label="All">
<option value="gquery">All Databases</option>
<option value="assembly">Assembly</option>
<option value="biocollections">Biocollections</option>
<option value="bioproject">BioProject</option>
<option value="biosample">BioSample</option>
<option value="books" data-ac_dict="bookshelf-search">Books</option>
<option value="clinvar">ClinVar</option>
<option value="cdd">Conserved Domains</option>
<option value="gap">dbGaP</option>
<option value="dbvar">dbVar</option>
<option value="gene">Gene</option>
<option value="genome">Genome</option>
<option value="gds">GEO DataSets</option>
<option value="geoprofiles">GEO Profiles</option>
<option value="gtr">GTR</option>
<option value="ipg">Identical Protein Groups</option>
<option value="medgen">MedGen</option>
<option value="mesh">MeSH</option>
<option value="nlmcatalog">NLM Catalog</option>
<option value="nuccore">Nucleotide</option>
<option value="omim">OMIM</option>
<option value="pmc">PMC</option>
<option value="protein">Protein</option>
<option value="proteinclusters">Protein Clusters</option>
<option value="protfam">Protein Family Models</option>
<option value="pcassay">PubChem BioAssay</option>
<option value="pccompound">PubChem Compound</option>
<option value="pcsubstance">PubChem Substance</option>
<option value="pubmed">PubMed</option>
<option value="snp">SNP</option>
<option value="sra">SRA</option>
<option value="structure">Structure</option>
<option value="taxonomy">Taxonomy</option>
<option value="toolkit">ToolKit</option>
<option value="toolkitall">ToolKitAll</option>
<option value="toolkitbookgh">ToolKitBookgh</option>
</optgroup>
</select><div class="nowrap"><label for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search Books. Use up and down arrows to choose an item from the autocomplete." value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="dictionary:'bookshelf-search',disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'no'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form><ul class="searchlinks inline_list"><li>
<a href="/books/browse/">Browse Titles</a>
</li><li>
<a href="/books/advanced/">Advanced</a>
</li><li class="help">
<a href="/books/NBK3833/">Help</a>
</li><li class="disclaimer">
<a target="_blank" rel="noopener" data-ga-category="literature_resources" data-ga-action="link_click" data-ga-label="disclaimer_link" href="https://www.ncbi.nlm.nih.gov/books/about/disclaimer/">Disclaimer</a>
</li></ul></div>
</div>
|
||
|
||
|
||
|
||
<!--<component id="Page" label="headcontent"/>-->
|
||
|
||
</div>
|
||
<div class="content">
|
||
<!-- site messages -->
<!-- Custom content 1: placeholder column, empty on this page -->
<div class="col1">
</div>
|
||
|
||
<div class="container">
|
||
<div id="maincontent" class="content eight_col col">
|
||
<!-- Custom content in the left column above book nav: placeholder column, empty on this page -->
<div class="col2">
</div>
|
||
|
||
<!-- Book content -->
<!-- Custom content between navigation and content: placeholder column, empty on this page -->
<div class="col3">
</div>
|
||
|
||
<div class="document">
|
||
<!-- Pre-content banner: source citation, pointer to the 2nd edition, historical-reference warning,
     cover with a details toggle, and previous/next chapter links.
     Literal "<", ">" and "&" in text and attribute values are written as entities so the markup stays well-formed XHTML. -->
<div class="pre-content"><div>
<div class="bk_prnt"><p class="small">NCBI Bookshelf. A service of the National Library of Medicine, National Institutes of Health.</p><p>McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. </p></div>
<div class="bk_msg_box bk_bttm_mrgn clearfix bk_noprnt"><div class="iconblock clearfix"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/handbook2e/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook2e-lrg.png" alt="Cover" height="100" width="80" /></a><div class="icnblk_cntnt"><ul class="messages"><li class="info icon"><span class="icon"><a href="/books/n/handbook2e/">See "The NCBI Handbook, 2nd Edition"</a></span></li></ul></div></div></div>
<div class="messagearea bk_noprnt" style="margin-bottom:1.3846em "><ul class="messages"><li class="warn icon"><span class="icon">This publication is provided for historical reference only and the information may be out of date.</span></li></ul></div>
<div class="bk_prnt"><p style="color:red;"><strong>This publication is provided for historical reference only and the information may be out of date.</strong></p></div>
<div class="iconblock clearfix whole_rhythm no_top_margin bk_noprnt"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/handbook/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png" alt="Cover of The NCBI Handbook" height="100" width="80" /></a><div class="icnblk_cntnt eight_col"><h2>The NCBI Handbook [Internet].</h2><a data-jig="ncbitoggler" href="#__NBK21091_dtls__">Show details</a><div style="display:none" class="ui-widget" id="__NBK21091_dtls__"><div>McEntyre J, Ostell J, editors.</div><div>Bethesda (MD): <a href="https://www.ncbi.nlm.nih.gov/" ref="pagearea=page-banner&amp;targetsite=external&amp;targetcat=link&amp;targettype=publisher">National Center for Biotechnology Information (US)</a>; 2002-.</div></div><div class="half_rhythm"><ul class="inline_list"><li style="margin-right:1em"><a class="bk_cntns" href="/books/n/handbook/">Contents</a></li></ul></div></div><div class="icnblk_cntnt two_col"><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook/ch17/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook/ch19/" title="Next page in this title">Next &gt;</a></div></div></div>
</div></div>
|
||
<div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><h1 id="_NBK21091_"><span class="label">Chapter 18</span><span class="title" itemprop="name">The Reference Sequence (RefSeq) Database</span></h1><p class="contrib-group"><span itemprop="author">Kim Pruitt</span>, <span itemprop="author">Garth Brown</span>, <span itemprop="author">Tatiana Tatusova</span>, and <span itemprop="author">Donna Maglott</span>.</p><p class="small">Created: <span itemprop="datePublished">October 9, 2002</span>; Last Update: <span itemprop="dateModified">April 6, 2012</span>.</p><p><em>Estimated reading time: 24 minutes</em></p></div><div class="jig-ncbiinpagenav body-content whole_rhythm" data-jigconfig="allHeadingLevels: ['h2'],smoothScroll: false" itemprop="text"><div id="ch18.Summary"><h2 id="_ch18_Summary_">Summary</h2><p><a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>’s Reference Sequence (<a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a>) database is a collection of taxonomically diverse, non-redundant and richly annotated sequences representing naturally occurring molecules of <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a>, and protein. Included are sequences from plasmids, organelles, viruses, archaea, bacteria, and eukaryotes. Each RefSeq is constructed wholly from sequence data submitted to the International Nucleotide Sequence Database Collaboration (INSDC). Similar to a review article, a RefSeq is a synthesis of information integrated across multiple sources at a given time. RefSeqs provide a foundation for uniting sequence data with genetic and functional information. They are generated to provide reference standards for multiple purposes ranging from genome annotation to reporting locations of sequence variation in medical records. 
The RefSeq collection is available without restriction and can be retrieved in several different ways, such as by searching or by available links in NCBI resources, including <a href="/sites/entrez?db=pubmed" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">PubMed</a>, <a href="/entrez/query.fcgi?db=Nucleotide" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Nucleotide</a>, <a href="/entrez/query.fcgi?db=Protein" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein</a>, <a href="/entrez/query.fcgi?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a>, and <a href="/mapview/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Map Viewer</a>, searching with a sequence via <a href="/BLAST/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BLAST</a>, and downloading from the <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">RefSeq FTP</a> site.</p><p>This chapter describes:</p><ul><li class="half_rhythm"><div>The database content</div></li><li class="half_rhythm"><div>How data are assembled and maintained</div></li><li class="half_rhythm"><div>How RefSeqs can be accessed and retrieved</div></li></ul></div><div id="ch18.Introduction"><h2 id="_ch18_Introduction_">Introduction</h2><p><a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>’s Reference Sequence (<a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a>) collection is a freely accessible database of naturally occurring <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a>, and protein sequences. 
It is a unique resource because it provides a large, multi-species, curated sequence database representing separate but explicitly linked records from genomes to transcripts and translation products, as appropriate. Unlike the sequence redundancy found in the public sequence repositories that comprise the <a href="http://www.insdc.org" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a>, (<i>i.e.</i>, NCBI’s <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a>, the <a href="http://www.ebi.ac.uk/ena/home" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">European Nucleotide Archive</a> [ENA], and the <a href="http://www.ddbj.nig.ac.jp/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">DNA Data Bank of Japan</a> [<a class="def" href="/books/n/handbook/A1237/def-item/app35/">DDBJ</a>]), the RefSeq collection aims to provide, for each included species, a complete set of non-redundant, extensively cross-linked, and richly annotated nucleic acid and protein records. It is recognized, however, that the coverage and finishing of public sequence data varies from organism to organism so intermediate genomic records are provided in some circumstances.</p><p>The non-redundant nature of the <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> collection facilitates database inquiries based on genomic location, sequence, or text annotation. 
Be aware, however, that the RefSeq collection does include alternatively spliced transcripts encoding the same protein or distinct protein isoforms, in addition to orthologs, paralogs, and alternative haplotypes for some organisms, which will affect the outcome of a database query.</p><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records are based on sequence records submitted to the <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a>. However, the RefSeq collection is a distinct database. The public archival databases house sequences and annotations supplied by original authors and cannot be altered by others. The RefSeq collection differs from the archival databases in the same way that a review article differs from a related collection of primary research articles on the same subject. Each RefSeq record represents a synthesis, by a person or group, of the primary information that was generated and submitted by others. Other organizing principles or standards of judgment are possible, which is why the work is attributed to the synthesizing "editors". The RefSeq dataset is curated on an ongoing basis by collaborating groups and by <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> staff. Sequence records are presented in a standard format and subjected to computational validation. The <a href="http://www.insdc.org" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> source of the RefSeq record, the curation status, and attribution to the curation group are also indicated.</p><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> collection establishes a useful baseline for integrating diverse data types, including sequence, genetic, expression, and functional information, into one consistent framework with a uniform set of conventions and standards. 
The RefSeq collection supports the following activities:</p><ul><li class="half_rhythm"><div>genome annotation</div></li><li class="half_rhythm"><div>gene characterization</div></li><li class="half_rhythm"><div>comparative genomics</div></li><li class="half_rhythm"><div>reporting sequence variation, and</div></li><li class="half_rhythm"><div>expression studies</div></li></ul></div><div id="ch18.Database_Content_Background"><h2 id="_ch18_Database_Content_Background_">Database Content: Background</h2><p>The May 2011 <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> collection (Release 47) includes sequences from more than 12,000 distinct taxonomic identifiers, ranging from viruses to bacteria to eukaryotes. It represents chromosomes, organelles, plasmids, viruses, transcripts, and more than 12.6 million proteins. Every sequence has a stable accession number, a version number, and an integer identifier (gi) assigned to it. Outdated versions are always available if a sequence is updated. RefSeq records can be distinguished from <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> records by the inclusion of an underscore (“_”) at the third position of the accession number. The RefSeq accession prefix has an implied meaning in terms of the type of molecule it represents, as outlined in <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly" target="object" rid-figpopup="figch18Trefseqaccessionnumbersandmole" rid-ob="figobch18Trefseqaccessionnumbersandmole">Table 1</a>.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Trefseqaccessionnumbersandmole"><a href="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly" target="object" title="Table 1. 
" class="img_link icnblk_img figpopup" rid-figpopup="figch18Trefseqaccessionnumbersandmole" rid-ob="figobch18Trefseqaccessionnumbersandmole"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=thumb" src-large="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=previmg" alt="Table 1. . RefSeq accession numbers and molecule types." /></a><div class="icnblk_cntnt"><h4 id="ch18.T.refseq_accession_numbers_and_mole"><a href="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly" target="object" rid-ob="figobch18Trefseqaccessionnumbersandmole">Table 1. </a></h4><p class="float-caption no_bottom_margin">RefSeq accession numbers and molecule types. </p></div></div><div id="ch18.Updates"><h3>Updates</h3><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> updates are provided daily. These include new records added to the collection, and records updated to reflect sequence or annotation changes, including complete re-annotation of a genome. New and updated records are made available in <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> and <a href="/BLAST/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BLAST</a> databases as soon as possible. The <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">RefSeq FTP site</a> also provides daily update information.</p></div><div id="ch18.Flat_File_Format_and_Annotated_Feat"><h3>Flat File Format and Annotated Features</h3><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records appear similar in format to <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> records. 
Attributes novel to RefSeq records include a unique accession prefix followed by an underscore (<a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly" target="object" rid-figpopup="figch18Trefseqaccessionnumbersandmole" rid-ob="figobch18Trefseqaccessionnumbersandmole">Table 1</a>) and a <span class="bk_pgobj">COMMENT</span> field that indicates the RefSeq <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> and the <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> source of the sequence information (Figures <a class="figpopup" href="/books/NBK21091/figure/ch18.F1A/?report=objectonly" target="object" rid-figpopup="figch18F1A" rid-ob="figobch18F1A">1A</a>, <a class="figpopup" href="/books/NBK21091/figure/ch18.F1B/?report=objectonly" target="object" rid-figpopup="figch18F1B" rid-ob="figobch18F1B">1B</a>, <a class="figpopup" href="/books/NBK21091/figure/ch18.F1C/?report=objectonly" target="object" rid-figpopup="figch18F1C" rid-ob="figobch18F1C">1C</a>, and <a class="figpopup" href="/books/NBK21091/figure/ch18.F1D/?report=objectonly" target="object" rid-figpopup="figch18F1D" rid-ob="figobch18F1D">1D</a>). For human RefSeqs, the <span class="bk_pgobj">COMMENT</span> field also indicates whether the RefSeq is a reference standard from the <a href="/refseq/rsg/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeqGene</a> project. Some RefSeq records may include feature annotations or database cross-references (db_xrefs) that are not seen in the underlying <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> record. This annotation is provided by computation and by manual curation. 
For example, nucleotide variation, <a class="def" href="/books/n/handbook/A1237/def-item/app173/">STS</a>, and tRNA features are computed for a subset of RefSeq entries using the data available in <a href="/projects/SNP/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">dbSNP</a> (<a href="/books/n/handbook/ch5/">Chapter 5</a>), <a href="/entrez/query.fcgi?db=unists" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">UniSTS</a>, and through tRNA-scan prediction (<a href="/pubmed?term=9023104%5buid%5d" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Lowe and Eddy, 1997</a>). For human and mouse, <a class="def" href="/books/n/handbook/A1237/def-item/app50/">exon</a> feature annotation is also calculated for RefSeq transcript and non-transcribed <a class="def" href="/books/n/handbook/A1237/def-item/app147/">pseudogene</a> records. Db_xrefs provide links to <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a>, nomenclature authorities, such as the HUGO Gene Nomenclature Committee (<a href="http://www.genenames.org/index.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">HGNC</a>) for human RefSeq records, and to the Consensus <a class="def" href="/books/n/handbook/A1237/def-item/app22/">CDS</a> (<a href="/projects/CCDS/CcdsBrowse.cgi" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">CCDS</a>) project. RefSeq proteins also report conserved domains computed by <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>'s <a href="/entrez/query.fcgi?db=cdd" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Conserved Domain Database</a> (<a href="/books/n/handbook/ch3/">Chapter 3</a>). 
Additional protein features are propagated from the corresponding <a href="http://www.uniprot.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">UniProtKB/Swiss-Prot</a> records for a subset of species. Other nucleotide and protein features, publications, and comments may be added by collaborating groups or NCBI staff.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Trefseqstatuscodes"><a href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" title="Table 2. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=thumb" src-large="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=previmg" alt="Table 2. . RefSeq status codes." /></a><div class="icnblk_cntnt"><h4 id="ch18.T.refseq_status_codes"><a href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-ob="figobch18Trefseqstatuscodes">Table 2. </a></h4><p class="float-caption no_bottom_margin">RefSeq status codes. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F1A" co-legend-rid="figlgndch18F1A"><a href="/books/NBK21091/figure/ch18.F1A/?report=objectonly" target="object" title="Figure 1A. " class="img_link icnblk_img figpopup" rid-figpopup="figch18F1A" rid-ob="figobch18F1A"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image001.gif" src-large="/books/NBK21091/bin/ch18-Image001.jpg" alt="Figure 1A. . Features of a RefSeq record." /></a><div class="icnblk_cntnt" id="figlgndch18F1A"><h4 id="ch18.F1A"><a href="/books/NBK21091/figure/ch18.F1A/?report=objectonly" target="object" rid-ob="figobch18F1A">Figure 1A. </a></h4><p class="float-caption no_bottom_margin">Features of a RefSeq record. The beginning of a RefSeq record when displayed in the GenBank flat file format is shown. 
</p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F1B" co-legend-rid="figlgndch18F1B"><a href="/books/NBK21091/figure/ch18.F1B/?report=objectonly" target="object" title="Figure 1B. " class="img_link icnblk_img figpopup" rid-figpopup="figch18F1B" rid-ob="figobch18F1B"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image002.gif" src-large="/books/NBK21091/bin/ch18-Image002.jpg" alt="Figure 1B. . The COMMENT and PRIMARY sections." /></a><div class="icnblk_cntnt" id="figlgndch18F1B"><h4 id="ch18.F1B"><a href="/books/NBK21091/figure/ch18.F1B/?report=objectonly" target="object" rid-ob="figobch18F1B">Figure 1B. </a></h4><p class="float-caption no_bottom_margin">The COMMENT and PRIMARY sections. The gene Summary is provided for RefSeqs with a REVIEWED status only. The PRIMARY block, providing the RefSeq assembly details, is displayed for vertebrate records predominantly. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F1C" co-legend-rid="figlgndch18F1C"><a href="/books/NBK21091/figure/ch18.F1C/?report=objectonly" target="object" title="Figure 1C. " class="img_link icnblk_img figpopup" rid-figpopup="figch18F1C" rid-ob="figobch18F1C"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image003.gif" src-large="/books/NBK21091/bin/ch18-Image003.jpg" alt="Figure 1C. . The FEATURES section." /></a><div class="icnblk_cntnt" id="figlgndch18F1C"><h4 id="ch18.F1C"><a href="/books/NBK21091/figure/ch18.F1C/?report=objectonly" target="object" rid-ob="figobch18F1C">Figure 1C. </a></h4><p class="float-caption no_bottom_margin">The FEATURES section. Only a subset of the available feature annotation is shown. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F1D" co-legend-rid="figlgndch18F1D"><a href="/books/NBK21091/figure/ch18.F1D/?report=objectonly" target="object" title="Figure 1D. 
" class="img_link icnblk_img figpopup" rid-figpopup="figch18F1D" rid-ob="figobch18F1D"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image004.gif" src-large="/books/NBK21091/bin/ch18-Image004.jpg" alt="Figure 1D. . NCBI’s Sequence Viewer." /></a><div class="icnblk_cntnt" id="figlgndch18F1D"><h4 id="ch18.F1D"><a href="/books/NBK21091/figure/ch18.F1D/?report=objectonly" target="object" rid-ob="figobch18F1D">Figure 1D. </a></h4><p class="float-caption no_bottom_margin">NCBI’s Sequence Viewer. The annotated features on a RefSeq record can be displayed in a graphical format (note the link ‘Graphics’ in Figure 1A). The display can be modified by following the ‘Configure’ link. The <a href="/books/NBK21091/figure/ch18.F1D/?report=objectonly" target="object" rid-ob="figobch18F1D">(more...)</a></p></div></div></div></div><div id="ch18.Assembling_and_Maintaining_the_RefS"><h2 id="_ch18_Assembling_and_Maintaining_the_RefS_">Assembling and Maintaining the RefSeq Collection</h2><div id="ch18.Summary_1"><h3>Summary</h3><p>The <a href="/RefSeq/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeq</a> collection is the result of data extraction from <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> submissions, curation, and computation, combined with extensive collaboration with authoritative groups. Each molecule is annotated as accurately as possible with the organism name, strain (or breed, ecotype, cultivar, or isolate), gene symbol for that organism, and informative protein name. Collaborations with authoritative groups outside of <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> provide a variety of information, including curated sequence data, nomenclature, feature annotations, and links to external organism-specific resources. 
When no collaboration has been established, NCBI staff assembles the data from the <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> submission. Each record has a <span class="bk_pgobj">COMMENT</span>, indicating the level of curation that it has received (<a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">Table 2</a>), and attribution of the collaborating group. Thus, a RefSeq record may be an essentially unchanged, validated copy of the original <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> submission, or include updated or additional information supplied by collaborators or NCBI staff.</p><p>If multiple <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> submissions represent the same molecule for an organism, the "best" sequence is chosen as the <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> record. Known mutations, sequencing errors, cloning artifacts and erroneous annotation are avoided. Sequences are validated to confirm that the genomic sequence corresponding to an annotated <a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a> feature matches the mRNA sequence record, and that coding region features translate into the corresponding protein sequence.</p><p>Working groups using distinct process pipelines compile the <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> collection for different organisms (<a class="figpopup" href="/books/NBK21091/figure/ch18.F2/?report=objectonly" target="object" rid-figpopup="figch18F2" rid-ob="figobch18F2">Figure 2</a>). 
RefSeq records are provided via several distinct approaches including:</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F2" co-legend-rid="figlgndch18F2"><a href="/books/NBK21091/figure/ch18.F2/?report=objectonly" target="object" title="Figure 2. " class="img_link icnblk_img figpopup" rid-figpopup="figch18F2" rid-ob="figobch18F2"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image005.gif" src-large="/books/NBK21091/bin/ch18-Image005.jpg" alt="Figure 2. . RefSeq Processing Pipelines." /></a><div class="icnblk_cntnt" id="figlgndch18F2"><h4 id="ch18.F2"><a href="/books/NBK21091/figure/ch18.F2/?report=objectonly" target="object" rid-ob="figobch18F2">Figure 2. </a></h4><p class="float-caption no_bottom_margin">RefSeq Processing Pipelines. Sequence data deposited in the public archival databases is available for RefSeq processing. Processing pipelines include the vertebrate curation pipeline, the computational genome annotation pipeline, and extraction from <a href="/books/NBK21091/figure/ch18.F2/?report=objectonly" target="object" rid-ob="figobch18F2">(more...)</a></p></div></div><ul><li class="half_rhythm"><div>collaboration</div></li><li class="half_rhythm"><div>extraction from <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a></div></li><li class="half_rhythm"><div>computational genome annotation pipeline</div></li><li class="half_rhythm"><div>curation by <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> staff</div></li></ul></div><div id="ch18.Collaboration"><h3>Collaboration</h3><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> welcomes collaborations with authoritative groups outside of <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> that are willing to provide sequences, nomenclature, annotation, or links to phenotypic or organism-specific resources. 
The RefSeq <a href="/RefSeq/update.cgi" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">feedback form</a> can be used to provide corrections or to initiate collaboration. The extent of collaboration may vary. For some species, the sequences and annotation of the entire RefSeq collection are provided by a collaborating authoritative group (see <a class="figpopup" href="/books/NBK21091/table/ch18.T.examples_of_collaborators_who_con/?report=objectonly" target="object" rid-figpopup="figch18Texamplesofcollaboratorswhocon" rid-ob="figobch18Texamplesofcollaboratorswhocon">Table 3</a> for examples). For others, most notably the human and mouse RefSeq collections, numerous collaborations with individual scientists contribute to the representation of specific genes or complete gene families. Nomenclature for human and mouse is also provided via collaboration with the HUGO Gene Nomenclature Committee (<a href="http://www.genenames.org/index.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">HGNC</a>) and the Mouse Genome Informatics group (<a href="http://www.informatics.jax.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">MGI</a>), respectively; <a class="figpopup" href="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=objectonly" target="object" rid-figpopup="figch18Texamplesofcollaboratinggroups" rid-ob="figobch18Texamplesofcollaboratinggroups">Table 4</a> provides additional examples. Other collaborations extend across entire sets of organisms; for example, a board of <a href="/genomes/GenomesHome.cgi?taxid=10239&hopt=advisors" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Viral Genomes Advisors</a> supports curation of the viral RefSeq collection. Thus, RefSeq records may contain information provided by an external authoritative source and/or analyses and curation at NCBI. 
The collaborating group is identified on the record.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Texamplesofcollaboratorswhocon"><a href="/books/NBK21091/table/ch18.T.examples_of_collaborators_who_con/?report=objectonly" target="object" title="Table 3. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Texamplesofcollaboratorswhocon" rid-ob="figobch18Texamplesofcollaboratorswhocon"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.examples_of_collaborators_who_con/?report=thumb" src-large="/books/NBK21091/table/ch18.T.examples_of_collaborators_who_con/?report=previmg" alt="Table 3. . Examples of collaborators who contribute RefSeq records." /></a><div class="icnblk_cntnt"><h4 id="ch18.T.examples_of_collaborators_who_con"><a href="/books/NBK21091/table/ch18.T.examples_of_collaborators_who_con/?report=objectonly" target="object" rid-ob="figobch18Texamplesofcollaboratorswhocon">Table 3. </a></h4><p class="float-caption no_bottom_margin">Examples of collaborators who contribute RefSeq records. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Texamplesofcollaboratinggroups"><a href="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=objectonly" target="object" title="Table 4. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Texamplesofcollaboratinggroups" rid-ob="figobch18Texamplesofcollaboratinggroups"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=thumb" src-large="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=previmg" alt="Table 4. " /></a><div class="icnblk_cntnt"><h4 id="ch18.T.examples_of_collaborating_groups"><a href="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=objectonly" target="object" rid-ob="figobch18Texamplesofcollaboratinggroups">Table 4. 
</a></h4><p class="float-caption no_bottom_margin">Examples of collaborating groups </p></div></div><p>Processing of <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records supplied entirely by an external group is largely automated. The sequence and/or annotation is periodically submitted, validated to detect conflicts in the annotation, and modified slightly to format the submission as a RefSeq record, including addition of db_xrefs to <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a>. <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> staff do not directly curate the annotation or modify the sequence of RefSeq records provided by collaborating groups. Any problems identified by the validation process or by the scientific community are reported to the submitting group, and any update made to the annotation or sequence is reflected in a future RefSeq release.</p></div><div id="ch18.Extraction_from_GenBank_records"><h3>Extraction from GenBank records</h3><p>Complete genome data for viruses, organelles, prokaryotes, and some eukaryotes is propagated to <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records from the whole genome sequence data and annotation available in <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> (also in the ENA and <a class="def" href="/books/n/handbook/A1237/def-item/app35/">DDBJ</a> public archives). Generally, an initial validation step is performed before the RefSeq record is made public. The resulting RefSeq record is a copy of the <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> submission but may contain some additional annotations as a result of the validation step. 
In particular, transcripts are provided as separate RefSeq records for most eukaryotic organisms; the <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> submission of the genome sequence from which the RefSeq record is propagated instantiates the protein only, not the transcript.</p><p>This process flow is supported by the <a href="/bioproject" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BioProject</a> and <a href="/entrez/query.fcgi?db=genome" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Genome</a> databases. The <a href="/bioproject" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BioProject</a> database tracks the status of whole-genome sequencing projects submitted to <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> and other types of large-scale projects, and provides an overview of the organism and links to data and other resources. The resulting genomic <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> data is represented in the <a href="/sites/genome/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Genome</a> database, which includes bacteria, archaea, eukaryotes, viroids, viruses, plasmids, and organelles. 
The <a href="/sites/genome/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Genome</a> website provides custom displays, analysis, and tools for prokaryotic and some eukaryotic genomes (see <a class="figpopup" href="/books/NBK21091/table/ch18.T.selected_entrez_genome_resources/?report=objectonly" target="object" rid-figpopup="figch18Tselectedentrezgenomeresources" rid-ob="figobch18Tselectedentrezgenomeresources">Table 5</a>).</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Tselectedentrezgenomeresources"><a href="/books/NBK21091/table/ch18.T.selected_entrez_genome_resources/?report=objectonly" target="object" title="Table 5. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Tselectedentrezgenomeresources" rid-ob="figobch18Tselectedentrezgenomeresources"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.selected_entrez_genome_resources/?report=thumb" src-large="/books/NBK21091/table/ch18.T.selected_entrez_genome_resources/?report=previmg" alt="Table 5. . Selected Entrez Genome resources." /></a><div class="icnblk_cntnt"><h4 id="ch18.T.selected_entrez_genome_resources"><a href="/books/NBK21091/table/ch18.T.selected_entrez_genome_resources/?report=objectonly" target="object" rid-ob="figobch18Tselectedentrezgenomeresources">Table 5. </a></h4><p class="float-caption no_bottom_margin">Selected Entrez Genome resources. 
</p></div></div><p>Note that processing of most eukaryotic genomes is more complex, requires more than basic extraction from <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a>, and occurs independently, largely because the volume of data is significantly greater.</p><p>Extraction of <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> whole genome data for processing into <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records falls into four primary categories: <a href="#ch18.Chromosomes">chromosomes</a>, <a href="#ch18.Microbial_genomes">microbial genomes</a>, <a href="#ch18.Small_complete_genomes">small complete genomes</a>, and <a href="#ch18.Targeted_loci">targeted loci</a>.</p><div id="ch18.Chromosomes"><h4>Chromosomes</h4><p>Complete chromosome sequence assembled from individual clones (that are themselves available from the <a href="http://www.insdc.org" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a>) is propagated into a <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> record. For some genomes, the RefSeq representation uses a unit of interest to the research community; for example, some of the RefSeq genomic records for <i>Drosophila melanogaster</i> represent chromosome arms rather than complete chromosomes. RefSeq records may also be available for some genomes that are not yet fully sequenced but for which complete sequence is available for individual chromosomes. 
These complete chromosome RefSeq records may be annotated by the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> computational annotation pipeline, or they may be curated by an organism-specific collaborating group and undergo NCBI validation before being released.</p></div><div id="ch18.Microbial_genomes"><h4>Microbial genomes</h4><p>For microbial species, historically all complete and draft genomes submitted to <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> were propagated to the <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> collection. This is no longer tenable because of the volume of genomic data being generated, so additional RefSeq records are created from new <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> submissions only to span the taxonomic diversity; this means that, in general, one genomic RefSeq per species is provided. If significant sequence diversity exists, or if subspecies or subgroups require representation as determined by <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> staff, more than one RefSeq may exist for a given species.</p></div><div id="ch18.Small_complete_genomes"><h4>Small complete genomes</h4><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records representing organelle, viral, and plasmid genomes are based on single <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> records. For organelle and viral genomes, if more than one <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> submission is available for a species, typically only one is chosen to propagate to the RefSeq collection. 
Various factors, including the level of annotation, strain information, and community input, are considered when deciding which <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> submission to represent. There is no plasmid taxonomy; a <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> submission is propagated to the RefSeq collection if it is part of a larger registered genome sequencing project, or if it exhibits significant sequence divergence when compared to other plasmids.</p></div><div id="ch18.Targeted_loci"><h4>Targeted loci</h4><p>The <a href="/genomes/static/refseqtarget.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeq Targeted Loci Project</a> is a collaborative effort to curate and maintain molecular markers of use in the identification and classification of organisms. The initial focus is on ribosomal RNAs, although expansion to other informative sequences is anticipated. From <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> submissions, the project creates RefSeq records for the small subunit of ribosomal <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a> (16S in prokaryotes and 18S in eukaryotes) and the large subunit ribosomal RNA (23S in prokaryotes and 28S in eukaryotes). As of November 2010, there are 3331 16S rDNA RefSeq records from bacteria and archaea, and 137 18S rDNA and 97 28S rDNA RefSeq records from fungi.</p></div></div><div id="ch18.Computational_Genome_Annotation_Pip"><h3>Computational Genome Annotation Pipeline</h3><p><a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> computes annotation of genomic sequence data for some genomes including some microbes, vertebrates (<i>e.g.</i>, human, mouse, rat, cow, and zebrafish) and invertebrates (<i>e.g.</i>, honey bee, acorn worm, and pea aphid). 
The annotation pipeline is automated and yields genomic, transcript, and protein (when appropriate) <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records. Names annotated on the transcript and protein products are based on sequence similarity. Annotation data are refreshed periodically, and records generated from this process flow are not curated or updated between annotation runs (see <a href="/books/n/handbook/ch14/">Chapter 14</a> for more information on the eukaryotic genome annotation pipeline; information about NCBI’s prokaryotic annotation pipeline is also <a href="/genomes/static/Pipeline.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">available</a>). For some species, including human, RefSeq records may be provided by a mixture of methods. In other words, there may be a set of curated transcript and protein records (see the following section) in addition to a set of records generated computationally. RefSeq records that are processed by NCBI's pipelines are displayed in the NCBI <a href="/mapview/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Map Viewer</a> (<a href="/books/n/handbook/ch20/">Chapter 20</a>), included in <a href="/entrez/query.fcgi?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a>, and are available in NCBI’s sequence databases.</p></div><div id="ch18.Curation_by_NCBI_Staff"><h3>Curation by NCBI Staff</h3><p>A portion of the <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> dataset is curated by <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> staff. This subset includes viral, mitochondrial, vertebrate, and some invertebrate organisms. 
Most bacterial, plant, and fungal records are provided either by collaboration or by processing the annotated genome data submitted to the <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a>; however, a small number of bacterial genomes are annotated and curated by NCBI staff.</p><div id="ch18.Curation_of_Microbial_Viral_and_Mit"><h4>Curation of Microbial, Viral, and Mitochondrial RefSeqs</h4><p>Microbial, viral, and metazoan mitochondrial <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records are validated for content propagated from the original <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> submission, including taxonomy, publications, and annotation, prior to becoming public. This content may be modified, augmented, or deleted by <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> curation staff.</p><p>For microbial genomes, a set of minimal annotation standards (described <a href="/genomes/AnnotationWorkshop.html#refAD" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">here</a>) are automatically provided on all legacy and new <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records. These include ribosomal RNAs, transfer RNAs, and protein-coding genes with locus_tags. Ribosomal RNAs are predicted using BLASTn tools against an <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a> sequence database and/or using Infernal (Eddy, 2002) and Rfam models (Griffiths-Jones et al., 2003). Transfer RNAs are predicted using tRNAscan-SE (Lowe and Eddy, 1997). Other annotation above the minimum standards may be added based on an external source or literature review. 
Annotation associated with <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>’s <a href="/proteinclusters" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein Clusters</a> database is also propagated to the RefSeq records (both proteins and genes) at selected intervals. The <a href="/proteinclusters" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein Clusters</a> database is a collection of RefSeq proteins from complete genomes broadly organized into the following groups: archaeal and bacterial genomes and plasmids, viruses, protists, plants, and chloroplasts and mitochondria, and annotated based on sequence similarity and protein function. This clustering allows the entire group to be curated as a single set, permitting well characterized proteins to seed the annotation of less studied ones within the same <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a>. NCBI staff use literature and information from other databases, including <a href="http://www.uniprot.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">UniProtKB/Swiss-Prot</a>, to annotate each cluster with standardized protein names, biochemical descriptions, and other data, which is then transferred to individual proteins within the relevant RefSeq records. 
A microbial genome RefSeq record typically has a <span class="bk_pgobj">PROVISIONAL</span> review <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a>.</p><p>Annotation of viral genomes relies on an established group of <a href="/genomes/GenomesHome.cgi?taxid=10239&hopt=advisors" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Viral RefSeq Genome Advisors</a>, members of the <a href="http://www.ictvonline.org/index.asp?bhcp=1" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">International Committee on the Taxonomy of Viruses</a>, and other experts outside of <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>. For example, the <a class="def" href="/books/n/handbook/A1237/def-item/app70/">HIV</a>-1 RefSeq (<a href="/nuccore/NC_001802" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">NC_001802</a>) was curated by NCBI staff in collaboration with the authors of the book <a href="/books/NBK19376/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Retroviruses</a>, and many of the adenovirus and herpesvirus records have been curated by outside experts. Based on literature review, NCBI curators may modify the <a class="def" href="/books/n/handbook/A1237/def-item/app22/">CDS</a> and <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a> annotation compared to the <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> submission, as was done for the Measles virus RefSeq record (<a href="/nuccore/NC_001498" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">NC_001498</a>). 
Additional NCBI resources used during the curation of viral RefSeq records include the <a href="/proteinclusters" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein Clusters</a> database and <a href="/sutils/pasc/viridty.cgi?textpage=documentation#refAA" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">PASC</a>, a virus classification tool used to validate the taxonomy of virus RefSeq records across a number of taxonomic families. NCBI also maintains several specialized annotation pipelines for use in the <a href="/genomes/VirusVariation/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Virus Variation</a> and <a href="/genomes/FLU/FLU.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Influenza Virus</a> resources. Manually curated viral RefSeq records are annotated with a <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of <span class="bk_pgobj">REVIEWED</span> or <span class="bk_pgobj">VALIDATED</span> in the RefSeq COMMENT block.</p><p>For metazoan mitochondrial <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records, standardized protein, gene, and <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a> names are annotated independent of species-specific nomenclature guidelines. Additional curation may include adding common names or missing tRNAs and adjusting the coding region spans based on the <a href="/sites/entrez?db=proteinclusters" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein Clusters</a> database. 
Curated metazoan mitochondrial records are annotated with a <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of <span class="bk_pgobj">REVIEWED</span>. Non-metazoan and plant chloroplast RefSeq records are not curated, are derived entirely from the original <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> submission, and have a <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of <span class="bk_pgobj">PROVISIONAL</span>.</p><p>For targeted loci, vector or primer sequence from the <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> submission is excluded from the <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> record. Any feature annotation may be modified to represent a standard format, and collection identifiers and publications referencing the original <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> submission may be added.</p></div><div id="ch18.Curation_of_Vertebrate_and_Inverteb"><h4>Curation of Vertebrate and Invertebrate Records</h4><p>Curation of higher eukaryotic organisms is focused on mammalian genomes, especially human and mouse, but also includes many other species with existing or planned genome assemblies. The <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> processing for these organisms provides transcripts and protein records as well as some genomic region records representing gene clusters or pseudogenes; these genomic region records facilitate genome-wide annotation. 
Because RefSeq uses evidence independent of a genome assembly to represent RNAs and proteins, the dataset can represent sequence not currently part of that genome assembly. RefSeq processing integrates the official nomenclature and other information, including alternate names, <a href="http://www.geneontology.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene Ontology</a> (GO) terms, and literature and <a href="/projects/GeneRIF/GeneRIFhelp.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GeneRIFs</a> available in <a href="/entrez/query.fcgi?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a>. Multiple collaborations support the collection of this descriptive information (<a class="figpopup" href="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=objectonly" target="object" rid-figpopup="figch18Texamplesofcollaboratinggroups" rid-ob="figobch18Texamplesofcollaboratinggroups">Table 4</a>; see also <a href="/books/n/handbook/ch19/">Chapter 19</a>).</p><p>Sequences enter <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> curation processing by a combination of computational analysis, collaboration, and in-house curation. As illustrated in <a class="figpopup" href="/books/NBK21091/figure/ch18.F2/?report=objectonly" target="object" rid-figpopup="figch18F2" rid-ob="figobch18F2">Figure 2</a>, generation of the initial RefSeq record depends on identifying a representative sequence for a gene. 
New genes and sequence data are added to the in-house version of the <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> database by RefSeq curators, collaborators, <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>’s genome annotation pipeline, and NCBI-based mining of <a href="/entrez/query.fcgi?db=unigene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">UniGene</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app21/">cDNA</a> alignments, and <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> submissions. Quality assessment (QA) processes are executed regularly to identify questionable data for review. These assessments include analysis of nomenclature, sequence similarity, genomic placement, and potential cloning errors (<i>e.g.</i>, chimeras). The QA steps also leverage data from other NCBI resources, including <a href="/entrez/query.fcgi?db=homologene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">HomoloGene</a>, <a href="/mapview/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Map Viewer</a>, and <a href="/genbank/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank</a> related sequences. Data conflicts must be resolved before the <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> submission is used to generate a RefSeq record.</p><p>A sequence record unambiguously associated with a <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> record may be propagated into a <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> record. The completeness of the sequence (<i>e.g.</i>, complete vs. 
partial <a class="def" href="/books/n/handbook/A1237/def-item/app22/">CDS</a>) and the category of the gene (<i>e.g.</i>, protein coding, <a class="def" href="/books/n/handbook/A1237/def-item/app147/">pseudogene</a>) determine whether a RefSeq will be made, and if so, of what type (<a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a> plus protein). RefSeq records are not made for incomplete proteins, transposable elements, or those loci for which the product type is uncertain (<i>e.g.</i>, protein coding or not). It should be noted, however, that the RefSeq collection does include partial transcripts and proteins that are provided by collaborating groups or when the RefSeq is based on an annotated whole genome sequence submitted to the <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a>.</p><p>Once a suitable “source” sequence is identified, the <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> record is generated using the sequence data from the <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> submission and the annotation data from the in-house version of the <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> database. Information from <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> includes the <a class="def" href="/books/n/handbook/A1237/def-item/app96/">GeneID</a>, cross-references to other databases, official nomenclature, aliases, alternate descriptive names, map location, and citations, including those submitted as GeneRIFs. 
RefSeq records are also subject to programmatic validation to identify annotation format errors and to provide annotation in a more consistent format. Records at this stage have a <span class="bk_pgobj">PROVISIONAL</span>, <span class="bk_pgobj">PREDICTED</span>, or <span class="bk_pgobj">INFERRED</span>
<a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> depending on the evidence existing in support of the <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> record.</p><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> processing for non-protein-coding <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a> loci uses the longest defining transcript record associated with the Gene record. For non-transcribed loci (such as non-transcribed pseudogenes), the RefSeq record is typically derived from a region of a larger genomic sequence. Curation of these types of records is minimal because the current focus is on curation of protein-coding loci; however, these records provide an important reagent for the computational annotation pipeline and support annotation of non-protein-coding genes that might otherwise be missed or misrepresented as a predicted protein-coding gene.</p><p>Other <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records are provided to represent larger genomic regions, including <a href="/refseq/rsg/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeqGene</a> sequences, gene clusters, genes requiring rearrangement to express a product (immunoglobulins and T-cell receptors), and haplotypes with known differences in gene content. 
These genomic region records are annotated by <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> curation staff, often in collaboration with scientific experts, and are not provided by automatic processing.</p><p><a href="/refseq/rsg/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeqGene</a>, a partner of the international Locus Reference Genomic (<a href="http://www.lrg-sequence.org" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">LRG</a>) collaboration, provides stable reference standard genomic, <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a>, and protein RefSeqs for medically important genes. These standards support the <a href="http://www.hgvs.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">HGVS</a> expressions used to describe sequence variation in medical records, and thus are constructed to represent standard alleles. The <a href="/refseq/rsg/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeqGene</a> usually represents a single gene, on the positive strand of the sequence, beginning 5 kb upstream and extending 2 kb downstream. <a href="/refseq/rsg/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeqGene</a> records also include alignments of the <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> transcripts for the gene. 
All sequences annotated on the <a href="/refseq/rsg/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeqGene</a> have a review <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of <span class="bk_pgobj">VALIDATED</span> or <span class="bk_pgobj">REVIEWED</span>.</p><p>Additional curation of vertebrate and some invertebrate <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records occurs at the request of public users and collaborators, or as indicated by in-house QA analyses. QA analyses focus on, but are not restricted to, <a href="/homologene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">HomoloGene</a>-based reporting of inconsistent protein lengths, identification of RefSeqs with repeat elements, questions about gene-to-sequence associations or potentially redundant genes, and reports of genes annotated at one time on a genome but not during subsequent re-annotation of that genome. Additionally, alignment-based tests are conducted for human and mouse that identify RefSeq records with poor quality alignment to the genome, non-consensus splicing, or very short or very long exons. Review of these records by skilled curators results in the most current and complete representation of the nucleotide and protein sequence and feature annotation available at that time. Sequence review may allow removal of vector and linker sequence, extension of the UTRs to define the full-length transcript, modification of the <a class="def" href="/books/n/handbook/A1237/def-item/app22/">CDS</a> annotation associated with the original <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> source accession, or the creation of additional RefSeq records to represent the products of alternative splicing. 
A variety of feature annotations can be added to the RefSeq transcript and protein records. For nucleotide records, these include an indication of the transcript completeness, location of poly(A) signal and site, and sites of sequence variation and <a class="def" href="/books/n/handbook/A1237/def-item/app158/">RNA</a> editing. Exon annotation is provided for RefSeq transcripts and non-transcribed pseudogenes of human and mouse only; for transcripts, <a class="def" href="/books/n/handbook/A1237/def-item/app50/">exon</a> annotation is determined from the alignment of the transcript to the reference genome assembly using <a href="/sutils/splign/splign.cgi" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Splign</a>, and, for non-transcribed pseudogenes, from the <a href="/sutils/splign/splign.cgi" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Splign</a> alignment of the functional gene to the <a class="def" href="/books/n/handbook/A1237/def-item/app147/">pseudogene</a> genomic region. For protein records, feature annotations may include alternate or non-AUG initiating codons, Enzyme Commission (<a href="http://us.expasy.org/enzyme/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">EC</a>) numbers, mature peptide products, protein domains, and selenocysteine residues. Finally, literature review is another source of alternate names, aliases, and functional information, the latter of which may be used to construct a Reference Sequence Summary on the RefSeq record. A RefSeq record that has undergone the complete review process has a <span class="bk_pgobj">REVIEWED</span>
<a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a>. Note that for many genes, intermediate levels of manual curation may address issues concerning the RefSeq sequence alone; these records have a review <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of <span class="bk_pgobj">VALIDATED</span> pending full review.</p><p>The review process may result in updating a <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> record, providing new RefSeq records, modifying sequence-to-gene associations, merging <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> records, or discontinuing a RefSeq, <a class="def" href="/books/n/handbook/A1237/def-item/app96/">GeneID</a>, or both. A RefSeq record is suppressed if it is found to represent a transcribed repeat element, to be derived from the wrong organism (<i>i.e.</i>, the <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> sequence it was based on has incorrect organism annotation), or not to represent a "gene". Records determined to represent an incomplete sequence, such as a partial protein sequence or an incompletely spliced transcript, are temporarily suppressed until more complete sequence data are available. Suppressed records can still be retrieved and will have a disclaimer appearing on the query result document summary (<a class="figpopup" href="/books/NBK21091/figure/ch18.F3/?report=objectonly" target="object" rid-figpopup="figch18F3" rid-ob="figobch18F3">Figure 3a</a>). 
A suppressed record is not included in <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> databases, in the calculation of related sequences, in the <a class="def" href="/books/n/handbook/A1237/def-item/app13/">BLink</a> display (BLink provides pre-computed protein BLAST results), or in <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq/release/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">RefSeq FTP</a> releases. If a RefSeq is found to be redundant with another public RefSeq, then one is retained and the other becomes secondary (<a class="figpopup" href="/books/NBK21091/figure/ch18.F3/?report=objectonly" target="object" rid-figpopup="figch18F3" rid-ob="figobch18F3">Figure 3b</a>). If the sequences were associated with two different Gene records, then the records are merged so that a query of <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> with either of the original GeneIDs will retrieve the remaining single record.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F3" co-legend-rid="figlgndch18F3"><a href="/books/NBK21091/figure/ch18.F3/?report=objectonly" target="object" title="Figure 3. " class="img_link icnblk_img figpopup" rid-figpopup="figch18F3" rid-ob="figobch18F3"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image006.gif" src-large="/books/NBK21091/bin/ch18-Image006.jpg" alt="Figure 3. . Suppressed or redundant RefSeq records." /></a><div class="icnblk_cntnt" id="figlgndch18F3"><h4 id="ch18.F3"><a href="/books/NBK21091/figure/ch18.F3/?report=objectonly" target="object" rid-ob="figobch18F3">Figure 3. </a></h4><p class="float-caption no_bottom_margin">Suppressed or redundant RefSeq records. (A) A standard text statement is included on the Entrez document summary for suppressed RefSeq records. 
(B) If redundant RefSeq records are merged, then both accession numbers appear on the flat file ACCESSION line <a href="/books/NBK21091/figure/ch18.F3/?report=objectonly" target="object" rid-ob="figobch18F3">(more...)</a></p></div></div><p>We welcome input from the research community to improve the quality of the <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> collection. Interested parties are invited to contact us by sending an email to the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> Help Desk (<a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@ofni" class="oemail">vog.hin.mln.ibcn@ofni</a>) or by using our <a href="/RefSeq/update.cgi" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">feedback form</a>.</p></div></div></div><div id="ch18.Access_and_Retrieval"><h2 id="_ch18_Access_and_Retrieval_">Access and Retrieval</h2><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records can be accessed by direct query, <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> download, or indirectly through links provided from several <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> resources, including <a href="/entrez/query.fcgi?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a>, <a href="/entrez/query.fcgi?db=genome" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Genome</a>, <a href="/bioproject" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BioProject</a>, and <a href="/mapview/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Map Viewer</a> (<a class="figpopup" href="/books/NBK21091/table/ch18.T.ncbi_resources_with_links_to_refs/?report=objectonly" target="object" rid-figpopup="figch18Tncbiresourceswithlinkstorefs" 
rid-ob="figobch18Tncbiresourceswithlinkstorefs">Table 6</a>). In addition, RefSeq records are included in some computed resources and so links may be found from those pages to individual RefSeq records. Some links from <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> databases to RefSeq records are based on <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> associations (e.g., links from <a href="/omim" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">OMIM</a>; <a href="/books/n/handbook/ch7/">Chapter 7</a>), whereas others are based on sequence similarity or RefSeq annotation content, including links from <a href="/pubmed" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">PubMed</a>. RefSeq records are easy to distinguish in these resources by their unique accession number format (<a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly" target="object" rid-figpopup="figch18Trefseqaccessionnumbersandmole" rid-ob="figobch18Trefseqaccessionnumbersandmole">Table 1</a>).</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Tncbiresourceswithlinkstorefs"><a href="/books/NBK21091/table/ch18.T.ncbi_resources_with_links_to_refs/?report=objectonly" target="object" title="Table 6. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Tncbiresourceswithlinkstorefs" rid-ob="figobch18Tncbiresourceswithlinkstorefs"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.ncbi_resources_with_links_to_refs/?report=thumb" src-large="/books/NBK21091/table/ch18.T.ncbi_resources_with_links_to_refs/?report=previmg" alt="Table 6. . NCBI resources with links to RefSeq records." 
/></a><div class="icnblk_cntnt"><h4 id="ch18.T.ncbi_resources_with_links_to_refs"><a href="/books/NBK21091/table/ch18.T.ncbi_resources_with_links_to_refs/?report=objectonly" target="object" rid-ob="figobch18Tncbiresourceswithlinkstorefs">Table 6. </a></h4><p class="float-caption no_bottom_margin">NCBI resources with links to RefSeq records. </p></div></div><p>How to access and retrieve <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records is described below.</p><div id="ch18.Entrez_Query_Access"><h3>Entrez Query Access</h3><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records can be retrieved from the <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> system (<a href="/books/n/handbook/ch15/">Chapter 15</a>) by querying with an accession number, symbol or locus_tag, name, or by using Entrez <a href="/gene/limits" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Limits</a> and <a href="/books/NBK3841/#EntrezGene.Properties" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Property</a> terms. All RefSeqs can be found in the <a href="/entrez/query.fcgi?db=Nucleotide" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Entrez Nucleotide</a> or <a href="/entrez/query.fcgi?db=Protein" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein</a> databases; both RefSeq and <a href="http://www.insdc.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">INSDC</a> submissions will be included but a filter is provided at the top right hand corner of the results page to allow display of only the RefSeq accessions, if desired. Filters can be configured using the <a href="/sites/myncbi/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">MyNCBI</a> interface. 
Alternatively, a query can be restricted to retrieve only RefSeq-specific results using the <a href="/sites/myncbi/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Limits</a> page or by querying with a <a href="/books/NBK3841/#EntrezGene.Properties" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Property</a>, such as “srcdb_refseq[property]”, or others listed in <a class="figpopup" href="/books/NBK21091/table/ch18.T.entrez_queries_to_retrieve_sets_o/?report=objectonly" target="object" rid-figpopup="figch18Tentrezqueriestoretrievesetso" rid-ob="figobch18Tentrezqueriestoretrievesetso">Table 7</a>. <a href="/gene/limits" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Limits</a> and <a href="/books/NBK3841/#EntrezGene.Properties" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Properties</a> can also be used to restrict results to molecule type, such as <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> versus <a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a>. The <a href="/books/NBK3837/#EntrezHelp.Entrez_Searching_Options" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Entrez Help</a> document provides additional information about querying.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Tentrezqueriestoretrievesetso"><a href="/books/NBK21091/table/ch18.T.entrez_queries_to_retrieve_sets_o/?report=objectonly" target="object" title="Table 7. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Tentrezqueriestoretrievesetso" rid-ob="figobch18Tentrezqueriestoretrievesetso"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.entrez_queries_to_retrieve_sets_o/?report=thumb" src-large="/books/NBK21091/table/ch18.T.entrez_queries_to_retrieve_sets_o/?report=previmg" alt="Table 7. . Entrez queries to retrieve sets of RefSeq records." 
/></a><div class="icnblk_cntnt"><h4 id="ch18.T.entrez_queries_to_retrieve_sets_o"><a href="/books/NBK21091/table/ch18.T.entrez_queries_to_retrieve_sets_o/?report=objectonly" target="object" rid-ob="figobch18Tentrezqueriestoretrievesetso">Table 7. </a></h4><p class="float-caption no_bottom_margin">Entrez queries to retrieve sets of RefSeq records. </p></div></div><p><a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> contains the majority of the <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> collection and also supports querying using all the above strategies. RefSeq-to-Gene connections are also provided by direct links; RefSeq records include a link to the <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> report page via the <a class="def" href="/books/n/handbook/A1237/def-item/app96/">GeneID</a> <span class="bk_pgobj">db_xref</span> link on the gene and <a class="def" href="/books/n/handbook/A1237/def-item/app22/">CDS</a> features (<a class="figpopup" href="/books/NBK21091/figure/ch18.F1C/?report=objectonly" target="object" rid-figpopup="figch18F1C" rid-ob="figobch18F1C">Figure 1C</a>). <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> reports the RefSeq accession numbers in the RefSeq section of the report, with links to the <a href="/sites/entrez?db=nucleotide" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Nucleotide</a> or <a href="/sites/entrez?db=protein" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein</a> records. 
The Links menu in <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> also provides distinct links to RefSeq RNAs, RefSeq proteins, and <a href="/refseq/rsg/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeqGene</a>. <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> reports may include a graphical depiction of genome annotation data in the <span class="bk_pgobj">Genomic regions, transcripts, and products</span> section, with links to <a href="/sites/entrez?db=nucleotide" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Nucleotide</a> and <a href="/sites/entrez?db=protein" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein</a> displays. When this graphical section is provided, an additional report is available with details about <a class="def" href="/books/n/handbook/A1237/def-item/app50/">exon</a> and <a class="def" href="/books/n/handbook/A1237/def-item/app86/">intron</a> boundaries and length. You can change the display format from <span class="bk_pgobj">Full Report</span> to <span class="bk_pgobj">Gene Table</span> to access this report. 
Note that RefSeq records representing assembled environmental samples (with an NS_ accession prefix) are not included in <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> but can be found in the <a href="/sites/entrez?db=genome" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Genome</a> and <a href="/sites/entrez?db=nucleotide" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Nucleotide</a> databases.</p><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records in the <a href="/sites/entrez?db=genome" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Genome</a> or <a href="/bioproject" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BioProject</a> databases can be retrieved using an accession number for a complete genomic molecule (NC_ accession prefix) or organism name. The <a href="/bioproject" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BioProject</a> database can also be queried using the property restriction “srcdb_refseq[property]”.</p><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records belonging to the <a href="/refseq/rsg/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeqGene</a> set can be retrieved from the <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> system using “RefSeqGene[keyword]”.</p></div><div id="ch18.BLAST"><h3>BLAST</h3><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> transcript records are included in the <a href="/sites/entrez?db=nucleotide" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Nucleotide</a> non-redundant (nr) and the RefSeq <a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a> sequences databases. 
RefSeq protein records are included in the <a href="/sites/entrez?db=protein" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein</a> database. Accessions in the results set, either RefSeq or <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a>, that are associated with a <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> record are indicated by a small blue <span class="bk_pgobj">G</span> icon, which is linked to the <a href="/sites/entrez?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a> report. RefSeq genomic records (whole chromosome or scaffold RefSeq records and <a href="/refseq/rsg/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeqGene</a> records) are provided in the Reference genomic sequences database or via organism-specific genome <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> databases, which can be accessed via <a href="/mapview/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Map Viewer</a>, <a href="/bioproject" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BioProject</a> reports, or the <a href="/Genomes/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Genomic Biology</a> webpage. 
<a href="/refseq/rsg/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeqGene</a> records are also retrieved from the nr database in BLAST results and in a dedicated RefSeqGene database.</p></div><div id="ch18.Map_Viewer"><h3>Map Viewer</h3><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> <a href="/mapview/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Map Viewer</a> supports queries by <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> and <a href="/refseq/rsg/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeqGene</a> accession numbers if the annotated genome is available in that resource.</p></div><div id="ch18.FTP"><h3>FTP</h3><p><a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> data are available in three <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> areas:</p><ul><li class="half_rhythm"><div>Configured <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> databases are available for download from the <a href="ftp://ftp.ncbi.nlm.nih.gov/blast/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">BLAST FTP</a> site; separate databases are provided for genomic, transcript, and protein records.</div></li><li class="half_rhythm"><div>Organism-specific sequence files are provided in the <a href="ftp://ftp.ncbi.nlm.nih.gov/genomes/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">Genomes FTP</a> site. 
This area includes <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> records that are generated by, or used in, <a href="/mapview/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Map Viewer</a> and <a href="/sites/entrez?db=genome" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Genomes</a> processing. <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>’s annotation of genomic RefSeqs is also available; a file in the latest specification (version 1.20) of Generic Feature Format version 3 (<a href="http://www.sequenceontology.org/gff3.shtml" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GFF3</a>) is provided in a GFF subdirectory for the latest assembly of many organisms.</div></li><li class="half_rhythm"><div>The full <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> collection, including the human <a href="ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">RefSeqGene set</a>, is available from the <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">RefSeq FTP</a> site, with the exception of the NS_ accession series environmental sample records. The RefSeq collection is provided as comprehensive bi-monthly releases in addition to daily updates for records that are new or updated between RefSeq release cycles. The comprehensive release provides data in multiple file formats, including flat file and <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a>, organized into primary taxonomic groups in addition to the complete dataset. For organisms with more frequent updates to curated records, including human and mouse, subdirectories containing weekly comprehensive releases of transcript and protein RefSeq records are provided also. 
Information about the RefSeq release is documented on the <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq/release/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">RefSeq FTP</a> site in the <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq/release/release-notes/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">release-notes</a> subdirectory. The availability of new releases is announced on the <a href="/RefSeq/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">RefSeq</a> website, on <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>’s <a href="http://www.facebook.com/ncbi.nlm" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Facebook</a> and <a href="http://twitter.com/ncbi" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Twitter</a> accounts, to subscribers of the <a href="/mailman/listinfo/refseq-announce" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">refseq-announce</a> email list, and in the <a href="/books/NBK1969/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">NCBI Newsletter</a>.</div></li></ul></div></div><div id="ch18.Related_Resources"><h2 id="_ch18_Related_Resources_">Related Resources</h2><div id="ch18.The_Consensus_Coding_Sequence_CCDS"><h3>The Consensus Coding Sequence (CCDS) Project</h3><p>The <a href="/projects/CCDS/CcdsBrowse.cgi" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">CCDS project</a> aims to provide a complete set of high quality annotations of protein-coding genes on the human and mouse genomes. 
It leverages the computational annotation pipelines of <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> and <a href="http://www.ensembl.org/index.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Ensembl</a>, and expert curation provided predominantly by the Havana team of the <a href="http://www.sanger.ac.uk/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Wellcome Trust Sanger Institute</a> and NCBI’s <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> staff, to track identical protein annotations on the reference assemblies of the human and mouse genomes, and to ensure they are consistently and accurately represented in public resources. The CCDS set includes coding regions that are annotated as full-length (with an initiating AUG and valid stop-<a class="def" href="/books/n/handbook/A1237/def-item/app206/">codon</a>), can be translated from the genome without frameshifts, and use consensus splice-sites. Annotated genes in the CCDS set are associated with a unique identifying number and version. The version number will change with a change to the <a class="def" href="/books/n/handbook/A1237/def-item/app22/">CDS</a> structure or to the underlying genomic sequence, although any change requires collaborative agreement. See <a class="def" href="/books/n/handbook/A1237/def-item/app150/">PubMed</a> ID <a href="/pubmed/19498102" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">19498102</a> for more information.</p></div></div><div id="ch18.Related_Reading"><h2 id="_ch18_Related_Reading_">Related Reading</h2><ol><li><div class="bk_ref" id="ch18.REF.blake.2011.d842">Blake JA, Bult CJ, Kadin JA, Richardson JE, Eppig JT., Mouse Genome Database Group. The Mouse Genome Database (MGD): premier model organism resource for mammalian genomics and genetics. <span><span class="ref-journal">Nucl. Acids Res. 
</span>2011;<span class="ref-vol">39</span>:D842–8.</span> (PubMed ID 21051359) [<a href="/pmc/articles/PMC3013640/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC3013640</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/21051359" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 21051359</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.coffin.1997">Coffin JM, Hughes SH, and E Varmus. (1997) Retroviruses. Cold Spring Harbor (NY): Cold Spring Harbor Laboratory Press. [<a href="https://pubmed.ncbi.nlm.nih.gov/21433340" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 21433340</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.dwinell.2009.d744">Dwinell MR, Worthey EA, Shimoyama M, Bakir-Gungor B, DePons J, Laulederkind S, Lowry T, Nigram R, Petri V, Smith J, Stoddard A, Twigger SN, Jacob HJ, Team RGD. The Rat Genome Database 2009: variation, ontologies and pathways. <span><span class="ref-journal">Nucl. Acids Res. </span>2009;<span class="ref-vol">37</span>:D744–9.</span> (PubMed ID 18996890) [<a href="/pmc/articles/PMC2686558/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC2686558</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/18996890" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 18996890</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.eddy.2002.18">Eddy SR. A memory-efficient dynamic programming algorithm for optimal alignment of a sequence to an RNA secondary structure. <span><span class="ref-journal">BMC Bioinformatics. 
</span>2002;<span class="ref-vol">3</span>:18.</span> (PubMed ID 12095421) [<a href="/pmc/articles/PMC119854/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC119854</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/12095421" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 12095421</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.griffithsjones.2003.439">Griffiths-Jones S, Bateman A, Marshall M, Khanna A, Eddy SR. Rfam: an RNA family database. <span><span class="ref-journal">Nucl. Acids Res. </span>2003;<span class="ref-vol">31</span>:439–441.</span> (PubMed ID 12520045) [<a href="/pmc/articles/PMC165453/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC165453</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/12520045" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 12520045</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.amberger.2011">Amberger, J., Bocchini, C. and Hamosh, A. (2011), A new face and new challenges for online mendelian inheritance in man (OMIM®). Human Mutation, 32:n/a. doi: 10.1002/humu.21466. (PubMed ID 21472891). [<a href="https://pubmed.ncbi.nlm.nih.gov/21472891" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 21472891</span></a>] [<a href="http://dx.crossref.org/10.1002/humu.21466" ref="pagearea=cite-ref&targetsite=external&targetcat=link&targettype=uri">CrossRef</a>]</div></li><li><div class="bk_ref" id="ch18.REF.lowe.1997.955">Lowe TM, Eddy SR. tRNAscan-SE: a program for improved detection of transfer RNA genes in genomic sequence. <span><span class="ref-journal">Nucl. Acids Res. 
</span>1997;<span class="ref-vol">25</span>:955–964.</span> (PubMed ID 9023104) [<a href="/pmc/articles/PMC146525/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC146525</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/9023104" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 9023104</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.maglott.2011.d52">Maglott D, Ostell J, Pruitt KD, Tatusova T. Entrez Gene: gene-centered information at NCBI. <span><span class="ref-journal">Nucl. Acids Res. </span>2011;<span class="ref-vol">39</span>:D52–7.</span> (PubMed ID 21115458) [<a href="/pmc/articles/PMC3013746/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC3013746</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/21115458" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 21115458</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.marchlerbauer.2011.d225">Marchler-Bauer A, Lu S, Anderson JB, Chitsaz F, Derbyshire MK, Deweese-Scott C, Fong JH, Geer LY, Geer RC, Gonzales NR, Gwadz M, Hurwitz DI, Jackson JD, Ke Z, Lanczycki CJ, Lu F, Marchler GH, Mullokandov M, Omelchenko MV, Robertson CL, Song JS, Thanki N, Yamashita RA, Zhang D, Zhang N, Zheng C, Bryant SH. CDD: a Conserved Domain Database for the functional annotation of proteins. <span><span class="ref-journal">Nucl. Acids Res. 
</span>2011;<span class="ref-vol">39</span>:D225–9.</span> (PubMed ID 21109532) [<a href="/pmc/articles/PMC3013737/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC3013737</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/21109532" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 21109532</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.pruitt.2008.1316">Pruitt KD, Harrow J, Harte RA, Wallin C, Diekhans M, Maglott DR, Searle S, Farrell CM, et al. The consensus coding sequence (CCDS) project: Identifying a common protein-coding gene set for the human and mouse genomes. <span><span class="ref-journal">Genome Res. </span>2008;<span class="ref-vol">19</span>(7):1316–1323.</span> (PubMed ID 19498102) [<a href="/pmc/articles/PMC2704439/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC2704439</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/19498102" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 19498102</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.pruitt.2009.d32">Pruitt KD, Tatusova T, Klimke W, Maglott DR. NCBI Reference Sequences: current status, policy and new initiatives. <span><span class="ref-journal">Nucl. Acids Res. </span>2009;<span class="ref-vol">37</span>:D32–36.</span> (PubMed ID 18927115) [<a href="/pmc/articles/PMC2686572/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC2686572</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/18927115" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 18927115</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.tatusova.1999.536">Tatusova TA, Karsch-Mizrachi I, Ostell JA. 
Complete genomes in WWW Entrez: data representation and analysis. <span><span class="ref-journal">Bioinformatics. </span>1999;<span class="ref-vol">15</span>:536–43.</span> (PubMed ID 10487861) [<a href="https://pubmed.ncbi.nlm.nih.gov/10487861" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 10487861</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF13"><a href="/pubmed/18996890" ref="pagearea=cite-ref&targetsite=external&targetcat=link&targettype=uri">http://www<wbr style="display:inline-block"></wbr>.ncbi.nlm.nih<wbr style="display:inline-block"></wbr>.gov/pubmed/18996890</a> Sprague J, Bayraktaroglu L, Clements D, Conlin T, Fashena D, Frazer K, Haendel M, Howe D, Mani P, Ramachandran S, Schaper K, Segerdell E, Song P, Sprunger B, Taylor S, Van Slyke C, and M Westerfield. (2006) The Zebrafish Information Network: the zebrafish model organism database. Nucl. Acids Res. 34:D581-D585 (PubMed ID 16381936). [<a href="/pmc/articles/PMC1347449/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC1347449</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/16381936" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 16381936</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.seal.2011.d519">Seal RL, Gordon SM, Lush MJ, Wright MW, Bruford EA. genenames.org: the HGNC resources in 2011. <span><span class="ref-journal">Nucleic Acids Res. 
</span>2011;<span class="ref-vol">39</span>:D519–9.</span> (PubMed ID 20929869) [<a href="/pmc/articles/PMC3013772/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC3013772</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/20929869" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 20929869</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF15">Tweedie S, Ashburner M, Falls K, Leyland P, McQuilton P, Marygold S, Millburn G, Osumi-Sutherland D, Schroeder A, Seal R, Zhang Z, and The FlyBase Consortium. (2009) FlyBase: enhancing Drosophila Gene Ontology annotations. Nucl. Acids Res. 37: D555-D559 (PubMed ID 18948289). [<a href="/pmc/articles/PMC2686450/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC2686450</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/18948289" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 18948289</span></a>]</div></li></ol></div><div id="bk_toc_contnr"></div></div></div>
|
||
<div class="post-content"><div><div class="half_rhythm"><a href="/books/about/copyright/">Copyright Notice</a></div><div class="small"><span class="label">Bookshelf ID: NBK21091</span></div><div style="margin-top:2em" class="bk_noprnt"><a class="bk_cntns" href="/books/n/handbook/">Contents</a><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook/ch17/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook/ch19/" title="Next page in this title">Next &gt;</a></div></div></div></div>
|
||
|
||
</div>
|
||
|
||
<!-- Custom content below content -->
|
||
<div class="col4">
|
||
|
||
</div>
|
||
|
||
|
||
<!-- Book content -->
|
||
|
||
<!-- Custom content below bottom nav -->
|
||
<div class="col5">
|
||
|
||
</div>
|
||
</div>
|
||
|
||
<div id="rightcolumn" class="four_col col last">
|
||
<!-- Custom content above discovery portlets -->
|
||
<div class="col6">
|
||
<div id="ncbi_share_book"><a href="#" class="ncbi_share" data-ncbi_share_config="popup:false,shorten:true" ref="id=NBK21091&db=books">Share</a></div>
|
||
|
||
</div>
|
||
<div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Views</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="PDF_download" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="/books/NBK21091/?report=reader">PubReader</a></li><li><a href="/books/NBK21091/?report=printable">Print View</a></li><li><a data-jig="ncbidialog" href="#_ncbi_dlg_citbx_NBK21091" data-jigconfig="width:400,modal:true">Cite this Page</a><div id="_ncbi_dlg_citbx_NBK21091" style="display:none" title="Cite this Page"><div class="bk_tt">Pruitt K, Brown G, Tatusova T, et al. The Reference Sequence (RefSeq) Database. 2002 Oct 9 [Updated 2012 Apr 6]. In: McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. 
Chapter 18.<span class="bk_cite_avail"></span></div></div></li><li><a href="/books/NBK21091/pdf/Bookshelf_NBK21091.pdf">PDF version of this page</a> (863K)</li><li><a href="/books/n/handbook/pdf/">PDF version of this title</a> (7.2M)</li><li><a href="#" class="toggle-glossary-link" title="Enable/disable links to the glossary">Disable Glossary Links</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>In this Page</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="page-toc" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="#ch18.Summary" ref="log$=inpage&link_id=inpage">Summary</a></li><li><a href="#ch18.Introduction" ref="log$=inpage&link_id=inpage">Introduction</a></li><li><a href="#ch18.Database_Content_Background" ref="log$=inpage&link_id=inpage">Database Content: Background</a></li><li><a href="#ch18.Assembling_and_Maintaining_the_RefS" ref="log$=inpage&link_id=inpage">Assembling and Maintaining the RefSeq Collection</a></li><li><a href="#ch18.Access_and_Retrieval" ref="log$=inpage&link_id=inpage">Access and Retrieval</a></li><li><a href="#ch18.Related_Resources" ref="log$=inpage&link_id=inpage">Related Resources</a></li><li><a href="#ch18.Related_Reading" ref="log$=inpage&link_id=inpage">Related Reading</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Recent Activity</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="recent_activity" id="Shutter"></a></div><div class="portlet_content"><div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="HTDisplay" class=""><div 
class="action"><a href="javascript:historyDisplayState('ClearHT')">Clear</a><a href="javascript:historyDisplayState('HTOff')" class="HTOn">Turn Off</a><a href="javascript:historyDisplayState('HTOn')" class="HTOff">Turn On</a></div><ul id="activity"><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=1" href="/portal/utils/pageresolver.fcgi?recordid=67c81204feee5b00ac5f56b4">The Reference Sequence (RefSeq) Database - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">The Reference Sequence (RefSeq) Database - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=2" href="/portal/utils/pageresolver.fcgi?recordid=67c81201feee5b00ac5f3c1c">CENPB centromere protein B [Homo sapiens]</a><div class="ralinkpop offscreen_noflow">CENPB centromere protein B [Homo sapiens]<div class="brieflinkpopdesc">Gene ID:1059</div></div><div class="tertiary">Gene</div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=3" href="/portal/utils/pageresolver.fcgi?recordid=67c811fefeee5b00ac5f2b29">Using Preferences - My NCBI Help</a><div class="ralinkpop offscreen_noflow">Using Preferences - My NCBI Help<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_qry two_line"><a class="htb" ref="log$=activity&linkpos=4" href="/portal/utils/pageresolver.fcgi?recordid=67c811fcb70fbb196064d55e">gene_clinvar[filter] AND (alive[prop]) <span class="number">(92260)</span></a><div class="tertiary">Gene</div></li><li class="ra_qry two_line"><a class="htb" ref="log$=activity&linkpos=5" href="/portal/utils/pageresolver.fcgi?recordid=67c811fafeee5b00ac5f09e5">BRCA1[sym] AND (alive[prop]) <span class="number">(530)</span></a><div class="tertiary">Gene</div></li></ul><p class="HTOn">Your browsing activity is empty.</p><p 
class="HTOff">Activity recording is turned off.</p><p id="turnOn" class="HTOff"><a href="javascript:historyDisplayState('HTOn')">Turn recording back on</a></p><a class="seemore" href="/sites/myncbi/recentactivity">See more...</a></div></div></div>
|
||
|
||
<!-- Custom content below discovery portlets -->
|
||
<div class="col7">
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- Custom content after all -->
|
||
<div class="col8">
|
||
|
||
</div>
|
||
<div class="col9">
|
||
|
||
</div>
|
||
|
||
<script type="text/javascript" src="/corehtml/pmc/js/jquery.scrollTo-1.4.2.js"></script>
|
||
<script type="text/javascript">
/*
 * Skip-link support.
 *
 * For every ".skiplink" anchor whose href is a same-page fragment ("#id"):
 *   - the referenced element is made programmatically focusable
 *     (tabindex="-1") and tagged with the "skiptarget" class;
 *   - clicking the anchor cancels the default jump, scrolls to the element
 *     via the jQuery.scrollTo plugin (duration 0) and then moves keyboard
 *     focus onto it.
 *
 * Requires jQuery and the jQuery.scrollTo plugin to be loaded beforehand.
 */
(function($){
    $('.skiplink').each(function(i, item){
        var selector = $(item).attr('href');

        // Only a same-page fragment names a skip target. A missing href, a
        // bare "#" or a real URL is left untouched so the link keeps its
        // default behaviour instead of being handed to the selector engine
        // (which may throw on such input).
        if (typeof selector !== 'string' || selector.charAt(0) !== '#' || selector.length === 1) {
            return;
        }

        var target = $(selector);

        // No matching element: do not hijack the click, otherwise the link
        // would cancel navigation and then scroll/focus nothing.
        if (target.length === 0) {
            return;
        }

        target.attr('tabindex', '-1').addClass('skiptarget'); // ensure the target can receive focus

        $(item).on('click', function(event){
            event.preventDefault();
            $.scrollTo(target, 0, {
                // Focus only after the scroll has finished.
                onAfter: function(){
                    target.focus();
                }
            });
        });
    });
})(jQuery);
</script>
|
||
</div>
|
||
<div class="bottom">
|
||
<script type="text/javascript">
// Global search-term data for this page: the highlight style, a timestamp
// string, and the list of terms.
// NOTE(review): presumably read by a Bookshelf highlighting script loaded
// elsewhere; the consumer is not visible in this part of the page. Confirm
// before renaming or restructuring.
var PBooksSearchTermData = {
    highlighter: "bold",
    dateTime: "03/05/2025 03:12:13",
    terms: [
        '2010'
    ]
};
</script>
|
||
<div id="NCBIFooter_dynamic">
|
||
<!--<component id="Breadcrumbs" label="breadcrumbs"/>
|
||
<component id="Breadcrumbs" label="helpdesk"/>-->
|
||
|
||
</div>
|
||
|
||
<div class="footer" id="footer">
|
||
<section class="icon-section">
|
||
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
|
||
<div class="grid-container container">
|
||
<div class="icon-section_container">
|
||
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<defs>
|
||
<style>
|
||
.cls-11 {
|
||
fill: #737373;
|
||
}
|
||
</style>
|
||
</defs>
|
||
<title>Twitter</title>
|
||
<path class="cls-11" d="M250.11,105.48c-7,3.14-13,3.25-19.27.14,8.12-4.86,8.49-8.27,11.43-17.46a78.8,78.8,0,0,1-25,9.55,39.35,39.35,0,0,0-67,35.85,111.6,111.6,0,0,1-81-41.08A39.37,39.37,0,0,0,81.47,145a39.08,39.08,0,0,1-17.8-4.92c0,.17,0,.33,0,.5a39.32,39.32,0,0,0,31.53,38.54,39.26,39.26,0,0,1-17.75.68,39.37,39.37,0,0,0,36.72,27.3A79.07,79.07,0,0,1,56,223.34,111.31,111.31,0,0,0,116.22,241c72.3,0,111.83-59.9,111.83-111.84,0-1.71,0-3.4-.1-5.09C235.62,118.54,244.84,113.37,250.11,105.48Z">
|
||
</path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<title>Facebook</title>
|
||
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
|
||
</path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<title>LinkedIn</title>
|
||
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
|
||
</path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<defs>
|
||
<style>
|
||
.cls-11,
|
||
.cls-12 {
|
||
fill: #737373;
|
||
}
|
||
|
||
.cls-11 {
|
||
fill-rule: evenodd;
|
||
}
|
||
</style>
|
||
</defs>
|
||
<title>GitHub</title>
|
||
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
|
||
</path>
|
||
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
|
||
</path>
|
||
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
|
||
</path>
|
||
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
|
||
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
|
||
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
|
||
<defs><style>.cls-1{fill:#737373;}</style></defs>
|
||
<title>NCBI Insights Blog</title>
|
||
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
|
||
</svg>
|
||
</a>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section class="container-fluid bg-primary">
|
||
<div class="container pt-5">
|
||
<div class="row mt-3">
|
||
<div class="col-lg-3 col-12">
|
||
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
|
||
<ul class="list-inline social_media">
|
||
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
||
<style type="text/css">
|
||
.st20 {
|
||
fill: #FFFFFF;
|
||
}
|
||
|
||
.st30 {
|
||
fill: none;
|
||
stroke: #FFFFFF;
|
||
stroke-width: 8;
|
||
stroke-miterlimit: 10;
|
||
}
|
||
</style>
|
||
<title>Twitter</title>
|
||
<g>
|
||
<g>
|
||
<g>
|
||
<path class="st20" d="M192.9,88.1c-5,2.2-9.2,2.3-13.6,0.1c5.7-3.4,6-5.8,8.1-12.3c-5.4,3.2-11.4,5.5-17.6,6.7 c-10.5-11.2-28.1-11.7-39.2-1.2c-7.2,6.8-10.2,16.9-8,26.5c-22.3-1.1-43.1-11.7-57.2-29C58,91.6,61.8,107.9,74,116 c-4.4-0.1-8.7-1.3-12.6-3.4c0,0.1,0,0.2,0,0.4c0,13.2,9.3,24.6,22.3,27.2c-4.1,1.1-8.4,1.3-12.5,0.5c3.6,11.3,14,19,25.9,19.3 c-11.6,9.1-26.4,13.2-41.1,11.5c12.7,8.1,27.4,12.5,42.5,12.5c51,0,78.9-42.2,78.9-78.9c0-1.2,0-2.4-0.1-3.6 C182.7,97.4,189.2,93.7,192.9,88.1z"></path>
|
||
</g>
|
||
</g>
|
||
<circle class="st30" cx="124.4" cy="128.8" r="108.2"></circle>
|
||
</g>
|
||
</svg></a></li>
|
||
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
|
||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
||
<style type="text/css">
|
||
.st10 {
|
||
fill: #FFFFFF;
|
||
}
|
||
|
||
.st110 {
|
||
fill: none;
|
||
stroke: #FFFFFF;
|
||
stroke-width: 8;
|
||
stroke-miterlimit: 10;
|
||
}
|
||
</style>
|
||
<title>Facebook</title>
|
||
<g>
|
||
<g>
|
||
<path class="st10" d="M159,99.1h-24V88.4c0-5,3.3-6.2,5.7-6.2h16.8V60l-24.4-0.1c-22.1,0-26.2,16.5-26.2,27.1v12.1H90v22.5h16.9 v67.5H135v-67.5h21.7L159,99.1z"></path>
|
||
</g>
|
||
</g>
|
||
<circle class="st110" cx="123.6" cy="123.2" r="108.2"></circle>
|
||
</svg>
|
||
</a></li>
|
||
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="YouTube" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
||
<title>YouTube</title>
|
||
<style type="text/css">
|
||
.st4 {
|
||
fill: none;
|
||
stroke: #FFFFFF;
|
||
stroke-width: 8;
|
||
stroke-miterlimit: 10;
|
||
}
|
||
|
||
.st5 {
|
||
fill: #FFFFFF;
|
||
}
|
||
</style>
|
||
<circle class="st4" cx="124.2" cy="123.4" r="108.2"></circle>
|
||
<g transform="translate(0,-952.36218)">
|
||
<path class="st5" d="M88.4,1037.4c-10.4,0-18.7,8.3-18.7,18.7v40.1c0,10.4,8.3,18.7,18.7,18.7h72.1c10.4,0,18.7-8.3,18.7-18.7 v-40.1c0-10.4-8.3-18.7-18.7-18.7H88.4z M115.2,1058.8l29.4,17.4l-29.4,17.4V1058.8z"></path>
|
||
</g>
|
||
</svg></a></li>
|
||
</ul>
|
||
</div>
|
||
<div class="col-lg-3 col-12">
|
||
<p class="address_footer text-white">National Library of Medicine<br />
|
||
<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
|
||
Bethesda, MD 20894</a></p>
|
||
</div>
|
||
<!-- Footer column: policy links (web policies, FOIA, HHS vulnerability disclosure). -->
<div class="col-lg-3 col-12 centered-lg">
  <p><a href="https://www.nlm.nih.gov/web_policies.html" class="text-white">Web Policies</a><br />
    <a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white">FOIA</a><br />
    <a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white" id="vdp">HHS Vulnerability Disclosure</a></p>
</div>
|
||
<!-- Footer column: support links (help desk, accessibility statement, careers). -->
<div class="col-lg-3 col-12 centered-lg">
  <p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
    <a href="https://www.nlm.nih.gov/accessibility.html" class="text-white">Accessibility</a><br />
    <a href="https://www.nlm.nih.gov/careers/careers.html" class="text-white">Careers</a></p>
</div>
|
||
</div>
|
||
<!-- Bottom row of the footer: links to the parent organisations.
     All four links use explicit https:// URLs so they resolve the same way
     regardless of the scheme the page itself was loaded from. -->
<div class="row">
  <div class="col-lg-12 centered-lg">
    <nav class="bottom-links">
      <ul class="mt-3">
        <li>
          <a class="text-white" href="https://www.nlm.nih.gov/">NLM</a>
        </li>
        <li>
          <a class="text-white" href="https://www.nih.gov/">NIH</a>
        </li>
        <li>
          <a class="text-white" href="https://www.hhs.gov/">HHS</a>
        </li>
        <li>
          <a class="text-white" href="https://www.usa.gov/">USA.gov</a>
        </li>
      </ul>
    </nav>
  </div>
</div>
|
||
</div>
|
||
</section>
|
||
<!-- Portal scripts loaded after the footer markup. NOTE(review): the file
     names suggest page instrumentation followed by header/footer behaviour;
     confirm against the portal before reordering or deferring them. -->
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<!--/.page-->
|
||
</div>
|
||
<!--/.wrap-->
|
||
</div><!-- /.twelve_col -->
|
||
</div>
|
||
<!-- /.grid -->
|
||
|
||
<span class="PAFAppResources"></span>
|
||
|
||
<!-- BESelector tab -->
|
||
|
||
|
||
|
||
<!-- No-JavaScript fallback: requests /stat with the page's logger fields as query parameters. Ampersands are written as &amp; (required for well-formed XHTML attribute values) and spaces as %20; the requested URL is unchanged. -->
<noscript><img alt="statistics" src="/stat?jsdisabled=true&amp;ncbi_db=books&amp;ncbi_pdid=book-part&amp;ncbi_acc=NBK21091&amp;ncbi_domain=handbook&amp;ncbi_report=record&amp;ncbi_type=fulltext&amp;ncbi_objectid=&amp;ncbi_pcid=/NBK21091/&amp;ncbi_pagename=The%20Reference%20Sequence%20(RefSeq)%20Database%20-%20The%20NCBI%20Handbook%20-%20NCBI%20Bookshelf&amp;ncbi_bookparttype=chapter&amp;ncbi_app=bookshelf" /></noscript>
|
||
|
||
|
||
<!-- usually for JS scripts at page bottom -->
|
||
<!--<component id="PageFixtures" label="styles"></component>-->
|
||
|
||
|
||
<!-- CE8B5AF87C7FFCB1_0191SID /projects/books/PBooks@9.11 portal106 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
|
||
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
|
||
|
||
<!-- Combined portal script bundle, loaded last in the body. The URL is pinned to https: so it still resolves when the page is opened from a non-HTTP(S) origin such as a saved local copy. -->
<script type="text/javascript" src="https://static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3879255/4121861/3501987/4008961/3893018/3821238/4062932/4209313/4212053/4076480/3921943/3400083/3426610.js" snapshot="books"></script></body>
|
||
</html> |