nih-gov/www.ncbi.nlm.nih.gov/books/n/handbook/ch18/index.html?report=reader

195 lines
153 KiB
Text

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en" class="no-js no-jr">
<head>
<!-- Document metadata for Chapter 18 of The NCBI Handbook (reader report). -->
<!-- The character encoding is declared first so the parser sees it before any script, title text or other content. -->
<meta charset="utf-8">
<!-- For pinger, set start time and add meta elements. -->
<script>var ncbi_startTime = new Date();</script>
<!-- Logger begin -->
<meta name="ncbi_db" content="books">
<meta name="ncbi_pdid" content="book-part">
<meta name="ncbi_acc" content="NBK21091">
<meta name="ncbi_domain" content="handbook">
<meta name="ncbi_report" content="reader">
<meta name="ncbi_type" content="fulltext">
<meta name="ncbi_objectid" content="">
<meta name="ncbi_pcid" content="/NBK21091/?report=reader">
<meta name="ncbi_pagename" content="The Reference Sequence (RefSeq) Database - The NCBI Handbook - NCBI Bookshelf">
<meta name="ncbi_bookparttype" content="chapter">
<meta name="ncbi_app" content="bookshelf">
<!-- Logger end -->
<!--component id="Page" label="meta"/-->
<!-- Reader bootstrap script; kept synchronous and ahead of the stylesheets, exactly as before. -->
<script src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/jr.boots.min.js"> </script>
<title>The Reference Sequence (RefSeq) Database - The NCBI Handbook - NCBI Bookshelf</title>
<meta name="apple-mobile-web-app-capable" content="no">
<!-- NOTE(review): maximum-scale=1 and user-scalable=no prevent pinch-zoom (accessibility concern). Left unchanged because the reader script may rely on a fixed scale - confirm before relaxing. -->
<meta name="viewport" content="initial-scale=1,minimum-scale=1,maximum-scale=1,user-scalable=no">
<!-- Reader settings: column layout, previous (ch17) and next (ch19) units, and the table-of-contents URL. -->
<meta name="jr-col-layout" content="auto">
<meta name="jr-prev-unit" content="/books/n/handbook/ch17/?report=reader">
<meta name="jr-next-unit" content="/books/n/handbook/ch19/?report=reader">
<meta name="bk-toc-url" content="/books/n/handbook/?report=toc">
<meta name="robots" content="NOINDEX,NOFOLLOW,NOARCHIVE,NOIMAGEINDEX">
<!-- Citation metadata -->
<meta name="citation_inbook_title" content="The NCBI Handbook [Internet]">
<meta name="citation_title" content="The Reference Sequence (RefSeq) Database">
<meta name="citation_publisher" content="National Center for Biotechnology Information (US)">
<meta name="citation_date" content="2012/04/06">
<meta name="citation_author" content="Kim Pruitt">
<meta name="citation_author" content="Garth Brown">
<meta name="citation_author" content="Tatiana Tatusova">
<meta name="citation_author" content="Donna Maglott">
<meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK21091/">
<!-- Dublin Core metadata -->
<link rel="schema.DC" href="http://purl.org/DC/elements/1.0/">
<meta name="DC.Title" content="The Reference Sequence (RefSeq) Database">
<meta name="DC.Type" content="Text">
<meta name="DC.Publisher" content="National Center for Biotechnology Information (US)">
<meta name="DC.Contributor" content="Kim Pruitt">
<meta name="DC.Contributor" content="Garth Brown">
<meta name="DC.Contributor" content="Tatiana Tatusova">
<meta name="DC.Contributor" content="Donna Maglott">
<meta name="DC.Date" content="2012/04/06">
<meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK21091/">
<meta name="description" content="NCBI&rsquo;s Reference Sequence (RefSeq) database is a collection of taxonomically diverse, non-redundant and richly annotated sequences representing naturally occurring molecules of DNA, RNA, and protein. Included are sequences from plasmids, organelles, viruses, archaea, bacteria, and eukaryotes. Each RefSeq is constructed wholly from sequence data submitted to the International Nucleotide Sequence Database Collaboration (INSDC). Similar to a review article, a RefSeq is a synthesis of information integrated across multiple sources at a given time. RefSeqs provide a foundation for uniting sequence data with genetic and functional information. They are generated to provide reference standards for multiple purposes ranging from genome annotation to reporting locations of sequence variation in medical records. The RefSeq collection is available without restriction and can be retrieved in several different ways, such as by searching or by available links in NCBI resources, including PubMed, Nucleotide, Protein, Gene, and Map Viewer, searching with a sequence via BLAST, and downloading from the RefSeq FTP site.">
<!-- Open Graph metadata uses the "property" attribute, as the Open Graph protocol specifies; Twitter card tags keep "name". -->
<meta property="og:title" content="The Reference Sequence (RefSeq) Database">
<meta property="og:type" content="book">
<meta property="og:description" content="NCBI&rsquo;s Reference Sequence (RefSeq) database is a collection of taxonomically diverse, non-redundant and richly annotated sequences representing naturally occurring molecules of DNA, RNA, and protein. Included are sequences from plasmids, organelles, viruses, archaea, bacteria, and eukaryotes. Each RefSeq is constructed wholly from sequence data submitted to the International Nucleotide Sequence Database Collaboration (INSDC). Similar to a review article, a RefSeq is a synthesis of information integrated across multiple sources at a given time. RefSeqs provide a foundation for uniting sequence data with genetic and functional information. They are generated to provide reference standards for multiple purposes ranging from genome annotation to reporting locations of sequence variation in medical records. The RefSeq collection is available without restriction and can be retrieved in several different ways, such as by searching or by available links in NCBI resources, including PubMed, Nucleotide, Protein, Gene, and Map Viewer, searching with a sequence via BLAST, and downloading from the RefSeq FTP site.">
<meta property="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK21091/">
<meta property="og:site_name" content="NCBI Bookshelf">
<meta property="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png">
<meta name="twitter:card" content="summary">
<meta name="twitter:site" content="@ncbibooks">
<meta name="warning" content="This publication is provided for historical reference only and the information may be out of date.">
<meta name="bk-non-canon-loc" content="/books/n/handbook/ch18/?report=reader">
<link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK21091/">
<!-- Stylesheets, in their original cascade order. -->
<link href="https://fonts.googleapis.com/css?family=Archivo+Narrow:400,700,400italic,700italic&amp;subset=latin" rel="stylesheet">
<link rel="stylesheet" href="/corehtml/pmc/jatsreader/ptpmc_3.22/css/libs.min.css">
<link rel="stylesheet" href="/corehtml/pmc/jatsreader/ptpmc_3.22/css/jr.min.css">
<meta name="format-detection" content="telephone=no">
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css">
<!-- Path previously contained a doubled slash ("css//books_print.min.css"); now matches the sibling bookshelf stylesheets. -->
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_print.min.css" media="print">
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_reader.min.css">
<style>.main-content {background:transparent repeat-y top left;background-image:url(/corehtml/pmc/css/bookshelf/2.26/img/archive.png);background-size: auto, contain; padding:0 0 0 3em }</style>
<style>p a.figpopup{display:inline !important} .bk_tt {font-family: monospace} .first-line-outdent .bk_ref {display: inline} .body-content h2, .body-content .h2 {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a {text-decoration:none;border:0 !important} .temp-labeled-list .graphic {display:inline-block !important} .temp-labeled-list img{width:100%}</style>
<!-- Explicit https: URLs replace the protocol-relative ("//host/...") form. -->
<link rel="shortcut icon" href="https://www.ncbi.nlm.nih.gov/favicon.ico">
<meta name="ncbi_phid" content="CE8B9BD67C9C0A8100000000002D0027.m_5">
<meta name="referrer" content="origin-when-cross-origin">
<link rel="stylesheet" href="https://static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3849091.css">
</head>
<body>
<!-- Book content! -->
<div id="jr" data-jr-path="/corehtml/pmc/jatsreader/ptpmc_3.22/"><div class="jr-unsupported"><table class="modal"><tr><td><span class="attn inline-block"></span><br />Your browser does not support the NLM PubReader view.<br />Go to <a href="/pmc/about/pr-browsers/">this page</a> to see a list of supported browsers<br />or return to the <br /><a href="/books/NBK21091/?report=classic">regular view</a>.</td></tr></table></div><div id="jr-ui" class="hidden"><nav id="jr-head"><div class="flexh tb"><div id="jr-tb1"><a id="jr-links-sw" class="hidden" title="Links"><svg xmlns="http://www.w3.org/2000/svg" version="1.1" x="0px" y="0px" viewBox="0 0 70.6 85.3" style="enable-background:new 0 0 70.6 85.3;vertical-align:middle" xml:space="preserve" width="24" height="24">
<style type="text/css">.st0{fill:#939598;}</style>
<g>
<path class="st0" d="M36,0C12.8,2.2-22.4,14.6,19.6,32.5C40.7,41.4-30.6,14,35.9,9.8"></path>
<path class="st0" d="M34.5,85.3c23.2-2.2,58.4-14.6,16.4-32.5c-21.1-8.9,50.2,18.5-16.3,22.7"></path>
<path class="st0" d="M34.7,37.1c66.5-4.2-4.8-31.6,16.3-22.7c42.1,17.9,6.9,30.3-16.4,32.5h1.7c-66.2,4.4,4.8,31.6-16.3,22.7 c-42.1-17.9-6.9-30.3,16.4-32.5"></path>
</g>
</svg> Books</a></div><div class="jr-rhead f1 flexh"><div class="head"><a href="/books/n/handbook/ch17/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a></div><div class="body"><div class="t">Chapter 18, The Reference Sequence (RefSeq) Database</div><div class="j">The NCBI Handbook [Internet]</div></div><div class="tail"><a href="/books/n/handbook/ch19/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M25,30c80,60 80,0 0,60 c30,-60 30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Next</text></svg></a></div></div><div id="jr-tb2"><a id="jr-bkhelp-sw" class="btn wsprkl hidden" title="Help with NLM PubReader">?</a><a id="jr-help-sw" class="btn wsprkl hidden" title="Settings and typography in NLM PubReader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" preserveAspectRatio="none"><path d="M462,283.742v-55.485l-29.981-10.662c-11.431-4.065-20.628-12.794-25.274-24.001 c-0.002-0.004-0.004-0.009-0.006-0.013c-4.659-11.235-4.333-23.918,0.889-34.903l13.653-28.724l-39.234-39.234l-28.72,13.652 c-10.979,5.219-23.68,5.546-34.908,0.889c-0.005-0.002-0.01-0.003-0.014-0.005c-11.215-4.65-19.933-13.834-24-25.273L283.741,50 h-55.484l-10.662,29.981c-4.065,11.431-12.794,20.627-24.001,25.274c-0.005,0.002-0.009,0.004-0.014,0.005 c-11.235,4.66-23.919,4.333-34.905-0.889l-28.723-13.653l-39.234,39.234l13.653,28.721c5.219,10.979,5.545,23.681,0.889,34.91 c-0.002,0.004-0.004,0.009-0.006,0.013c-4.649,11.214-13.834,19.931-25.271,23.998L50,228.257v55.485l29.98,10.661 c11.431,4.065,20.627,12.794,25.274,24c0.002,0.005,0.003,0.01,0.005,0.014c4.66,11.236,4.334,23.921-0.888,34.906l-13.654,28.723 l39.234,39.234l28.721-13.652c10.979-5.219,23.681-5.546,34.909-0.889c0.005,0.002,0.01,0.004,0.014,0.006 
c11.214,4.649,19.93,13.833,23.998,25.271L228.257,462h55.484l10.595-29.79c4.103-11.538,12.908-20.824,24.216-25.525 c0.005-0.002,0.009-0.004,0.014-0.006c11.127-4.628,23.694-4.311,34.578,0.863l28.902,13.738l39.234-39.234l-13.66-28.737 c-5.214-10.969-5.539-23.659-0.886-34.877c0.002-0.005,0.004-0.009,0.006-0.014c4.654-11.225,13.848-19.949,25.297-24.021 L462,283.742z M256,331.546c-41.724,0-75.548-33.823-75.548-75.546s33.824-75.547,75.548-75.547 c41.723,0,75.546,33.824,75.546,75.547S297.723,331.546,256,331.546z"></path></svg></a><a id="jr-fip-sw" class="btn wsprkl hidden" title="Find"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 550 600" preserveAspectRatio="none"><path fill="none" stroke="#000" stroke-width="36" stroke-linecap="round" style="fill:#FFF" d="m320,350a153,153 0 1,0-2,2l170,170m-91-117 110,110-26,26-110-110"></path></svg></a><a id="jr-rtoc-sw" class="btn wsprkl hidden" title="Table of Contents"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M20,20h10v8H20V20zM36,20h44v8H36V20zM20,37.33h10v8H20V37.33zM36,37.33h44v8H36V37.33zM20,54.66h10v8H20V54.66zM36,54.66h44v8H36V54.66zM20,72h10v8 H20V72zM36,72h44v8H36V72z"></path></svg></a></div></div></nav><nav id="jr-dash" class="noselect"><nav id="jr-dash" class="noselect"><div id="jr-pi" class="hidden"><a id="jr-pi-prev" class="hidden" title="Previous page"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a><div class="pginfo">Page <i class="jr-pg-pn">0</i> of <i class="jr-pg-lp">0</i></div><a id="jr-pi-next" class="hidden" title="Next page"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M25,30c80,60 80,0 0,60 c30,-60 30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Next</text></svg></a></div><div 
id="jr-is-tb"><a id="jr-is-sw" class="btn wsprkl hidden" title="Switch between Figures/Tables strip and Progress bar"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><rect x="10" y="40" width="20" height="20"></rect><rect x="40" y="40" width="20" height="20"></rect><rect x="70" y="40" width="20" height="20"></rect></svg></a></div><nav id="jr-istrip" class="istrip hidden"><a id="jr-is-prev" href="#" class="hidden" title="Previous"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M80,40 60,65 80,90 70,90 50,65 70,40z M50,40 30,65 50,90 40,90 20,65 40,40z"></path><text x="35" y="25" textLength="60" style="font-size:25px">Prev</text></svg></a><a id="jr-is-next" href="#" class="hidden" title="Next"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M20,40 40,65 20,90 30,90 50,65 30,40z M50,40 70,65 50,90 60,90 80,65 60,40z"></path><text x="15" y="25" textLength="60" style="font-size:25px">Next</text></svg></a></nav><nav id="jr-progress"></nav></nav></nav><aside id="jr-links-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">NCBI Bookshelf</div></div><div class="cnt lol f1"><a href="/books/">Home</a><a href="/books/browse/">Browse All Titles</a><a class="btn share" target="_blank" rel="noopener noreferrer" href="https://www.facebook.com/sharer/sharer.php?u=https://www.ncbi.nlm.nih.gov/books/NBK21091/"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 33 33" style="vertical-align:middle" width="24" height="24" preserveAspectRatio="none"><g><path d="M 17.996,32L 12,32 L 12,16 l-4,0 l0-5.514 l 4-0.002l-0.006-3.248C 11.993,2.737, 13.213,0, 18.512,0l 4.412,0 l0,5.515 l-2.757,0 c-2.063,0-2.163,0.77-2.163,2.209l-0.008,2.76l 4.959,0 l-0.585,5.514L 18,16L 17.996,32z"></path></g></svg> Share on Facebook</a><a class="btn share" target="_blank" rel="noopener 
noreferrer" href="https://twitter.com/intent/tweet?url=https://www.ncbi.nlm.nih.gov/books/NBK21091/&amp;text=The%20Reference%20Sequence%20(RefSeq)%20Database"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 33 33" style="vertical-align:middle" width="24" height="24"><g><path d="M 32,6.076c-1.177,0.522-2.443,0.875-3.771,1.034c 1.355-0.813, 2.396-2.099, 2.887-3.632 c-1.269,0.752-2.674,1.299-4.169,1.593c-1.198-1.276-2.904-2.073-4.792-2.073c-3.626,0-6.565,2.939-6.565,6.565 c0,0.515, 0.058,1.016, 0.17,1.496c-5.456-0.274-10.294-2.888-13.532-6.86c-0.565,0.97-0.889,2.097-0.889,3.301 c0,2.278, 1.159,4.287, 2.921,5.465c-1.076-0.034-2.088-0.329-2.974-0.821c-0.001,0.027-0.001,0.055-0.001,0.083 c0,3.181, 2.263,5.834, 5.266,6.438c-0.551,0.15-1.131,0.23-1.73,0.23c-0.423,0-0.834-0.041-1.235-0.118 c 0.836,2.608, 3.26,4.506, 6.133,4.559c-2.247,1.761-5.078,2.81-8.154,2.81c-0.53,0-1.052-0.031-1.566-0.092 c 2.905,1.863, 6.356,2.95, 10.064,2.95c 12.076,0, 18.679-10.004, 18.679-18.68c0-0.285-0.006-0.568-0.019-0.849 C 30.007,8.548, 31.12,7.392, 32,6.076z"></path></g></svg> Share on Twitter</a></div></aside><aside id="jr-rtoc-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Table of Content</div></div><div class="cnt lol f1"><a href="/books/n/handbook/?report=reader">Title Information</a><a href="/books/n/handbook/toc/?report=reader">Table of Contents Page</a></div></aside><aside id="jr-help-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Settings</div></div><div class="cnt f1"><div id="jr-typo-p" class="typo"><div><a class="sf btn wsprkl">A-</a><a class="lf btn wsprkl">A+</a></div><div><a class="bcol-auto btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 200 100" preserveAspectRatio="none"><text x="10" y="70" style="font-size:60px;font-family: Trebuchet MS, ArialMT, Arial, sans-serif" 
textLength="180">AUTO</text></svg></a><a class="bcol-1 btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M15,25 85,25zM15,40 85,40zM15,55 85,55zM15,70 85,70z"></path></svg></a><a class="bcol-2 btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M5,25 45,25z M55,25 95,25zM5,40 45,40z M55,40 95,40zM5,55 45,55z M55,55 95,55zM5,70 45,70z M55,70 95,70z"></path></svg></a></div></div><div class="lol"><a class="" href="/books/NBK21091/?report=classic">Switch to classic view</a><a href="/books/NBK21091/pdf/Bookshelf_NBK21091.pdf">PDF (863K)</a><a href="/books/n/handbook/pdf/">PDF (7.2M)</a><a href="/books/NBK21091/?report=printable">Print View</a></div></div></aside><aside id="jr-bkhelp-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Help</div></div><div class="cnt f1 lol"><a id="jr-helpobj-sw" data-path="/corehtml/pmc/jatsreader/ptpmc_3.22/" data-href="/corehtml/pmc/jatsreader/ptpmc_3.22/img/bookshelf/help.xml" href="">Help</a><a href="mailto:info@ncbi.nlm.nih.gov?subject=PubReader%20feedback%20%2F%20NBK21091%20%2F%20sid%3ACE8B5AF87C7FFCB1_0191SID%20%2F%20phid%3ACE8B9BD67C9C0A8100000000002D0027.4">Send us feedback</a><a id="jr-about-sw" data-path="/corehtml/pmc/jatsreader/ptpmc_3.22/" data-href="/corehtml/pmc/jatsreader/ptpmc_3.22/img/bookshelf/about.xml" href="">About PubReader</a></div></aside><aside id="jr-objectbox" class="thidden hidden"><div class="jr-objectbox-close wsprkl">&#10008;</div><div class="jr-objectbox-inner cnt"><div class="jr-objectbox-drawer"></div></div></aside><nav id="jr-pm-left" class="hidden"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 800" preserveAspectRatio="none"><text font-stretch="ultra-condensed" x="800" y="-15" text-anchor="end" transform="rotate(90)" font-size="18" letter-spacing=".1em">Previous Page</text></svg></nav><nav 
id="jr-pm-right" class="hidden"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 800" preserveAspectRatio="none"><text font-stretch="ultra-condensed" x="800" y="-15" text-anchor="end" transform="rotate(90)" font-size="18" letter-spacing=".1em">Next Page</text></svg></nav><nav id="jr-fip" class="hidden"><nav id="jr-fip-term-p"><input type="search" placeholder="search this page" id="jr-fip-term" autocorrect="off" autocomplete="off" /><a id="jr-fip-mg" class="wsprkl btn" title="Find"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 550 600" preserveAspectRatio="none"><path fill="none" stroke="#000" stroke-width="36" stroke-linecap="round" style="fill:#FFF" d="m320,350a153,153 0 1,0-2,2l170,170m-91-117 110,110-26,26-110-110"></path></svg></a><a id="jr-fip-done" class="wsprkl btn" title="Dismiss find">&#10008;</a></nav><nav id="jr-fip-info-p"><a id="jr-fip-prev" class="wsprkl btn" title="Jump to previous match">&#9664;</a><button id="jr-fip-matches">no matches yet</button><a id="jr-fip-next" class="wsprkl btn" title="Jump to next match">&#9654;</a></nav></nav></div><div id="jr-epub-interstitial" class="hidden"></div><div id="jr-content"><article data-type="main"><p class="vip-notice"><strong><a href="/books/n/handbook2e/?report=reader">See "The NCBI Handbook, 2nd Edition"</a></strong></p><p class="vip-notice retraction"><strong>This publication is provided for historical reference only and the information may be out of date.</strong></p><div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><div class="fm-sec"><h1 id="_NBK21091_"><span class="label">Chapter 18</span><span class="title" itemprop="name">The Reference Sequence (RefSeq) Database</span></h1><p class="contribs">Pruitt K, Brown G, Tatusova T, et al.</p><p class="fm-aai"><a href="#_NBK21091_pubdet_">Publication Details</a></p><p><em>Estimated reading time: 24 minutes</em></p></div></div><div class="jig-ncbiinpagenav 
body-content whole_rhythm" data-jigconfig="allHeadingLevels: ['h2'],smoothScroll: false" itemprop="text"><div id="ch18.Summary"><h2 id="_ch18_Summary_">Summary</h2><p>NCBI&#x02019;s Reference Sequence (RefSeq) database is a collection of taxonomically diverse, non-redundant and richly annotated sequences representing naturally occurring molecules of DNA, RNA, and protein. Included are sequences from plasmids, organelles, viruses, archaea, bacteria, and eukaryotes. Each RefSeq is constructed wholly from sequence data submitted to the International Nucleotide Sequence Database Collaboration (INSDC). Similar to a review article, a RefSeq is a synthesis of information integrated across multiple sources at a given time. RefSeqs provide a foundation for uniting sequence data with genetic and functional information. They are generated to provide reference standards for multiple purposes ranging from genome annotation to reporting locations of sequence variation in medical records. The RefSeq collection is available without restriction and can be retrieved in several different ways, such as by searching or by available links in NCBI resources, including <a href="/sites/entrez?db=pubmed" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">PubMed</a>, <a href="/entrez/query.fcgi?db=Nucleotide" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Nucleotide</a>, <a href="/entrez/query.fcgi?db=Protein" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein</a>, <a href="/entrez/query.fcgi?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a>, and <a href="/mapview/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Map Viewer</a>, searching with a sequence via <a href="/BLAST/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLAST</a>, and downloading from the <a 
href="ftp://ftp.ncbi.nlm.nih.gov/refseq" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">RefSeq FTP</a> site.</p><p>This chapter describes:</p><ul><li class="half_rhythm"><div>The database content</div></li><li class="half_rhythm"><div>How data are assembled and maintained</div></li><li class="half_rhythm"><div>How RefSeqs can be accessed and retrieved</div></li></ul></div><div id="ch18.Introduction"><h2 id="_ch18_Introduction_">Introduction</h2><p>NCBI&#x02019;s Reference Sequence (RefSeq) collection is a freely accessible database of naturally occurring DNA, RNA, and protein sequences. It is a unique resource because it provides a large, multi-species, curated sequence database representing separate but explicitly linked records from genomes to transcripts and translation products, as appropriate. Unlike the sequence redundancy found in the public sequence repositories that comprise the <a href="http://www.insdc.org" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a>, (<i>i.e.</i>, NCBI&#x02019;s <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a>, the <a href="http://www.ebi.ac.uk/ena/home" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">European Nucleotide Archive</a> [ENA], and the <a href="http://www.ddbj.nig.ac.jp/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">DNA Data Bank of Japan</a> [DDBJ]), the RefSeq collection aims to provide, for each included species, a complete set of non-redundant, extensively cross-linked, and richly annotated nucleic acid and protein records. 
It is recognized, however, that the coverage and finishing of public sequence data vary from organism to organism, so intermediate genomic records are provided in some circumstances.</p><p>The non-redundant nature of the RefSeq collection facilitates database inquiries based on genomic location, sequence, or text annotation. Be aware, however, that the RefSeq collection does include alternatively spliced transcripts encoding the same protein or distinct protein isoforms, in addition to orthologs, paralogs, and alternative haplotypes for some organisms, which will affect the outcome of a database query.</p><p>RefSeq records are based on sequence records submitted to the <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a>. However, the RefSeq collection is a distinct database. The public archival databases house sequences and annotations supplied by original authors and cannot be altered by others. The RefSeq collection differs from the archival databases in the same way that a review article differs from a related collection of primary research articles on the same subject. Each RefSeq record represents a synthesis, by a person or group, of the primary information that was generated and submitted by others. Other organizing principles or standards of judgment are possible, which is why the work is attributed to the synthesizing "editors". The RefSeq dataset is curated on an ongoing basis by collaborating groups and by NCBI staff. Sequence records are presented in a standard format and subjected to computational validation. 
The <a href="http://www.insdc.org" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> source of the RefSeq record, the curation status, and attribution to the curation group are also indicated.</p><p>The RefSeq collection establishes a useful baseline for integrating diverse data types, including sequence, genetic, expression, and functional information, into one consistent framework with a uniform set of conventions and standards. The RefSeq collection supports the following activities:</p><ul><li class="half_rhythm"><div>genome annotation</div></li><li class="half_rhythm"><div>gene characterization</div></li><li class="half_rhythm"><div>comparative genomics</div></li><li class="half_rhythm"><div>reporting sequence variation, and</div></li><li class="half_rhythm"><div>expression studies</div></li></ul></div><div id="ch18.Database_Content_Background"><h2 id="_ch18_Database_Content_Background_">Database Content: Background</h2><p>The May 2011 RefSeq collection (Release 47) includes sequences from more than 12,000 distinct taxonomic identifiers, ranging from viruses to bacteria to eukaryotes. It represents chromosomes, organelles, plasmids, viruses, transcripts, and more than 12.6 million proteins. Every sequence has a stable accession number, a version number, and an integer identifier (gi) assigned to it. Outdated versions are always available if a sequence is updated. RefSeq records can be distinguished from <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> records by the inclusion of an underscore (&#x0201c;_&#x0201d;) at the third position of the accession number. 
The RefSeq accession prefix has an implied meaning in terms of the type of molecule it represents, as outlined in <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly" target="object" rid-figpopup="figch18Trefseqaccessionnumbersandmole" rid-ob="figobch18Trefseqaccessionnumbersandmole">Table 1</a>.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Trefseqaccessionnumbersandmole"><a href="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly" target="object" title="Table 1. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Trefseqaccessionnumbersandmole" rid-ob="figobch18Trefseqaccessionnumbersandmole"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=thumb" src-large="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=previmg" alt="Table 1. RefSeq accession numbers and molecule types." /></a><div class="icnblk_cntnt"><h4 id="ch18.T.refseq_accession_numbers_and_mole"><a href="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly" target="object" rid-ob="figobch18Trefseqaccessionnumbersandmole">Table 1. </a></h4><p class="float-caption no_bottom_margin">RefSeq accession numbers and molecule types. </p></div></div><div id="ch18.Updates"><h3>Updates</h3><p>RefSeq updates are provided daily. These include new records added to the collection, and records updated to reflect sequence or annotation changes, including complete re-annotation of a genome. New and updated records are made available in Entrez and <a href="/BLAST/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLAST</a> databases as soon as possible. 
The <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">RefSeq FTP site</a> also provides daily update information.</p></div><div id="ch18.Flat_File_Format_and_Annotated_Feat"><h3>Flat File Format and Annotated Features</h3><p>RefSeq records appear similar in format to <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> records. Attributes novel to RefSeq records include a unique accession prefix followed by an underscore (<a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly" target="object" rid-figpopup="figch18Trefseqaccessionnumbersandmole" rid-ob="figobch18Trefseqaccessionnumbersandmole">Table 1</a>) and a <span class="bk_pgobj">COMMENT</span> field that indicates the RefSeq <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> and the <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> source of the sequence information (Figures <a class="figpopup" href="/books/NBK21091/figure/ch18.F1A/?report=objectonly" target="object" rid-figpopup="figch18F1A" rid-ob="figobch18F1A">1A</a>, <a class="figpopup" href="/books/NBK21091/figure/ch18.F1B/?report=objectonly" target="object" rid-figpopup="figch18F1B" rid-ob="figobch18F1B">1B</a>, <a class="figpopup" href="/books/NBK21091/figure/ch18.F1C/?report=objectonly" target="object" rid-figpopup="figch18F1C" rid-ob="figobch18F1C">1C</a>, and <a class="figpopup" href="/books/NBK21091/figure/ch18.F1D/?report=objectonly" target="object" rid-figpopup="figch18F1D" rid-ob="figobch18F1D">1D</a>). 
For human RefSeqs, the <span class="bk_pgobj">COMMENT</span> field also indicates whether the RefSeq is a reference standard from the <a href="/refseq/rsg/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeqGene</a> project. Some RefSeq records may include feature annotations or database cross-references (db_xrefs) that are not seen in the underlying <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> record. This annotation is provided by computation and by manual curation. For example, nucleotide variation, STS, and tRNA features are computed for a subset of RefSeq entries using the data available in <a href="/projects/SNP/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">dbSNP</a> (<a href="/books/n/handbook/ch5/?report=reader">Chapter 5</a>), <a href="/entrez/query.fcgi?db=unists" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">UniSTS</a>, and through tRNA-scan prediction (<a href="/pubmed?term=9023104%5buid%5d" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Lowe and Eddy, 1997</a>). For human and mouse, exon feature annotation is also calculated for RefSeq transcript and non-transcribed pseudogene records. Db_xrefs provide links to <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a>, nomenclature authorities, such as the HUGO Gene Nomenclature Committee (<a href="http://www.genenames.org/index.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">HGNC</a>) for human RefSeq records, and to the Consensus CDS (<a href="/projects/CCDS/CcdsBrowse.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">CCDS</a>) project. 
RefSeq proteins also report conserved domains computed by NCBI's <a href="/entrez/query.fcgi?db=cdd" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Conserved Domain Database</a> (<a href="/books/n/handbook/ch3/?report=reader">Chapter 3</a>). Additional protein features are propagated from the corresponding <a href="http://www.uniprot.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">UniProtKB/Swiss-Prot</a> records for a subset of species. Other nucleotide and protein features, publications, and comments may be added by collaborating groups or NCBI staff.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Trefseqstatuscodes"><a href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" title="Table 2. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=thumb" src-large="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=previmg" alt="Table 2. . RefSeq status codes." /></a><div class="icnblk_cntnt"><h4 id="ch18.T.refseq_status_codes"><a href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-ob="figobch18Trefseqstatuscodes">Table 2. </a></h4><p class="float-caption no_bottom_margin">RefSeq status codes. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F1A" co-legend-rid="figlgndch18F1A"><a href="/books/NBK21091/figure/ch18.F1A/?report=objectonly" target="object" title="Figure 1A. " class="img_link icnblk_img figpopup" rid-figpopup="figch18F1A" rid-ob="figobch18F1A"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image001.gif" src-large="/books/NBK21091/bin/ch18-Image001.jpg" alt="Figure 1A. . Features of a RefSeq record." 
/></a><div class="icnblk_cntnt" id="figlgndch18F1A"><h4 id="ch18.F1A"><a href="/books/NBK21091/figure/ch18.F1A/?report=objectonly" target="object" rid-ob="figobch18F1A">Figure 1A. </a></h4><p class="float-caption no_bottom_margin">Features of a RefSeq record. The beginning of a RefSeq record when displayed in the GenBank flat file format is shown. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F1B" co-legend-rid="figlgndch18F1B"><a href="/books/NBK21091/figure/ch18.F1B/?report=objectonly" target="object" title="Figure 1B. " class="img_link icnblk_img figpopup" rid-figpopup="figch18F1B" rid-ob="figobch18F1B"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image002.gif" src-large="/books/NBK21091/bin/ch18-Image002.jpg" alt="Figure 1B. . The COMMENT and PRIMARY sections." /></a><div class="icnblk_cntnt" id="figlgndch18F1B"><h4 id="ch18.F1B"><a href="/books/NBK21091/figure/ch18.F1B/?report=objectonly" target="object" rid-ob="figobch18F1B">Figure 1B. </a></h4><p class="float-caption no_bottom_margin">The COMMENT and PRIMARY sections. The gene Summary is provided for RefSeqs with a REVIEWED status only. The PRIMARY block, providing the RefSeq assembly details, is displayed for vertebrate records predominantly. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F1C" co-legend-rid="figlgndch18F1C"><a href="/books/NBK21091/figure/ch18.F1C/?report=objectonly" target="object" title="Figure 1C. " class="img_link icnblk_img figpopup" rid-figpopup="figch18F1C" rid-ob="figobch18F1C"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image003.gif" src-large="/books/NBK21091/bin/ch18-Image003.jpg" alt="Figure 1C. . The FEATURES section." /></a><div class="icnblk_cntnt" id="figlgndch18F1C"><h4 id="ch18.F1C"><a href="/books/NBK21091/figure/ch18.F1C/?report=objectonly" target="object" rid-ob="figobch18F1C">Figure 1C. </a></h4><p class="float-caption no_bottom_margin">The FEATURES section. 
Only a subset of the available feature annotation is shown. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F1D" co-legend-rid="figlgndch18F1D"><a href="/books/NBK21091/figure/ch18.F1D/?report=objectonly" target="object" title="Figure 1D. " class="img_link icnblk_img figpopup" rid-figpopup="figch18F1D" rid-ob="figobch18F1D"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image004.gif" src-large="/books/NBK21091/bin/ch18-Image004.jpg" alt="Figure 1D. . NCBI&#x02019;s Sequence Viewer." /></a><div class="icnblk_cntnt" id="figlgndch18F1D"><h4 id="ch18.F1D"><a href="/books/NBK21091/figure/ch18.F1D/?report=objectonly" target="object" rid-ob="figobch18F1D">Figure 1D. </a></h4><p class="float-caption no_bottom_margin">NCBI&#x02019;s Sequence Viewer. The annotated features on a RefSeq record can be displayed in a graphical format (note the link &#x02018;Graphics&#x02019; in Figure 1A). The display can be modified by following the &#x02018;Configure&#x02019; link. The <a href="/books/NBK21091/figure/ch18.F1D/?report=objectonly" target="object" rid-ob="figobch18F1D">(more...)</a></p></div></div></div></div><div id="ch18.Assembling_and_Maintaining_the_RefS"><h2 id="_ch18_Assembling_and_Maintaining_the_RefS_">Assembling and Maintaining the RefSeq Collection</h2><div id="ch18.Summary_1"><h3>Summary</h3><p>The <a href="/RefSeq/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeq</a> collection is the result of data extraction from <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> submissions, curation, and computation, combined with extensive collaboration with authoritative groups. Each molecule is annotated as accurately as possible with the organism name, strain (or breed, ecotype, cultivar, or isolate), gene symbol for that organism, and informative protein name. 
Collaborations with authoritative groups outside of NCBI provide a variety of information, including curated sequence data, nomenclature, feature annotations, and links to external organism-specific resources. When no collaboration has been established, NCBI staff assembles the data from the <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> submission. Each record has a <span class="bk_pgobj">COMMENT</span>, indicating the level of curation that it has received (<a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">Table 2</a>), and attribution of the collaborating group. Thus, a RefSeq record may be an essentially unchanged, validated copy of the original <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> submission, or include updated or additional information supplied by collaborators or NCBI staff.</p><p>If multiple <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> submissions represent the same molecule for an organism, the "best" sequence is chosen to serve as the RefSeq record. Known mutations, sequencing errors, cloning artifacts and erroneous annotation are avoided. Sequences are validated to confirm that the genomic sequence corresponding to an annotated mRNA feature matches the mRNA sequence record, and that coding region features translate into the corresponding protein sequence.</p><p>Working groups using distinct process pipelines compile the RefSeq collection for different organisms (<a class="figpopup" href="/books/NBK21091/figure/ch18.F2/?report=objectonly" target="object" rid-figpopup="figch18F2" rid-ob="figobch18F2">Figure 2</a>). 
RefSeq records are provided via several distinct approaches including:</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F2" co-legend-rid="figlgndch18F2"><a href="/books/NBK21091/figure/ch18.F2/?report=objectonly" target="object" title="Figure 2. " class="img_link icnblk_img figpopup" rid-figpopup="figch18F2" rid-ob="figobch18F2"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image005.gif" src-large="/books/NBK21091/bin/ch18-Image005.jpg" alt="Figure 2. . RefSeq Processing Pipelines." /></a><div class="icnblk_cntnt" id="figlgndch18F2"><h4 id="ch18.F2"><a href="/books/NBK21091/figure/ch18.F2/?report=objectonly" target="object" rid-ob="figobch18F2">Figure 2. </a></h4><p class="float-caption no_bottom_margin">RefSeq Processing Pipelines. Sequence data deposited in the public archival databases is available for RefSeq processing. Processing pipelines include the vertebrate curation pipeline, the computational genome annotation pipeline, and extraction from <a href="/books/NBK21091/figure/ch18.F2/?report=objectonly" target="object" rid-ob="figobch18F2">(more...)</a></p></div></div><ul><li class="half_rhythm"><div>collaboration</div></li><li class="half_rhythm"><div>extraction from GenBank</div></li><li class="half_rhythm"><div>computational genome annotation pipeline</div></li><li class="half_rhythm"><div>curation by NCBI staff</div></li></ul></div><div id="ch18.Collaboration"><h3>Collaboration</h3><p>RefSeq welcomes collaborations with authoritative groups outside of NCBI that are willing to provide sequences, nomenclature, annotation, or links to phenotypic or organism-specific resources. The RefSeq <a href="/RefSeq/update.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">feedback form</a> can be used to provide corrections or to initiate collaboration. The extent of collaboration may vary. 
For some species, the sequences and annotation of the entire RefSeq collection are provided by a collaborating authoritative group (see <a class="figpopup" href="/books/NBK21091/table/ch18.T.examples_of_collaborators_who_con/?report=objectonly" target="object" rid-figpopup="figch18Texamplesofcollaboratorswhocon" rid-ob="figobch18Texamplesofcollaboratorswhocon">Table 3</a> for examples). For others, most notably the human and mouse RefSeq collections, numerous collaborations with individual scientists contribute to the representation of specific genes or complete gene families. Nomenclature for human and mouse is also provided via collaboration with the HUGO Gene Nomenclature Committee (<a href="http://www.genenames.org/index.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">HGNC</a>) and the Mouse Genome Informatics group (<a href="http://www.informatics.jax.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">MGI</a>), respectively; <a class="figpopup" href="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=objectonly" target="object" rid-figpopup="figch18Texamplesofcollaboratinggroups" rid-ob="figobch18Texamplesofcollaboratinggroups">Table 4</a> provides additional examples. Other collaborations extend across entire sets of organisms; for example, a board of <a href="/genomes/GenomesHome.cgi?taxid=10239&#x00026;hopt=advisors" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Viral Genomes Advisors</a> supports curation of the viral RefSeq collection. Thus, RefSeq records may contain information provided by an external authoritative source and/or analyses and curation at NCBI. 
The collaborating group is identified on the record.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Texamplesofcollaboratorswhocon"><a href="/books/NBK21091/table/ch18.T.examples_of_collaborators_who_con/?report=objectonly" target="object" title="Table 3. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Texamplesofcollaboratorswhocon" rid-ob="figobch18Texamplesofcollaboratorswhocon"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.examples_of_collaborators_who_con/?report=thumb" src-large="/books/NBK21091/table/ch18.T.examples_of_collaborators_who_con/?report=previmg" alt="Table 3. Examples of collaborators who contribute RefSeq records." /></a><div class="icnblk_cntnt"><h4 id="ch18.T.examples_of_collaborators_who_con"><a href="/books/NBK21091/table/ch18.T.examples_of_collaborators_who_con/?report=objectonly" target="object" rid-ob="figobch18Texamplesofcollaboratorswhocon">Table 3. </a></h4><p class="float-caption no_bottom_margin">Examples of collaborators who contribute RefSeq records. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Texamplesofcollaboratinggroups"><a href="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=objectonly" target="object" title="Table 4. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Texamplesofcollaboratinggroups" rid-ob="figobch18Texamplesofcollaboratinggroups"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=thumb" src-large="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=previmg" alt="Table 4. Examples of collaborating groups." /></a><div class="icnblk_cntnt"><h4 id="ch18.T.examples_of_collaborating_groups"><a href="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=objectonly" target="object" rid-ob="figobch18Texamplesofcollaboratinggroups">Table 4. 
</a></h4><p class="float-caption no_bottom_margin">Examples of collaborating groups </p></div></div><p>Processing of RefSeq records supplied entirely by an external group is largely automated. The sequence and/or annotation is periodically submitted, validated to detect conflicts in the annotation, and modified slightly to format the submission as a RefSeq record, including addition of db_xrefs to <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a>. NCBI staff do not directly curate the annotation or modify the sequence of RefSeq records provided by collaborating groups. Any problems identified by the validation process or by the scientific community are reported to the submitting group, and any update made to the annotation or sequence is reflected in a future RefSeq release.</p></div><div id="ch18.Extraction_from_GenBank_records"><h3>Extraction from GenBank records</h3><p>Complete genome data for viruses, organelles, prokaryotes, and some eukaryotes is propagated to RefSeq records from the whole genome sequence data and annotation available in <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> (also in the ENA and DDBJ public archives). Generally, an initial validation step is performed before the RefSeq record is made public. The resulting RefSeq record is a copy of the <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> submission but may contain some additional annotations as a result of the validation step. 
In particular, transcripts are provided as separate RefSeq records for most eukaryotic organisms; the <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> submission of the genome sequence from which the RefSeq record is propagated instantiates the protein only, not the transcript.</p><p>This process flow is supported by the <a href="/bioproject" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BioProject</a> and <a href="/entrez/query.fcgi?db=genome" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Genome</a> databases. The <a href="/bioproject" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BioProject</a> database tracks the status of whole-genome sequencing projects submitted to <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> and of other types of large-scale projects, and provides an overview of the organism and links to data and other resources. The resulting genomic RefSeq data is represented in the <a href="/sites/genome/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Genome</a> database, which includes bacteria, archaea, eukaryotes, viroids, viruses, plasmids, and organelles. 
The <a href="/sites/genome/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Genome</a> website provides custom displays, analysis, and tools for prokaryotic and some eukaryotic genomes (see <a class="figpopup" href="/books/NBK21091/table/ch18.T.selected_entrez_genome_resources/?report=objectonly" target="object" rid-figpopup="figch18Tselectedentrezgenomeresources" rid-ob="figobch18Tselectedentrezgenomeresources">Table 5</a>).</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Tselectedentrezgenomeresources"><a href="/books/NBK21091/table/ch18.T.selected_entrez_genome_resources/?report=objectonly" target="object" title="Table 5. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Tselectedentrezgenomeresources" rid-ob="figobch18Tselectedentrezgenomeresources"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.selected_entrez_genome_resources/?report=thumb" src-large="/books/NBK21091/table/ch18.T.selected_entrez_genome_resources/?report=previmg" alt="Table 5. . Selected Entrez Genome resources." /></a><div class="icnblk_cntnt"><h4 id="ch18.T.selected_entrez_genome_resources"><a href="/books/NBK21091/table/ch18.T.selected_entrez_genome_resources/?report=objectonly" target="object" rid-ob="figobch18Tselectedentrezgenomeresources">Table 5. </a></h4><p class="float-caption no_bottom_margin">Selected Entrez Genome resources. 
</p></div></div><p>Note that processing of most eukaryotic genomes is more complex, requires more than basic extraction from <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a>, and occurs independently, largely because the volume of data is significantly greater.</p><p>Extraction of <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> whole genome data for processing into RefSeq records falls into four primary categories: <a href="#ch18.Chromosomes">chromosomes</a>, <a href="#ch18.Microbial_genomes">microbial genomes</a>, <a href="#ch18.Small_complete_genomes">small complete genomes</a>, and <a href="#ch18.Targeted_loci">targeted loci</a>.</p><div id="ch18.Chromosomes"><h4>Chromosomes</h4><p>Complete chromosome sequence assembled from individual clones (that are themselves available from the <a href="http://www.insdc.org" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a>) is propagated into a RefSeq record. For some genomes, the RefSeq representation uses a unit of interest to the research community; for example, some of the RefSeq genomic records for <i>Drosophila melanogaster</i> represent chromosome arms rather than complete chromosomes. RefSeq records may also be available for some genomes that are not yet fully sequenced but for which complete sequence is available for individual chromosomes. 
These complete chromosome RefSeq records may be annotated by the NCBI computational annotation pipeline, or they may be curated by an organism-specific collaborating group and undergo NCBI validation before being released.</p></div><div id="ch18.Microbial_genomes"><h4>Microbial genomes</h4><p>For microbial species, historically all complete and draft genomes submitted to <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> were propagated to the RefSeq collection. This is no longer tenable because of the volume of genomic data being generated, so additional RefSeq records are created from new <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> submissions only to span the taxonomic diversity; this means in general, one genomic RefSeq per species is provided. If significant sequence diversity exists, or if subspecies or subgroups require representation as determined by NCBI staff, more than one RefSeq may exist for a given species.</p></div><div id="ch18.Small_complete_genomes"><h4>Small complete genomes</h4><p>RefSeq records representing organelle, viral, and plasmid genomes are based on single <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> records. For organelle and viral genomes, if more than one <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> submission is available for a species, typically only one is chosen to propagate to the RefSeq collection. Various factors, including the level of annotation, strain information, and community input are considered when deciding which <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> submission to represent. 
There is no plasmid taxonomy; a <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> submission is propagated to the RefSeq collection if it is part of a larger registered genome sequencing project, or if it exhibits significant sequence divergence when compared to other plasmids.</p></div><div id="ch18.Targeted_loci"><h4>Targeted loci</h4><p>The <a href="/genomes/static/refseqtarget.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeq Targeted Loci Project</a> is a collaborative effort to curate and maintain molecular markers of use in the identification and classification of organisms. The initial focus is on ribosomal RNAs, although expansion to other informative sequences is anticipated. From <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> submissions, the project creates RefSeq records for the small subunit of ribosomal RNA (16S in prokaryotes and 18S in eukaryotes) and the large subunit ribosomal RNA (23S in prokaryotes and 28S in eukaryotes). As of November 2010, there are 3331 16S rDNA RefSeq records from bacteria and archaea, and 137 18S rDNA and 97 28S rDNA RefSeq records from fungi.</p></div></div><div id="ch18.Computational_Genome_Annotation_Pip"><h3>Computational Genome Annotation Pipeline</h3><p>NCBI computes annotation of genomic sequence data for some genomes including some microbes, vertebrates (<i>e.g.</i>, human, mouse, rat, cow, and zebrafish) and invertebrates (<i>e.g.</i>, honey bee, acorn worm, and pea aphid). The annotation pipeline is automated and yields genomic, transcript, and protein (when appropriate) RefSeq records. Names annotated on the transcript and protein products are based on sequence similarity. 
Annotation data are refreshed periodically, and records generated from this process flow are not curated or updated between annotation runs (see <a href="/books/n/handbook/ch14/?report=reader">Chapter 14</a> for more information on the eukaryotic genome annotation pipeline; information about NCBI&#x02019;s prokaryotic annotation pipeline is also <a href="/genomes/static/Pipeline.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">available</a>). For some species, including human, RefSeq records may be provided by a mixture of methods. In other words, there may be a set of curated transcript and protein records (see the following section) in addition to a set of records generated computationally. RefSeq records that are processed by NCBI's pipelines are displayed in the NCBI <a href="/mapview/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Map Viewer</a> (<a href="/books/n/handbook/ch20/?report=reader">Chapter 20</a>), included in <a href="/entrez/query.fcgi?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a>, and are available in NCBI&#x02019;s sequence databases.</p></div><div id="ch18.Curation_by_NCBI_Staff"><h3>Curation by NCBI Staff</h3><p>A portion of the RefSeq dataset is curated by NCBI staff. This subset includes viral, mitochondrial, vertebrate, and some invertebrate organisms. 
Most bacterial, plant, and fungal records are provided either by collaboration or by processing the annotated genome data submitted to the <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a>; however, a small number of bacterial genomes are annotated and curated by NCBI staff.</p><div id="ch18.Curation_of_Microbial_Viral_and_Mit"><h4>Curation of Microbial, Viral, and Mitochondrial RefSeqs</h4><p>Microbial, viral, and metazoan mitochondrial RefSeq records are validated for content propagated from the original <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> submission, including taxonomy, publications, and annotation, prior to becoming public. This content may be modified, augmented, or deleted by NCBI curation staff.</p><p>For microbial genomes, a set of minimal annotation standards (described <a href="/genomes/AnnotationWorkshop.html#refAD" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">here</a>) is automatically provided on all legacy and new RefSeq records. These include ribosomal RNAs, transfer RNAs, and protein-coding genes with locus_tags. Ribosomal RNAs are predicted using BLASTn tools against an RNA sequence database and/or using Infernal (Eddy, 2002) and Rfam models (Griffiths-Jones et al., 2003). Transfer RNAs are predicted using tRNAscan-SE (Lowe and Eddy, 1997). Other annotation above the minimum standards may be added based on an external source or literature review. Annotation associated with NCBI&#x02019;s <a href="/proteinclusters" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein Clusters</a> database is also propagated to the RefSeq records (both proteins and genes) at selected intervals. 
The <a href="/proteinclusters" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein Clusters</a> database is a collection of RefSeq proteins from complete genomes broadly organized into the following groups: archaeal and bacterial genomes and plasmids, viruses, protists, plants, and chloroplasts and mitochondria, and annotated based on sequence similarity and protein function. This clustering allows the entire group to be curated as a single set, permitting well characterized proteins to seed the annotation of less studied ones within the same cluster. NCBI staff use literature and information from other databases, including <a href="http://www.uniprot.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">UniProtKB/Swiss-Prot</a>, to annotate each cluster with standardized protein names, biochemical descriptions, and other data, which is then transferred to individual proteins within the relevant RefSeq records. A microbial genome RefSeq record typically has a <span class="bk_pgobj">PROVISIONAL</span> review <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a>.</p><p>Annotation of viral genomes relies on an established group of <a href="/genomes/GenomesHome.cgi?taxid=10239&#x00026;hopt=advisors" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Viral RefSeq Genome Advisors</a>, members of the <a href="http://www.ictvonline.org/index.asp?bhcp=1" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">International Committee on the Taxonomy of Viruses</a>, and other experts outside of NCBI. 
For example, the HIV-1 RefSeq (<a href="/nuccore/NC_001802" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">NC_001802</a>) was curated by NCBI staff in collaboration with the authors of the book <a href="/books/NBK19376/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Retroviruses</a>, and many of the adenovirus and herpesvirus records have been curated by outside experts. Based on literature review, NCBI curators may modify the CDS and RNA annotation compared to the <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> submission, as was done for the Measles virus RefSeq record (<a href="/nuccore/NC_001498" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">NC_001498</a>). Additional NCBI resources used during the curation of viral RefSeq records include the <a href="/proteinclusters" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein Clusters</a> database and <a href="/sutils/pasc/viridty.cgi?textpage=documentation#refAA" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">PASC</a>, a virus classification tool used to validate the taxonomy of virus RefSeq records across a number of taxonomic families. NCBI also maintains several specialized annotation pipelines for use in the <a href="/genomes/VirusVariation/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Virus Variation</a> and <a href="/genomes/FLU/FLU.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Influenza Virus</a> resources. 
Manually curated viral RefSeq records are annotated with a <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of <span class="bk_pgobj">REVIEWED</span> or <span class="bk_pgobj">VALIDATED</span> in the RefSeq COMMENT block.</p><p>For metazoan mitochondrial RefSeq records, standardized protein, gene, and RNA names are annotated independent of species-specific nomenclature guidelines. Additional curation may include adding common names or missing tRNAs and adjusting the coding region spans based on the <a href="/sites/entrez?db=proteinclusters" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein Clusters</a> database. Curated metazoan mitochondrial records are annotated with a <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of <span class="bk_pgobj">REVIEWED</span>. Non-metazoan and plant chloroplast RefSeq records are not curated, are derived entirely from the original <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> submission, and have a <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of <span class="bk_pgobj">PROVISIONAL</span>.</p><p>For targeted loci, vector or primer sequence from the <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> submission is excluded from the RefSeq record. 
Any feature annotation may be modified to represent a standard format, and collection identifiers and publications referencing the original <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> submission may be added.</p></div><div id="ch18.Curation_of_Vertebrate_and_Inverteb"><h4>Curation of Vertebrate and Invertebrate Records</h4><p>Curation of higher eukaryotic organisms is focused on mammalian genomes, especially human and mouse, but also includes many other species with existing or planned genome assemblies. The RefSeq processing for these organisms provides transcripts and protein records as well as some genomic region records representing gene clusters or pseudogenes; these genomic region records facilitate genome-wide annotation. Because RefSeq uses evidence independent of a genome assembly to represent RNAs and proteins, the dataset can represent sequence not currently part of that genome assembly. RefSeq processing integrates the official nomenclature and other information, including alternate names, <a href="http://www.geneontology.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene Ontology</a> (GO) terms, and literature and <a href="/projects/GeneRIF/GeneRIFhelp.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GeneRIFs</a> available in <a href="/entrez/query.fcgi?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a>. 
Multiple collaborations support the collection of this descriptive information (<a class="figpopup" href="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=objectonly" target="object" rid-figpopup="figch18Texamplesofcollaboratinggroups" rid-ob="figobch18Texamplesofcollaboratinggroups">Table 4</a>; see also <a href="/books/n/handbook/ch19/?report=reader">Chapter 19</a>).</p><p>Sequences enter RefSeq curation processing by a combination of computational analysis, collaboration, and in-house curation. As illustrated in <a class="figpopup" href="/books/NBK21091/figure/ch18.F2/?report=objectonly" target="object" rid-figpopup="figch18F2" rid-ob="figobch18F2">Figure 2</a>, generation of the initial RefSeq record depends on identifying a representative sequence for a gene. New genes and sequence data are added to the in-house version of the <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> database by RefSeq curators, collaborators, NCBI&#x02019;s genome annotation pipeline, and NCBI-based mining of <a href="/entrez/query.fcgi?db=unigene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">UniGene</a>, cDNA alignments, and <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> submissions. Quality assessment (QA) processes are executed regularly to identify questionable data for review. These assessments include analysis of nomenclature, sequence similarity, genomic placement, and potential cloning errors (<i>e.g.</i>, chimeras). 
The QA steps also leverage data from other NCBI resources, including <a href="/entrez/query.fcgi?db=homologene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">HomoloGene</a>, <a href="/mapview/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Map Viewer</a>, and <a href="/genbank/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank</a> related sequences. Data conflicts must be resolved before the <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> submission is used to generate a RefSeq record.</p><p>A sequence record unambiguously associated with a <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> record may be propagated into a RefSeq record. The completeness of the sequence (<i>e.g.</i>, complete vs. partial CDS) and the category of the gene (<i>e.g.</i>, protein coding, pseudogene) determine whether a RefSeq will be made, and if so, of what type (DNA, RNA, mRNA plus protein). RefSeq records are not made for incomplete proteins, transposable elements, or those loci for which the product type is uncertain (<i>e.g.</i>, protein coding or not). 
It should be noted, however, that the RefSeq collection does include partial transcripts and proteins that are provided by collaborating groups or when the RefSeq is based on an annotated whole genome sequence submitted to the <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a>.</p><p>Once a suitable &#x0201c;source&#x0201d; sequence is identified, the RefSeq record is generated using the sequence data from the <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> submission and the annotation data from the in-house version of the <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> database. Information from <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> includes the GeneID, cross-references to other databases, official nomenclature, aliases, alternate descriptive names, map location, and citations, including those submitted as GeneRIFs. RefSeq records are also subject to programmatic validation to identify annotation format errors and to provide annotation in a more consistent format. Records at this stage have a <span class="bk_pgobj">PROVISIONAL</span>, <span class="bk_pgobj">PREDICTED</span>, or <span class="bk_pgobj">INFERRED</span>
<a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> depending on the evidence existing in support of the <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> record.</p><p>RefSeq processing for non-protein-coding RNA loci uses the longest defining transcript record associated with the Gene record. For non-transcribed loci (such as non-transcribed pseudogenes), the RefSeq record is typically derived from a region of a larger genomic sequence. Curation of these types of records is minimal because the current focus is on curation of protein-coding loci; however, these records provide an important reagent for the computational annotation pipeline and support annotation of non-protein-coding genes that might otherwise be missed or misrepresented as a predicted protein-coding gene.</p><p>Other RefSeq records are provided to represent larger genomic regions, including <a href="/refseq/rsg/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeqGene</a> sequences, gene clusters, genes requiring rearrangement to express a product (immunoglobulins and T-cell receptors), and haplotypes with known differences in gene content. These genomic region records are annotated by NCBI curation staff, often in collaboration with scientific experts, and are not provided by automatic processing.</p><p><a href="/refseq/rsg/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeqGene</a>, a partner of the international Locus Reference Genomic (<a href="http://www.lrg-sequence.org" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">LRG</a>) collaboration, provides stable reference standard genomic, RNA, and protein RefSeqs for medically important genes. 
These standards support the <a href="http://www.hgvs.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">HGVS</a> expressions used to describe sequence variation in medical records, and thus are constructed to represent standard alleles. The <a href="/refseq/rsg/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeqGene</a> usually represents a single gene, on the positive strand of the sequence, beginning 5 kb upstream and extending 2 kb downstream. <a href="/refseq/rsg/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeqGene</a> records also include alignments of the RefSeq transcripts for the gene. All sequences annotated on the <a href="/refseq/rsg/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeqGene</a> have a review <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of <span class="bk_pgobj">VALIDATED</span> or <span class="bk_pgobj">REVIEWED</span>.</p><p>Additional curation of vertebrate and some invertebrate RefSeq records occurs at the request of public users and collaborators, or as indicated by in-house QA analyses. QA analyses focus on, but are not restricted to, <a href="/homologene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">HomoloGene</a>-based reporting of inconsistent protein lengths, identification of RefSeqs with repeat elements, questions about gene-to-sequence associations or potentially redundant genes, and reports of genes annotated at one time on a genome but not during subsequent re-annotation of that genome. Additionally, alignment-based tests are conducted for human and mouse that identify RefSeq records with poor quality alignment to the genome, non-consensus splicing, or very short or very long exons. 
Review of these records by skilled curators results in the most current and complete representation of the nucleotide and protein sequence and feature annotation available at that time. Sequence review may allow removal of vector and linker sequence, extension of the UTRs to define the full-length transcript, modification of the CDS annotation associated with the original <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> source accession, or the creation of additional RefSeq records to represent the products of alternative splicing. A variety of feature annotations can be added to the RefSeq transcript and protein records. For nucleotide records, these include an indication of the transcript completeness, location of poly(A) signal and site, and sites of sequence variation and RNA editing. Exon annotation is provided for RefSeq transcripts and non-transcribed pseudogenes of human and mouse only; for transcripts, exon annotation is determined from the alignment of the transcript to the reference genome assembly using <a href="/sutils/splign/splign.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Splign</a>, and, for non-transcribed pseudogenes, from the <a href="/sutils/splign/splign.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Splign</a> alignment of the functional gene to the pseudogene genomic region. For protein records, feature annotations may include alternate or non-AUG initiating codons, Enzyme Commission (<a href="http://us.expasy.org/enzyme/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">EC</a>) numbers, mature peptide products, protein domains, and selenocysteine residues. Finally, literature review is another source of alternate names, aliases, and functional information, the latter of which may be used to construct a Reference Sequence Summary on the RefSeq record. 
A RefSeq record that has undergone the complete review process has a <span class="bk_pgobj">REVIEWED</span>
<a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a>. Note that for many genes, intermediate levels of manual curation may address issues concerning the RefSeq sequence alone; these records have a review <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of <span class="bk_pgobj">VALIDATED</span> pending full review.</p><p>The review process may result in updating a RefSeq record, providing new RefSeq records, modifying sequence-to-gene associations, merging <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> records, or discontinuing a RefSeq, GeneID, or both. A RefSeq record is suppressed if it is found to represent a transcribed repeat element, to be derived from the wrong organism (<i>i.e.</i>, the <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> sequence it was based on has incorrect organism annotation), or not to represent a "gene". Records determined to represent an incomplete sequence, such as a partial protein sequence or an incompletely spliced transcript, are temporarily suppressed until more complete sequence data are available. Suppressed records can still be retrieved and will have a disclaimer appearing on the query result document summary (<a class="figpopup" href="/books/NBK21091/figure/ch18.F3/?report=objectonly" target="object" rid-figpopup="figch18F3" rid-ob="figobch18F3">Figure 3a</a>). 
A suppressed record is not included in BLAST databases, in the calculation of related sequences, in the BLink display (BLink provides pre-computed protein BLAST results), or in <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq/release/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">RefSeq FTP</a> releases. If a RefSeq is found to be redundant with another public RefSeq, then one is retained and the other becomes secondary (<a class="figpopup" href="/books/NBK21091/figure/ch18.F3/?report=objectonly" target="object" rid-figpopup="figch18F3" rid-ob="figobch18F3">Figure 3b</a>). If the sequences were associated with two different Gene records, then the records are merged so that a query of <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> with either of the original GeneIDs will retrieve the remaining single record.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch18F3" co-legend-rid="figlgndch18F3"><a href="/books/NBK21091/figure/ch18.F3/?report=objectonly" target="object" title="Figure 3. " class="img_link icnblk_img figpopup" rid-figpopup="figch18F3" rid-ob="figobch18F3"><img class="small-thumb" src="/books/NBK21091/bin/ch18-Image006.gif" src-large="/books/NBK21091/bin/ch18-Image006.jpg" alt="Figure 3. Suppressed or redundant RefSeq records." /></a><div class="icnblk_cntnt" id="figlgndch18F3"><h4 id="ch18.F3"><a href="/books/NBK21091/figure/ch18.F3/?report=objectonly" target="object" rid-ob="figobch18F3">Figure 3. </a></h4><p class="float-caption no_bottom_margin">Suppressed or redundant RefSeq records. (A) A standard text statement is included on the Entrez document summary for suppressed RefSeq records. 
(B) If redundant RefSeq records are merged, then both accession numbers appear on the flat file ACCESSION line <a href="/books/NBK21091/figure/ch18.F3/?report=objectonly" target="object" rid-ob="figobch18F3">(more...)</a></p></div></div><p>We welcome input from the research community to improve the quality of the RefSeq collection. Interested parties are invited to contact us by sending an email to the NCBI Help Desk (<a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@ofni" class="oemail">vog.hin.mln.ibcn@ofni</a>) or by using our <a href="/RefSeq/update.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">feedback form</a>.</p></div></div></div><div id="ch18.Access_and_Retrieval"><h2 id="_ch18_Access_and_Retrieval_">Access and Retrieval</h2><p>RefSeq records can be accessed by direct query, BLAST, FTP download, or indirectly through links provided from several NCBI resources, including <a href="/entrez/query.fcgi?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a>, <a href="/entrez/query.fcgi?db=genome" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Genome</a>, <a href="/bioproject" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BioProject</a>, and <a href="/mapview/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Map Viewer</a> (<a class="figpopup" href="/books/NBK21091/table/ch18.T.ncbi_resources_with_links_to_refs/?report=objectonly" target="object" rid-figpopup="figch18Tncbiresourceswithlinkstorefs" rid-ob="figobch18Tncbiresourceswithlinkstorefs">Table 6</a>). In addition, RefSeq records are included in some computed resources and so links may be found from those pages to individual RefSeq records. 
Some links from Entrez databases to RefSeq records are based on <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> associations (e.g., links from <a href="/omim" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">OMIM</a>; <a href="/books/n/handbook/ch7/?report=reader">Chapter 7</a>), whereas others are based on sequence similarity or RefSeq annotation content, including links from <a href="/pubmed" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">PubMed</a>. RefSeq records are easy to distinguish in these resources by their unique accession number format (<a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly" target="object" rid-figpopup="figch18Trefseqaccessionnumbersandmole" rid-ob="figobch18Trefseqaccessionnumbersandmole">Table 1</a>).</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Tncbiresourceswithlinkstorefs"><a href="/books/NBK21091/table/ch18.T.ncbi_resources_with_links_to_refs/?report=objectonly" target="object" title="Table 6. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Tncbiresourceswithlinkstorefs" rid-ob="figobch18Tncbiresourceswithlinkstorefs"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.ncbi_resources_with_links_to_refs/?report=thumb" src-large="/books/NBK21091/table/ch18.T.ncbi_resources_with_links_to_refs/?report=previmg" alt="Table 6. . NCBI resources with links to RefSeq records." /></a><div class="icnblk_cntnt"><h4 id="ch18.T.ncbi_resources_with_links_to_refs"><a href="/books/NBK21091/table/ch18.T.ncbi_resources_with_links_to_refs/?report=objectonly" target="object" rid-ob="figobch18Tncbiresourceswithlinkstorefs">Table 6. </a></h4><p class="float-caption no_bottom_margin">NCBI resources with links to RefSeq records. 
</p></div></div><p>How to access and retrieve RefSeq records is described below.</p><div id="ch18.Entrez_Query_Access"><h3>Entrez Query Access</h3><p>RefSeq records can be retrieved from the Entrez system (<a href="/books/n/handbook/ch15/?report=reader">Chapter 15</a>) by querying with an accession number, symbol or locus_tag, name, or by using Entrez <a href="/gene/limits" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Limits</a> and <a href="/books/NBK3841/#EntrezGene.Properties" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Property</a> terms. All RefSeqs can be found in the <a href="/entrez/query.fcgi?db=Nucleotide" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez Nucleotide</a> or <a href="/entrez/query.fcgi?db=Protein" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein</a> databases; both RefSeq and <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> submissions will be included but a filter is provided at the top right hand corner of the results page to allow display of only the RefSeq accessions, if desired. Filters can be configured using the <a href="/sites/myncbi/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">MyNCBI</a> interface. 
Alternatively, a query can be restricted to retrieve only RefSeq-specific results using the <a href="/sites/myncbi/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Limits</a> page or by querying with a <a href="/books/NBK3841/#EntrezGene.Properties" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Property</a>, such as &#x0201c;srcdb_refseq[property]&#x0201d;, or others listed in <a class="figpopup" href="/books/NBK21091/table/ch18.T.entrez_queries_to_retrieve_sets_o/?report=objectonly" target="object" rid-figpopup="figch18Tentrezqueriestoretrievesetso" rid-ob="figobch18Tentrezqueriestoretrievesetso">Table 7</a>. <a href="/gene/limits" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Limits</a> and <a href="/books/NBK3841/#EntrezGene.Properties" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Properties</a> can also be used to restrict results to molecule type, such as DNA versus mRNA. The <a href="/books/NBK3837/#EntrezHelp.Entrez_Searching_Options" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez Help</a> document provides additional information about querying.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch18Tentrezqueriestoretrievesetso"><a href="/books/NBK21091/table/ch18.T.entrez_queries_to_retrieve_sets_o/?report=objectonly" target="object" title="Table 7. " class="img_link icnblk_img figpopup" rid-figpopup="figch18Tentrezqueriestoretrievesetso" rid-ob="figobch18Tentrezqueriestoretrievesetso"><img class="small-thumb" src="/books/NBK21091/table/ch18.T.entrez_queries_to_retrieve_sets_o/?report=thumb" src-large="/books/NBK21091/table/ch18.T.entrez_queries_to_retrieve_sets_o/?report=previmg" alt="Table 7. . Entrez queries to retrieve sets of RefSeq records." 
/></a><div class="icnblk_cntnt"><h4 id="ch18.T.entrez_queries_to_retrieve_sets_o"><a href="/books/NBK21091/table/ch18.T.entrez_queries_to_retrieve_sets_o/?report=objectonly" target="object" rid-ob="figobch18Tentrezqueriestoretrievesetso">Table 7. </a></h4><p class="float-caption no_bottom_margin">Entrez queries to retrieve sets of RefSeq records. </p></div></div><p><a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> contains the majority of the RefSeq collection and also supports querying using all the above strategies. RefSeq-to-Gene connections are also provided by direct links; RefSeq records include a link to the <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> report page via the GeneID <span class="bk_pgobj">db_xref</span> link on the gene and CDS features (<a class="figpopup" href="/books/NBK21091/figure/ch18.F1C/?report=objectonly" target="object" rid-figpopup="figch18F1C" rid-ob="figobch18F1C">Figure 1C</a>). <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> reports the RefSeq accession numbers in the RefSeq section of the report, with links to the <a href="/sites/entrez?db=nucleotide" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Nucleotide</a> or <a href="/sites/entrez?db=protein" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein</a> records. The Links menu in <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> also provides distinct links to RefSeq RNAs, RefSeq proteins, and <a href="/refseq/rsg/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeqGene</a>. 
<a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> reports may include a graphical depiction of genome annotation data in the <span class="bk_pgobj">Genomic regions, transcripts, and products</span> section, with links to <a href="/sites/entrez?db=nucleotide" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Nucleotide</a> and <a href="/sites/entrez?db=protein" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein</a> displays. When this graphical section is provided, an additional report is available with details about exon and intron boundaries and length. You can change the display format from <span class="bk_pgobj">Full Report</span> to <span class="bk_pgobj">Gene Table</span> to access this report. Note that RefSeq records representing assembled environmental samples (with an NS_ accession prefix) are not included in <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> but can be found in the <a href="/sites/entrez?db=genome" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Genome</a> and <a href="/sites/entrez?db=nucleotide" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Nucleotide</a> databases.</p><p>RefSeq records in the <a href="/sites/entrez?db=genome" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Genome</a> or <a href="/bioproject" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BioProject</a> databases can be retrieved using an accession number for a complete genomic molecule (NC_ accession prefix) or organism name. 
The <a href="/bioproject" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BioProject</a> database can also be queried using the property restriction &#x0201c;srcdb_refseq[property]&#x0201d;.</p><p>RefSeq records belonging to the <a href="/refseq/rsg/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeqGene</a> set can be retrieved from the Entrez system using &#x0201c;RefSeqGene[keyword]&#x0201d;.</p></div><div id="ch18.BLAST"><h3>BLAST</h3><p>RefSeq transcript records are included in the <a href="/sites/entrez?db=nucleotide" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Nucleotide</a> non-redundant (nr) and the RefSeq mRNA sequences databases. RefSeq protein records are included in the <a href="/sites/entrez?db=protein" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein</a> database. Accessions in the results set, either RefSeq or GenBank, that are associated with a <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> record are indicated by a small blue <span class="bk_pgobj">G</span> icon, which is linked to the <a href="/sites/entrez?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> report. 
RefSeq genomic records (whole chromosome or scaffold RefSeq records and <a href="/refseq/rsg/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeqGene</a> records) are provided in the Reference genomic sequences database or via organism-specific genome BLAST databases, which can be accessed via <a href="/mapview/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Map Viewer</a>, <a href="/bioproject" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BioProject</a> reports, or the <a href="/Genomes/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Genomic Biology</a> webpage. <a href="/refseq/rsg/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeqGene</a> records are also retrieved from the nr database in BLAST results and in a dedicated RefSeqGene database.</p></div><div id="ch18.Map_Viewer"><h3>Map Viewer</h3><p>The NCBI <a href="/mapview/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Map Viewer</a> supports queries by RefSeq and <a href="/refseq/rsg/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeqGene</a> accession numbers if the annotated genome is available in that resource.</p></div><div id="ch18.FTP"><h3>FTP</h3><p>RefSeq data are available in three FTP areas:</p><ul><li class="half_rhythm"><div>Configured RefSeq BLAST databases are available for download from the <a href="ftp://ftp.ncbi.nlm.nih.gov/blast/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">BLAST FTP</a> site; separate databases are provided for genomic, transcript, and protein records.</div></li><li class="half_rhythm"><div>Organism-specific sequence files are provided in the <a href="ftp://ftp.ncbi.nlm.nih.gov/genomes/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">Genomes 
FTP</a> site. This area includes RefSeq records that are generated by, or used in, <a href="/mapview/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Map Viewer</a> and <a href="/sites/entrez?db=genome" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Genomes</a> processing. NCBI&#x02019;s annotation of genomic RefSeqs is also available; a file in the latest specification (version 1.20) of Generic Feature Format version 3 (<a href="http://www.sequenceontology.org/gff3.shtml" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GFF3</a>) is provided in a GFF subdirectory for the latest assembly of many organisms.</div></li><li class="half_rhythm"><div>The full RefSeq collection, including the human <a href="ftp://ftp.ncbi.nih.gov/refseq/H_sapiens/RefSeqGene/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">RefSeqGene set</a>, is available from the <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">RefSeq FTP</a> site, with the exception of the NS_ accession series environmental sample records. The RefSeq collection is provided as comprehensive bi-monthly releases in addition to daily updates for records that are new or updated between RefSeq release cycles. The comprehensive release provides data in multiple file formats, including flat file and FASTA, organized into primary taxonomic groups in addition to the complete dataset. For organisms with more frequent updates to curated records, including human and mouse, subdirectories containing weekly comprehensive releases of transcript and protein RefSeq records are provided also. 
Information about the RefSeq release is documented on the <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq/release/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">RefSeq FTP</a> site in the <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq/release/release-notes/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">release-notes</a> subdirectory. The availability of new releases is announced on the <a href="/RefSeq/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RefSeq</a> website, on NCBI&#x02019;s <a href="http://www.facebook.com/ncbi.nlm" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Facebook</a> and <a href="http://twitter.com/ncbi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Twitter</a> accounts, to subscribers of the <a href="/mailman/listinfo/refseq-announce" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">refseq-announce</a> email list, and in the <a href="/books/NBK1969/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">NCBI Newsletter</a>.</div></li></ul></div></div><div id="ch18.Related_Resources"><h2 id="_ch18_Related_Resources_">Related Resources</h2><div id="ch18.The_Consensus_Coding_Sequence_CCDS"><h3>The Consensus Coding Sequence (CCDS) Project</h3><p>The <a href="/projects/CCDS/CcdsBrowse.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">CCDS project</a> aims to provide a complete set of high quality annotations of protein-coding genes on the human and mouse genomes. 
It leverages the computational annotation pipelines of NCBI and <a href="http://www.ensembl.org/index.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Ensembl</a>, and expert curation provided predominantly by the Havana team of the <a href="http://www.sanger.ac.uk/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Wellcome Trust Sanger Institute</a> and NCBI&#x02019;s RefSeq staff, to track identical protein annotations on the reference assemblies of the human and mouse genomes, and to ensure they are consistently and accurately represented in public resources. The CCDS set includes coding regions that are annotated as full-length (with an initiating AUG and valid stop-codon), can be translated from the genome without frameshifts, and use consensus splice-sites. Annotated genes in the CCDS set are associated with a unique identifying number and version. The version number will change with a change to the CDS structure or to the underlying genomic sequence, although any change requires collaborative agreement. See PubMed ID <a href="/pubmed/19498102" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">19498102</a> for more information.</p></div></div><div id="ch18.Related_Reading"><h2 id="_ch18_Related_Reading_">Related Reading</h2><ol><li><div class="bk_ref" id="ch18.REF.blake.2011.d842">Blake JA, Bult CJ, Kadin JA, Richardson JE, Eppig JT., Mouse Genome Database Group. The Mouse Genome Database (MGD): premier model organism resource for mammalian genomics and genetics. <span><span class="ref-journal">Nucl. Acids Res. 
</span>2011;<span class="ref-vol">39</span>:D842&ndash;8.</span> (PubMed ID 21051359) [<a href="/pmc/articles/PMC3013640/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC3013640</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/21051359" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 21051359</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.coffin.1997">Coffin JM, Hughes SH, and E Varmus. (1997) Retroviruses. Cold Spring Harbor (NY): Cold Spring Harbor Laboratory Press. [<a href="https://pubmed.ncbi.nlm.nih.gov/21433340" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 21433340</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.dwinell.2009.d744">Dwinell MR, Worthey EA, Shimoyama M, Bakir-Gungor B, DePons J, Laulederkind S, Lowry T, Nigram R, Petri V, Smith J, Stoddard A, Twigger SN, Jacob HJ, Team RGD. The Rat Genome Database 2009: variation, ontologies and pathways. <span><span class="ref-journal">Nucl. Acids Res. </span>2009;<span class="ref-vol">37</span>:D744&ndash;9.</span> (PubMed ID 18996890) [<a href="/pmc/articles/PMC2686558/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC2686558</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/18996890" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 18996890</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.eddy.2002.18">Eddy SR. A memory-efficient dynamic programming algorithm for optimal alignment of a sequence to an RNA secondary structure. <span><span class="ref-journal">BMC Bioinformatics. 
</span>2002;<span class="ref-vol">3</span>:18.</span> (PubMed ID 12095421) [<a href="/pmc/articles/PMC119854/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC119854</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/12095421" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 12095421</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.griffithsjones.2003.439">Griffiths-Jones S, Bateman A, Marshall M, Khanna A, Eddy SR. Rfam: an RNA family database. <span><span class="ref-journal">Nucl. Acids Res. </span>2003;<span class="ref-vol">31</span>:439&ndash;441.</span> (PubMed ID 12520045) [<a href="/pmc/articles/PMC165453/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC165453</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/12520045" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 12520045</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.amberger.2011">Amberger, J., Bocchini, C. and Hamosh, A. (2011), A new face and new challenges for online mendelian inheritance in man (OMIM&#x000ae;). Human Mutation, 32:n/a. doi: 10.1002/humu.21466. (PubMed ID 21472891). [<a href="https://pubmed.ncbi.nlm.nih.gov/21472891" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 21472891</span></a>] [<a href="http://dx.crossref.org/10.1002/humu.21466" ref="pagearea=cite-ref&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">CrossRef</a>]</div></li><li><div class="bk_ref" id="ch18.REF.lowe.1997.955">Lowe TM, Eddy SR. tRNAscan-SE: a program for improved detection of transfer RNA genes in genomic sequence. <span><span class="ref-journal">Nucl. Acids Res. 
</span>1997;<span class="ref-vol">25</span>:955&ndash;964.</span> (PubMed ID 9023104) [<a href="/pmc/articles/PMC146525/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC146525</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/9023104" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 9023104</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.maglott.2011.d52">Maglott D, Ostell J, Pruitt KD, Tatusova T. Entrez Gene: gene-centered information at NCBI. <span><span class="ref-journal">Nucl. Acids Res. </span>2011;<span class="ref-vol">39</span>:D52&ndash;7.</span> (PubMed ID 21115458) [<a href="/pmc/articles/PMC3013746/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC3013746</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/21115458" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 21115458</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.marchlerbauer.2011.d225">Marchler-Bauer A, Lu S, Anderson JB, Chitsaz F, Derbyshire MK, Deweese-Scott C, Fong JH, Geer LY, Geer RC, Gonzales NR, Gwadz M, Hurwitz DI, Jackson JD, Ke Z, Lanczycki CJ, Lu F, Marchler GH, Mullokandov M, Omelchenko MV, Robertson CL, Song JS, Thanki N, Yamashita RA, Zhang D, Zhang N, Zheng C, Bryant SH. CDD: a Conserved Domain Database for the functional annotation of proteins. <span><span class="ref-journal">Nucl. Acids Res. 
</span>2011;<span class="ref-vol">39</span>:D225&ndash;9.</span> (PubMed ID 21109532) [<a href="/pmc/articles/PMC3013737/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC3013737</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/21109532" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 21109532</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.pruitt.2008.1316">Pruitt KD, Harrow J, Harte RA, Wallin C, Diekhans M, Maglott DR, Searle S, Farrell CM, et al. The consensus coding sequence (CCDS) project: Identifying a common protein-coding gene set for the human and mouse genomes. <span><span class="ref-journal">Genome Res. </span>2009;<span class="ref-vol">19</span>(7):1316&ndash;1323.</span> (PubMed ID 19498102) [<a href="/pmc/articles/PMC2704439/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC2704439</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/19498102" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 19498102</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.pruitt.2009.d32">Pruitt KD, Tatusova T, Klimke W, Maglott DR. NCBI Reference Sequences: current status, policy and new initiatives. <span><span class="ref-journal">Nucl. Acids Res. 
</span>2009;<span class="ref-vol">37</span>:D32&ndash;36.</span> (PubMed ID 18927115) [<a href="/pmc/articles/PMC2686572/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC2686572</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/18927115" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 18927115</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.tatusova.1999.536">Tatusova TA, Karsch-Mizrachi I, Ostell JA. Complete genomes in WWW Entrez: data representation and analysis. <span><span class="ref-journal">Bioinformatics. </span>1999;<span class="ref-vol">15</span>:536&ndash;43.</span> (PubMed ID 10487861) [<a href="https://pubmed.ncbi.nlm.nih.gov/10487861" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 10487861</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF13">Sprague J, Bayraktaroglu L, Clements D, Conlin T, Fashena D, Frazer K, Haendel M, Howe D, Mani P, Ramachandran S, Schaper K, Segerdell E, Song P, Sprunger B, Taylor S, Van Slyke C, and M Westerfield. (2006) The Zebrafish Information Network: the zebrafish model organism database. Nucl. Acids Res. 34:D581-D585 (PubMed ID 16381936). 
[<a href="/pmc/articles/PMC1347449/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC1347449</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/16381936" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 16381936</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF.seal.2011.d519">Seal RL, Gordon SM, Lush MJ, Wright MW, Bruford EA. genenames.org: the HGNC resources in 2011. <span><span class="ref-journal">Nucleic Acids Res. </span>2011;<span class="ref-vol">39</span>:D514&ndash;9.</span> (PubMed ID 20929869) [<a href="/pmc/articles/PMC3013772/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC3013772</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/20929869" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 20929869</span></a>]</div></li><li><div class="bk_ref" id="ch18.REF15">Tweedie S, Ashburner M, Falls K, Leyland P, McQuilton P, Marygold S, Millburn G, Osumi-Sutherland D, Schroeder A, Seal R, Zhang Z, and The FlyBase Consortium. (2009) FlyBase: enhancing Drosophila Gene Ontology annotations. Nucl. Acids Res. 37: D555-D559 (PubMed ID 18948289). 
[<a href="/pmc/articles/PMC2686450/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC2686450</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/18948289" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 18948289</span></a>]</div></li></ol></div><div id="bk_toc_contnr"></div></div></div><div class="fm-sec"><h2 id="_NBK21091_pubdet_">Publication Details</h2><h3>Author Information and Affiliations</h3><p class="contrib-group"><h4>Authors</h4><span itemprop="author">Kim Pruitt</span>, <span itemprop="author">Garth Brown</span>, <span itemprop="author">Tatiana Tatusova</span>, and <span itemprop="author">Donna Maglott</span>.</p><h3>Publication History</h3><p class="small">Created: <span itemprop="datePublished">October 9, 2002</span>; Last Update: <span itemprop="dateModified">April 6, 2012</span>.</p><h3>Copyright</h3><div><div class="half_rhythm"><a href="/books/about/copyright/">Copyright Notice</a></div></div><h3>Publisher</h3><p><a href="https://www.ncbi.nlm.nih.gov/" ref="pagearea=page-banner&amp;targetsite=external&amp;targetcat=link&amp;targettype=publisher">National Center for Biotechnology Information (US)</a>, Bethesda (MD)</p><h3>NLM Citation</h3><p>Pruitt K, Brown G, Tatusova T, et al. The Reference Sequence (RefSeq) Database. 2002 Oct 9 [Updated 2012 Apr 6]. In: McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. 
Chapter 18.<span class="bk_cite_avail"></span></p></div><div class="small-screen-prev"><a href="/books/n/handbook/ch17/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a></div><div class="small-screen-next"><a href="/books/n/handbook/ch19/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M25,30c80,60 80,0 0,60 c30,-60 30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Next</text></svg></a></div></article><article data-type="fig" id="figobch18F1A"><div id="ch18.F1A" class="figure bk_fig"><div class="graphic"><img data-src="/books/NBK21091/bin/ch18-Image001.jpg" alt="Figure 1A. . Features of a RefSeq record." /></div><h3><span class="label">Figure 1A. </span></h3><div class="caption"><p>Features of a RefSeq record. The beginning of a RefSeq record when displayed in the GenBank flat file format is shown.</p></div></div></article><article data-type="fig" id="figobch18F1B"><div id="ch18.F1B" class="figure bk_fig"><div class="graphic"><img data-src="/books/NBK21091/bin/ch18-Image002.jpg" alt="Figure 1B. . The COMMENT and PRIMARY sections." /></div><h3><span class="label">Figure 1B. </span></h3><div class="caption"><p>The COMMENT and PRIMARY sections. The gene Summary is provided for RefSeqs with a <span class="bk_pgobj">REVIEWED</span> status only. The PRIMARY block, providing the RefSeq assembly details, is displayed for vertebrate records predominantly.</p></div></div></article><article data-type="fig" id="figobch18F1C"><div id="ch18.F1C" class="figure bk_fig"><div class="graphic"><img data-src="/books/NBK21091/bin/ch18-Image003.jpg" alt="Figure 1C. . The FEATURES section." /></div><h3><span class="label">Figure 1C. </span></h3><div class="caption"><p>The FEATURES section. 
Only a subset of the available feature annotation is shown.</p></div></div></article><article data-type="fig" id="figobch18F1D"><div id="ch18.F1D" class="figure bk_fig"><div class="graphic"><img data-src="/books/NBK21091/bin/ch18-Image004.jpg" alt="Figure 1D. . NCBI&#x02019;s Sequence Viewer." /></div><h3><span class="label">Figure 1D. </span></h3><div class="caption"><p>NCBI&#x02019;s Sequence Viewer. The annotated features on a RefSeq record can be displayed in a graphical format (note the link &#x02018;Graphics&#x02019; in <a class="figpopup" href="/books/NBK21091/figure/ch18.F1A/?report=objectonly" target="object" rid-figpopup="figch18F1A" rid-ob="figobch18F1A">Figure 1A</a>). The display can be modified by following the &#x02018;Configure&#x02019; link. The Help document provides additional information about the display and includes the <a href="/projects/sviewer/help/legends.pdf" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Graphical View Legend</a>, which provides details on how features are rendered.</p></div></div></article><article data-type="fig" id="figobch18F2"><div id="ch18.F2" class="figure bk_fig"><div class="graphic"><img data-src="/books/NBK21091/bin/ch18-Image005.jpg" alt="Figure 2. . RefSeq Processing Pipelines." /></div><h3><span class="label">Figure 2. </span></h3><div class="caption"><p>RefSeq Processing Pipelines. Sequence data deposited in the public archival databases is available for RefSeq processing. Processing pipelines include the <a href="#ch18.Curation_of_Vertebrate_and_Inverteb">vertebrate curation pipeline</a>, the <a href="#ch18.Computational_Genome_Annotation_Pip">computational genome annotation pipeline</a>, and <a href="#ch18.Extraction_from_GenBank_records">extraction from GenBank</a>. 
These pipelines generate new and updated RefSeq records that become publicly available in <a href="/entrez/query.fcgi?db=Nucleotide" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez Nucleotide</a>, <a href="/entrez/query.fcgi?db=Protein" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein</a>, and <a href="/entrez/query.fcgi?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> databases. (A) Once a gene is defined and associated with sufficient sequence information in an internal curation database, it can be pushed into the RefSeq pipeline. The RefSeq process is initiated by selecting the longest mRNA annotated with a complete coding sequence for each locus. This RefSeq record has a <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of <span class="bk_pgobj">PROVISIONAL</span>, <span class="bk_pgobj">PREDICTED</span>, or <span class="bk_pgobj">INFERRED</span>. Subsequent curation may result in a sequence or annotation update and a RefSeq <a class="figpopup" href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object" rid-figpopup="figch18Trefseqstatuscodes" rid-ob="figobch18Trefseqstatuscodes">status</a> of VALIDATED or REVIEWED. Records are updated if the underlying<a href="http://www.insdc.org" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri"> INSDC</a> submission is updated or if other associated data are updated, including nomenclature, publications, or map location. 
(B) Available RefSeq and <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a> data are aligned to an assembled genome, <i>ab initio</i> gene prediction that uses the alignment data is performed, and an analysis program integrates all available data to define the annotation models. New <span class="bk_pgobj">MODEL</span> RefSeq records are generated by this pipeline. (C) When a complete, annotated genome becomes available in the <a href="http://www.insdc.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">INSDC</a>, a set of corresponding RefSeq records are generated by duplicating the GenBank records, followed by validation and addition of cross-references to Gene (via a db_xref citing the GeneID) and more informative and standardized protein names, when available.</p></div></div></article><article data-type="fig" id="figobch18F3"><div id="ch18.F3" class="figure bk_fig"><div class="graphic"><img data-src="/books/NBK21091/bin/ch18-Image006.jpg" alt="Figure 3. . Suppressed or redundant RefSeq records." /></div><h3><span class="label">Figure 3. </span></h3><div class="caption"><p>Suppressed or redundant RefSeq records. (A) A standard text statement is included on the Entrez document summary for suppressed RefSeq records. (B) If redundant RefSeq records are merged, then both accession numbers appear on the flat file <span class="bk_pgobj">ACCESSION</span> line (yellow arrow). The first <span class="bk_pgobj">ACCESSION</span> number listed is the primary identifier and all others listed are "secondary" accession numbers.</p></div></div></article><article data-type="table-wrap" id="figobch18Trefseqaccessionnumbersandmole"><div id="ch18.T.refseq_accession_numbers_and_mole" class="table"><h3><span class="label">Table 1. 
</span></h3><div class="caption"><p>RefSeq accession numbers and molecule types.</p></div><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21091/table/ch18.T.refseq_accession_numbers_and_mole/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch18.T.refseq_accession_numbers_and_mole_lrgtbl__"><table class="no_bottom_margin"><thead><tr><th id="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Accession prefix</th><th id="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Molecule type</th><th id="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Comment</th></tr></thead><tbody><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">AC_</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Genomic</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Complete genomic molecule, usually alternate assembly</td></tr><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NC_</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Genomic</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Complete genomic molecule, usually reference assembly</td></tr><tr><td 
headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NG_</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Genomic</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Incomplete genomic region</td></tr><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NT_</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Genomic</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Contig or scaffold, clone-based or WGS<sup>a</sup></td></tr><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NW_</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Genomic</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Contig or scaffold, primarily WGS<sup>a</sup></td></tr><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NZ_<sup>b</sup></td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Genomic</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Complete genomes and unfinished WGS data</td></tr><tr><td 
headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NM_</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">mRNA</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Protein-coding transcripts (usually curated)</td></tr><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NR_</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">RNA</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Non-protein-coding transcripts</td></tr><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">XM_<sup>c</sup></td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">mRNA</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Predicted model protein-coding transcript</td></tr><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">XR_<sup>c</sup></td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">RNA</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Predicted model non-protein-coding transcript</td></tr><tr><td 
headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">AP_</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Protein</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Annotated on AC_ alternate assembly</td></tr><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NP_</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Protein</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Associated with an NM_ or NC_ accession</td></tr><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">YP_<sup>c</sup></td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Protein</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Annotated on genomic molecules without an instantiated<br />transcript record</td></tr><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">XP_<sup>c</sup></td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Protein</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Predicted model, 
associated with an XM_ accession</td></tr><tr><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">WP_</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Protein</td><td headers="hd_h_ch18.T.refseq_accession_numbers_and_mole_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Non-redundant across multiple strains and species</td></tr></tbody></table></div><div class="tblwrap-foot"><div><dl class="temp-labeled-list small"><dl class="bkr_refwrap"><dt><sup>a</sup>
</dt><dd><div id="ch18.TF.1.1"><p class="no_margin">Whole Genome Shotgun sequence data.</p></div></dd></dl><dl class="bkr_refwrap"><dt><sup>b</sup>
</dt><dd><div id="ch18.TF.1.2"><p class="no_margin">An ordered collection of WGS sequence for a genome.</p></div></dd></dl><dl class="bkr_refwrap"><dt><sup>c</sup>
</dt><dd><div id="ch18.TF.1.3"><p class="no_margin">Computed.</p></div></dd></dl></dl></div></div></div></article><article data-type="table-wrap" id="figobch18Trefseqstatuscodes"><div id="ch18.T.refseq_status_codes" class="table"><h3><span class="label">Table 2. </span></h3><div class="caption"><p>RefSeq status codes.</p></div><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21091/table/ch18.T.refseq_status_codes/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch18.T.refseq_status_codes_lrgtbl__"><table><thead><tr><th id="hd_h_ch18.T.refseq_status_codes_1_1_1_1" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Code</th><th id="hd_h_ch18.T.refseq_status_codes_1_1_1_2" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Description</th></tr></thead><tbody><tr><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">MODEL</td><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">The RefSeq record is provided by the NCBI Genome Annotation pipeline and is not subject to individual review or revision between annotation runs.</td></tr><tr><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">INFERRED</td><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">The RefSeq record has been predicted by genome sequence analysis, but it is not yet supported by experimental evidence. 
The record may be partially supported by homology data.</td></tr><tr><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">PREDICTED</td><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">The RefSeq record has not yet been subject to individual review, and some aspect of the RefSeq record is predicted.</td></tr><tr><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">PROVISIONAL</td><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">The RefSeq record has not yet been subject to individual review. The initial sequence-to-gene association has been established by outside collaborators or NCBI staff.</td></tr><tr><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">REVIEWED</td><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">The RefSeq record has been reviewed by NCBI staff or by a collaborator. The NCBI review process includes assessing available sequence data and the literature. Some RefSeq records may incorporate expanded sequence and annotation information.</td></tr><tr><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">VALIDATED</td><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">The RefSeq record has undergone an initial review to provide the preferred sequence standard. 
The record has not yet been subject to final review at which time additional functional information may be provided.</td></tr><tr><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">WGS</td><td headers="hd_h_ch18.T.refseq_status_codes_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">The RefSeq record is provided to represent a collection of whole genome shotgun sequences. These records are not subject to individual review or revisions between genome updates.</td></tr></tbody></table></div></div></article><article data-type="table-wrap" id="figobch18Texamplesofcollaboratorswhocon"><div id="ch18.T.examples_of_collaborators_who_con" class="table"><h3><span class="label">Table 3. </span></h3><div class="caption"><p>Examples of collaborators who contribute RefSeq records.</p></div><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21091/table/ch18.T.examples_of_collaborators_who_con/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch18.T.examples_of_collaborators_who_con_lrgtbl__"><table><thead><tr><th id="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_1" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Organism</th><th id="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_2" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Collaborator</th></tr></thead><tbody><tr><td headers="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<i>Saccharomyces cerevisiae</i>
</td><td headers="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Saccharomyces Genome Database (<a href="http://www.yeastgenome.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">SGD</a>)</td></tr><tr><td headers="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<i>Arabidopsis thaliana</i>
</td><td headers="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">The Arabidopsis Information Resource (<a href="http://www.arabidopsis.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">TAIR</a>)</td></tr><tr><td headers="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<i>Pseudomonas aeruginosa</i>
</td><td headers="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;"><i>Pseudomonas aeruginosa</i> Community Annotation Project (<a href="http://www.pseudomonas.com/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">PseudoCAP</a>)</td></tr><tr><td headers="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<i>Drosophila melanogaster</i>
</td><td headers="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="http://flybase.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">FlyBase</a>
</td></tr><tr><td headers="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">multiple invertebrates</td><td headers="hd_h_ch18.T.examples_of_collaborators_who_con_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="http://www.vectorbase.org" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">VectorBase</a>
</td></tr></tbody></table></div></div></article><article data-type="table-wrap" id="figobch18Texamplesofcollaboratinggroups"><div id="ch18.T.examples_of_collaborating_groups" class="table"><h3><span class="label">Table 4. </span></h3><div class="caption"><p>Examples of collaborating groups.</p></div><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21091/table/ch18.T.examples_of_collaborating_groups/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch18.T.examples_of_collaborating_groups_lrgtbl__"><table><tbody><tr><td rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="http://flybase.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">FlyBase</a>
</td></tr><tr><td rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">HUGO Gene Nomenclature Committee (<a href="http://www.genenames.org/index.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">HGNC</a>)</td></tr><tr><td rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/genomes/MICROBES/collaborators.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Microbial genomes</a>
</td></tr><tr><td rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Mouse Genome Informatics (<a href="http://www.informatics.jax.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">MGI</a>)</td></tr><tr><td rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Online Mendelian Inheritance in Man (<a href="/omim" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">OMIM</a>)</td></tr><tr><td rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Rat Genome Database (<a href="http://rgd.mcw.edu/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">RGD</a>)</td></tr><tr><td rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="http://www.vectorbase.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">VectorBase</a>
</td></tr><tr><td rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/genomes/GenomesHome.cgi?taxid=10239&#x00026;hopt=advisors" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Viral Genome Advisors</a>
</td></tr><tr><td rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="http://www.xenbase.org/common/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">XenBase</a>
</td></tr><tr><td rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Zebrafish Information Network (<a href="http://zfin.org/cgi-bin/webdriver?MIval=aa-ZDB_home.apg" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">ZFIN</a>)</td></tr></tbody></table></div></div></article><article data-type="table-wrap" id="figobch18Tselectedentrezgenomeresources"><div id="ch18.T.selected_entrez_genome_resources" class="table"><h3><span class="label">Table 5. </span></h3><div class="caption"><p>Selected Entrez Genome resources.</p></div><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21091/table/ch18.T.selected_entrez_genome_resources/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch18.T.selected_entrez_genome_resources_lrgtbl__"><table><thead><tr><th id="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_1" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Web Page</th><th id="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_2" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Web Site</th></tr></thead><tbody><tr><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Genome homepage</td><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?db=Genome" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">http://www<wbr style="display:inline-block"></wbr>&#8203;.ncbi.nlm.nih<wbr style="display:inline-block"></wbr>&#8203;.gov/entrez/query.fcgi?db=Genome</a>
</td></tr><tr><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Eukaryotes</td><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/genomes/leuks.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">http://www<wbr style="display:inline-block"></wbr>&#8203;.ncbi.nlm.nih<wbr style="display:inline-block"></wbr>&#8203;.gov/genomes/leuks.cgi</a>
</td></tr><tr><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Prokaryotes</td><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/genomes/lproks.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">http://www<wbr style="display:inline-block"></wbr>&#8203;.ncbi.nlm.nih<wbr style="display:inline-block"></wbr>&#8203;.gov/genomes/lproks.cgi</a>
</td></tr><tr><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Viral Genomes</td><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/genomes/GenomesHome.cgi?taxid=10239" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">http://www.ncbi.nlm.nih.gov/genomes/GenomesHome.cgi?taxid=10239</a>
</td></tr><tr><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Organelles</td><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/genomes/ORGANELLES/organelles.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">http://www<wbr style="display:inline-block"></wbr>&#8203;.ncbi.nlm.nih<wbr style="display:inline-block"></wbr>&#8203;.gov/genomes/ORGANELLES/organelles<wbr style="display:inline-block"></wbr>&#8203;.html</a>
</td></tr><tr><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Plant Genomes</td><td headers="hd_h_ch18.T.selected_entrez_genome_resources_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/genomes/PLANTS/PlantList.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">http://www<wbr style="display:inline-block"></wbr>&#8203;.ncbi.nlm.nih<wbr style="display:inline-block"></wbr>&#8203;.gov/genomes/PLANTS/PlantList.html</a>
</td></tr></tbody></table></div></div></article><article data-type="table-wrap" id="figobch18Tncbiresourceswithlinkstorefs"><div id="ch18.T.ncbi_resources_with_links_to_refs" class="table"><h3><span class="label">Table 6. </span></h3><div class="caption"><p>NCBI resources with links to RefSeq records.</p></div><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21091/table/ch18.T.ncbi_resources_with_links_to_refs/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch18.T.ncbi_resources_with_links_to_refs_lrgtbl__"><table><thead><tr><th id="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_1" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/biosystems/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">
<b>BioSystems</b>
</a>
</th><th id="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_2" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/geo/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">
<b>Gene Expression Omnibus</b>
</a>
<b> (</b>
<a href="/books/n/handbook/ch6/?report=reader">
<b>Chapter 6</b>
</a>
<b>)</b>
</th></tr></thead><tbody><tr><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;"><a href="/blast/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLAST</a> results (<a href="/books/n/handbook/ch16/?report=reader">Chapter 16</a>)</td><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?db=Genome" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Genome</a>
</td></tr><tr><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;"><a href="/sutils/static/blinkhelp.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLink</a> (pre-computed BLASTp)</td><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/bioproject" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BioProject</a>
</td></tr><tr><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;"><a href="/entrez/query.fcgi?db=Books" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Bookshelf</a> (<a href="/books/n/handbook/ch8/?report=reader">Chapter 8</a>)</td><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/homologene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">HomoloGene</a>
</td></tr><tr><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/projects/CCDS/CcdsBrowse.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Consensus CDS project</a>
</td><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;"><a href="/mapview/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Map Viewer</a> (<a href="/books/n/handbook/ch20/?report=reader">Chapter 20</a>)</td></tr><tr><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/books/n/handbook/ch5/?report=reader">dbSNP (Chapter 5)</a>
</td><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?db=probe" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Probe</a>
</td></tr><tr><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/dbvar/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">dbVar</a>
</td><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/proteinclusters" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein Clusters</a>
</td></tr><tr><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;"><a href="/gquery/gquery.fcgi?itool=toolbar" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez</a> (<a href="/books/n/handbook/ch15/?report=reader">Chapter 15</a>)</td><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">PubMed Central (<a href="/books/n/handbook/ch9/?report=reader">Chapter 9</a>)</td></tr><tr><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/epigenomics" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Epigenomics</a>
</td><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;"><a href="/entrez/query.fcgi?db=unigene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">UniGene</a> (<a href="/books/n/handbook/ch21/?report=reader">Chapter 21</a>)</td></tr><tr><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;"><a href="/entrez/query.fcgi?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Gene</a> (<a href="/books/n/handbook/ch19/?report=reader">Chapter 19</a>)</td><td headers="hd_h_ch18.T.ncbi_resources_with_links_to_refs_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?db=unists" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">UniSTS</a>
</td></tr></tbody></table></div></div></article><article data-type="table-wrap" id="figobch18Tentrezqueriestoretrievesetso"><div id="ch18.T.entrez_queries_to_retrieve_sets_o" class="table"><h3><span class="label">Table 7. </span></h3><div class="caption"><p>Entrez queries to retrieve sets of RefSeq records.</p></div><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21091/table/ch18.T.entrez_queries_to_retrieve_sets_o/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch18.T.entrez_queries_to_retrieve_sets_o_lrgtbl__"><table><thead><tr><th id="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_1" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Query</th><th id="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_2" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Accession prefix</th><th id="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_3" scope="col" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">RefSeq status retrieved</th></tr></thead><tbody><tr><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?cmd=PureSearch&#x00026;db=nucleotide&#x00026;details_term=srcdb_refseq%5Bprop%5D" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">srcdb<wbr style="display:inline-block"></wbr>&#8203;_refseq[prop]</a>
</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">All RefSeq accessions</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">All</td></tr><tr><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?cmd=PureSearch&#x00026;db=nucleotide&#x00026;details_term=srcdb_refseq_known%5Bprop%5D" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">srcdb<wbr style="display:inline-block"></wbr>&#8203;_refseq_known[prop]</a>
</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NC_, AC_, NG_, NM_, NR_, NP_, AP_</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">REVIEWED, PROVISIONAL, PREDICTED, INFERRED, and VALIDATED</td></tr><tr><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?cmd=PureSearch&#x00026;db=nucleotide&#x00026;details_term=srcdb_refseq_reviewed%5Bprop%5D" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">srcdb<wbr style="display:inline-block"></wbr>&#8203;_refseq_reviewed[prop]</a>
</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NC_, AC_, NG_, NM_, NR_, NP_, AP_</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">REVIEWED</td></tr><tr><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?cmd=PureSearch&#x00026;db=nucleotide&#x00026;details_term=srcdb_refseq_validated%5Bprop%5D" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">srcdb<wbr style="display:inline-block"></wbr>&#8203;_refseq_validated[prop]</a>
</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NC_, NM_, NR_, NP_</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">VALIDATED</td></tr><tr><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?cmd=PureSearch&#x00026;db=nucleotide&#x00026;details_term=srcdb_refseq_provisional%5Bprop%5D" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">srcdb<wbr style="display:inline-block"></wbr>&#8203;_refseq_provisional[prop]</a>
</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NC_, AC_, NG_, NM_, NR_, NP_, AP_</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">PROVISIONAL</td></tr><tr><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?cmd=PureSearch&#x00026;db=nucleotide&#x00026;details_term=srcdb_refseq_predicted%5Bprop%5D" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">srcdb<wbr style="display:inline-block"></wbr>&#8203;_refseq_predicted[prop]</a>
</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NM_, NR_, NP_</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">PREDICTED</td></tr><tr><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?cmd=PureSearch&#x00026;db=nucleotide&#x00026;details_term=srcdb_refseq_inferred%5Bprop%5D" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">srcdb<wbr style="display:inline-block"></wbr>&#8203;_refseq_inferred[prop]</a>
</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">AC_, AP_, NM_, NR_, NP_</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">INFERRED</td></tr><tr><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_1" scope="row" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">
<a href="/entrez/query.fcgi?cmd=PureSearch&#x00026;db=nucleotide&#x00026;details_term=srcdb_refseq_model%5Bprop%5D" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">srcdb<wbr style="display:inline-block"></wbr>&#8203;_refseq_model[prop]</a>
<i>
<sup>a</sup>
</i>
</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_2" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">NT_, NW_, XM_, XR_, XP_, ZP_</td><td headers="hd_h_ch18.T.entrez_queries_to_retrieve_sets_o_1_1_1_3" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Genome annotation models</td></tr></tbody></table></div></div></article></div><div id="jr-scripts"><script src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/libs.min.js"> </script><script src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/jr.min.js"> </script></div></div>
<!-- Book content -->
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js"> </script>
<!-- CE8B5AF87C7FFCB1_0191SID /projects/books/PBooks@9.11 portal104 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3968615.js" snapshot="books"></script></body>
</html>