256 lines
15 KiB
HTML
256 lines
15 KiB
HTML
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
|
|
<head>
|
|
<meta http-equiv="X-UA-Compatible" content="IE=8;" />
|
|
<meta name="twitter:card" content="summary_large_image">
|
|
<meta name="twitter:site" content="@NLM_NIH">
|
|
<meta name="twitter:title" content="RefSeq Release 70 Now Available with Re-annotated Bacterial Genomes for Uniformity Across Genomes and Species. NLM Technical Bulletin. 2015 May–Jun">
|
|
<meta name="twitter:description" content="The NLM Technical Bulletin is your source to stay informed about NLM products and services.">
|
|
<meta name="twitter:image" content="https://www.nlm.nih.gov/pubs/techbull/images/nlm_tech_bulletin_graphic_twitter.jpg">
|
|
<meta property="og:url" content="https://www.nlm.nih.gov/pubs/techbull/tb.html" />
|
|
<meta property="og:type" content="article" />
|
|
<meta property="og:title" content="RefSeq Release 70 Now Available with Re-annotated Bacterial Genomes for Uniformity Across Genomes and Species. NLM Technical Bulletin. 2015 May–Jun" />
|
|
<meta property="og:description" content="The NLM Technical Bulletin is your source to stay informed about NLM products and services." />
|
|
<meta property="og:image" content="https://www.nlm.nih.gov/pubs/techbull/images/nlm_tech_bulletin_graphic_facebook.jpg" />
|
|
<link type="text/css" href="/pubs/techbull/styles/reset.css" rel="stylesheet" />
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<link type="text/css" href="/pubs/techbull/styles/technicalBulletin.css" rel="stylesheet" />
|
|
<!--Call jQuery-->
|
|
<script src="//assets.nlm.nih.gov/jquery/jquery-latest.min.js"></script>
|
|
<script src="//assets.nlm.nih.gov/jquery/jquery-migrate-latest.min.js"></script>
|
|
<script src="/pubs/techbull/scripts/techbull.js" type="text/javascript" language="javascript"></script>
|
|
<!--[if lte IE 8]>
|
|
<script type="text/javascript" src="/scripts/PIE.js"></script>
|
|
<![endif]-->
|
|
<script src="/core/nlm-notifyExternal/1.0/nlm-notifyExternal.min.js"></script>
|
|
<link type="text/css" href="/pubs/techbull/styles/print.css" rel="stylesheet" media="print"/>
|
|
<title>RefSeq Release 70 Now Available with Re-annotated Bacterial Genomes for Uniformity Across Genomes and Species. NLM Technical Bulletin. 2015 May–Jun</title>
|
|
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" title="The Dublin Core metadata Element Set" />
|
|
<meta name="DC.Title" content="RefSeq Release 70 Now Available with Re-annotated Bacterial Genomes for Uniformity Across Genomes and Species. NLM Technical Bulletin. 2015 May–Jun" />
|
|
|
|
<meta name="DC.Publisher" content="U.S. National Library of Medicine" />
|
|
<meta name="DC.Date.Issued" content="2015-05-11" />
|
|
<meta name="DC.Date.Modified" content="2015-05-11" />
|
|
<meta name="NLMDC.Date.LastReviewed" content="2015-05-11" />
|
|
<meta name="NLM.Contact.Email" content="nlmtechbull@mail.nlm.nih.gov" />
|
|
<meta name="DC.Type" content="Newsletters" />
|
|
<meta name="NLM.Permanence.Level" content="Permanent: Stable Content" />
|
|
<meta name="NLM.Permanence.Guarantor" content="U.S. National Library of Medicine" />
|
|
<meta name="DC.Rights" content="Public Domain" />
|
|
<meta name="DC.Language" content="eng" />
|
|
|
|
|
|
<meta name="DC.Subject.Keyword" content="Reference Sequence" />
|
|
<meta name="DC.Subject.Keyword" content="National Center for Biotechnology Information" />
|
|
<meta name="DC.Subject.Keyword" content="Release" />
|
|
<meta name="DC.Subject.Keyword" content="Datasets" />
|
|
<!-- Google Tag Manager -->
|
|
<script>
/*
 * Google Tag Manager bootstrap loader.
 *
 * Ensures the global data layer array exists, pushes the 'gtm.start' event
 * stamped with the current time, then creates an async script element for
 * container GTM-MT6MLL and inserts it before the first script element in
 * the document.
 *
 * Arguments: w = window, d = document, s = tag name to create/look up,
 * l = data layer variable name, i = GTM container id.
 *
 * The loader URL is written with an explicit https scheme (instead of a
 * protocol-relative URL) so the container is always requested over TLS.
 */
(function (w, d, s, l, i) {
  w[l] = w[l] || [];
  w[l].push({ 'gtm.start': new Date().getTime(), event: 'gtm.js' });

  var f = d.getElementsByTagName(s)[0],
      j = d.createElement(s),
      // Only append the data layer name when it differs from the default.
      dl = l != 'dataLayer' ? '&l=' + l : '';

  j.async = true;
  j.src = 'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
  f.parentNode.insertBefore(j, f);
})(window, document, 'script', 'dataLayer', 'GTM-MT6MLL');
</script>
|
|
<!-- End Google Tag Manager -->
|
|
</head>
|
|
<body>
|
|
<!-- Google Tag Manager -->
|
|
<noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-MT6MLL" height="0" width="0" style="display:none;visibility:hidden" title="googletagmanager"></iframe></noscript>
|
|
<!-- End Google Tag Manager -->
|
|
<div class="skipnavigation"><a title="Skip the navigation on this page" href="#skipnav" class="skipnavigation">Skip Navigation Bar</a></div>
|
|
|
|
<div>
|
|
|
|
<div class="header">
|
|
<img src="/pubs/techbull/images/tb_logo_113.jpg" alt="National Library of Medicine Technical Bulletin" title="National Library of Medicine Technical Bulletin" /><img src="/pubs/techbull/images/nlm_masthead_113.jpg" alt="National Library of Medicine Technical Bulletin" title="National Library of Medicine Technical Bulletin" usemap="#nlm_masthead_113" />
|
|
</div>
|
|
|
|
<div class="search_box">
|
|
<form method="get" action="//vsearch.nlm.nih.gov/vivisimo/cgi-bin/query-meta" target="_self" name="searchForm" class="searchForm">
|
|
<label class="displaynone" for="search">Search</label>
|
|
<!-- Search box: the prompt text is cleared on focus only while the field still holds its default value, so a query the user has already typed is not erased when focus returns to the field. -->
<input name="query" id="search" type="text" class="search-input inactive" size="50" onfocus="if (this.value === this.defaultValue) { this.value = ''; }" value="Search here for NLM Technical Bulletin articles" aria-label="Search NLM Technical Bulletin">
|
|
<input type="hidden" name="v:project" value="technical-bulletin">
|
|
</form>
|
|
</div>
|
|
|
|
</div>
|
|
|
|
<div id="nav">
|
|
<!--Open drop-->
|
|
<ul class="topnav">
|
|
<!-- Primary navigation items; the separator images are decorative and therefore carry an empty alt. -->
<li class="currentissue"><a href="//www.nlm.nih.gov/pubs/techbull/current_issue.html">Current Issue</a> <img class="separator" src="/pubs/techbull/images/whitelinetransparentbackground.gif" alt=""/></li>
|
|
<li class="archive"><a href="//www.nlm.nih.gov/pubs/techbull/back_issues.html">Previous Issues</a> <img class="separator" src="/pubs/techbull/images/whitelinetransparentbackground.gif" alt=""/></li>
|
|
<li class="about"><a href="//www.nlm.nih.gov/pubs/techbull/about.html">About</a> <img class="separator" src="/pubs/techbull/images/whitelinetransparentbackground.gif" alt=""/></li>
|
|
<li class="staycurrent"><a href="//www.nlm.nih.gov/pubs/techbull/stay_current.html">Stay Current <img class="emaillogo" src="/pubs/techbull/images/email_20px.gif" alt="E-Mail Sign Up" style="margin-top: -4px;"/> <img class="rsslogo" src="/pubs/techbull/images/rss_20px.gif" alt="RSS Feed" style="margin-top: -4px;"/></a></li>
|
|
</ul>
|
|
<!--Close drop-->
|
|
</div>
|
|
<div class="body">
|
|
<a id="skipnav" name="skipnav"></a>
|
|
<div class="syndicate">
|
|
<p class="tableOfContents"><strong>Table of Contents: <a href="/pubs/techbull/mj15/mj15_issue_cover.html">2015 MAY–JUNE No. 404</a></strong></p>
|
|
<p class="prevnext"><span class="buttons">
|
|
<span class="previous"><a href="/pubs/techbull/mj15/brief/mj15_ncbi_webinar_dbvar.html">Previous</a></span> <span class="next"><a href="/pubs/techbull/mj15/brief/mj15_sis_reprint_tox_town.html">Next</a></span>
|
|
|
|
</span></p>
|
|
<hr class="hr1">
|
|
<div class="techNoteParagraph">
|
|
|
|
|
|
|
|
|
|
|
|
<a name="0"></a>
|
|
<h1 class="techNoteH1">RefSeq Release 70 Now Available with Re-annotated Bacterial Genomes for Uniformity Across Genomes and Species</h1>
|
|
<p class="noteCitation">RefSeq Release 70 Now Available with Re-annotated Bacterial Genomes for Uniformity Across Genomes and Species. NLM Tech Bull. 2015 May-Jun;(404):b7.</p>
|
|
|
|
<div class="actions">2015 May 11 <span class="status">[posted]</span>
|
|
|
|
|
|
|
|
|
|
|
|
</div>
|
|
<p><em>[Editor's Note: This is a reprint of an announcement published on <em>NLM/NCBI List ncbi-announce</em>, an e-mail announcement list available from the NLM/NCBI. To subscribe to this list, please see the <a href="http://www.ncbi.nlm.nih.gov/mailman/listinfo/ncbi-announce">ncbi-announce -- NCBI announcements and updates</a> page.]</em> </p>
|
|
<p>The full <a href="http://www.ncbi.nlm.nih.gov/refseq/">Reference Sequence</a> (RefSeq) release 70 is now available online, on the <a href="ftp://ftp.ncbi.nlm.nih.gov/refseq/release/">FTP site</a>, and through NCBI's programming utilities, with 74,720,563 records describing 50,351,119 proteins, 11,310,700 RNAs, and sequences from 54,118 different organisms.</p>
|
|
|
|
<p>This release reflects a large update of complete bacterial RefSeq genomes, proteins, and Genes. In order to make genome annotation comparable across genomes and species, NCBI has re-annotated all RefSeq prokaryotic genomes using NCBI's genome annotation pipeline. Previously, it was possible that the same gene, in the same species, with an identical sequence for the gene's genomic region might be annotated with a different protein simply because it was annotated using different methods. Now, the same gene in the same species with the same sequence will be annotated with exactly the same protein in RefSeq.</p>
|
|
|
|
<p>In addition, each annotated CDS used to be tracked with a distinct RefSeq protein accession number. However, due to identical protein sequences being found on multiple re-annotated RefSeq genomes and extensive bacterial genome sequencing, the RefSeq prokaryotic protein dataset rapidly became very redundant. Rather than flood the protein database with thousands of completely identical proteins, NCBI has adopted the use of non-redundant WP proteins for RefSeq prokaryotic genomes annotated with NCBI pipelines, which we first announced in June 2013. Now, if the identical protein sequence appears on more than one RefSeq genome, NCBI simply reuses the existing WP accession number instead of creating a new accession for each new occurrence and genome. As a result, over 7 million proteins were removed, significantly reducing protein redundancy for the prokaryotic dataset. A removed accession report (release70.removed-records.gz) and a supplemental data mapping file (release70.bacterial-reannotation-report.txt.gz) are available in the release-catalog directory on FTP.</p>
|
|
<p>This is a first step toward managing data in a world where genomes are sequenced for assays, rather than to discover novel proteins. We appreciate that this is a new and major change for RefSeq prokaryotic genomes, but it is also a necessary change to make as the number of disease-outbreak and other isolate sequencing continues to rapidly increase. For more information on changes to protein records, nucleotide records, the impact to NCBI Gene, and future plans, please see the latest story on NCBI News: <a href="http://www.ncbi.nlm.nih.gov/news/05-07-2015-refseq-release-70-reannotation">http://www.ncbi.nlm.nih.gov/news/05-07-2015-refseq-release-70-reannotation</a>.</p>
|
|
<p>NCBI has created documentation to explain these changes in detail:</p><br />
|
|
<ul>
|
|
<li><b>RefSeq Re-annotation Project:</b> An explanation of what the re-annotation project is, why and how it was done, and how we will facilitate your transition to the new annotation data can be found here <a href="http://www.ncbi.nlm.nih.gov/refseq/about/prokaryotes/reannotation/">http://www.ncbi.nlm.nih.gov/refseq/about/prokaryotes/reannotation/</a>.</li>
|
|
<li><b>RefSeq non-redundant proteins:</b> A description of this new protein record type with examples can be found here <a href="http://www.ncbi.nlm.nih.gov/refseq/about/nonredundantproteins/">http://www.ncbi.nlm.nih.gov/refseq/about/nonredundantproteins/</a>.</li>
|
|
<li><b>Prokaryotic RefSeq Genomes:</b> The prokaryotic RefSeq genomes policy, as well as definitions for reference genomes and representative genomes can be found here: <a href="http://www.ncbi.nlm.nih.gov/refseq/about/prokaryotes/">http://www.ncbi.nlm.nih.gov/refseq/about/prokaryotes/</a>.</li>
|
|
<li><b>Prokaryotic annotation pipeline:</b> <a href="http://www.ncbi.nlm.nih.gov/genome/annotation_prok/process/">http://www.ncbi.nlm.nih.gov/genome/annotation_prok/process/</a>.</li>
|
|
<li><b>Prokaryotic RefSeq FAQ:</b> <a href="http://www.ncbi.nlm.nih.gov/refseq/about/prokaryotes/faq/">http://www.ncbi.nlm.nih.gov/refseq/about/prokaryotes/faq/</a>.</li>
|
|
<li><b>Supplemental data mapping file:</b> An FTP file in the release-catalog directory (release70.bacterial-reannotation-report.txt.gz) has been prepared for re-annotated complete genomes that have recently transitioned to using the new non-redundant proteins. This file reports the old protein accession and GI, the annotated CDS coordinates, the old locus_tag and NCBI GeneID values and maps to the current non-redundant protein accession and GI, the new locus_tag and NCBI GeneID (if available), the current CDS annotation coordinates, and indicates whether the original protein identically matches or is similar to the replacement non-redundant protein, or was dropped from the annotation.</li>
|
|
<li><b>Supplemental report of suppressed assemblies:</b> An FTP file in the release-catalog directory (release70.addedQA-SuppressedAssemblies.txt) reports details for a subset of bacterial genomes that were suppressed in March 2015 following an expansion of QA metrics and subsequent to curatorial review. This report illustrates some of the reasons for suppression.</li></ul>
|
|
<p>If you have more questions or specific questions that are not addressed in the documentation, you can write to the Help Desk at <a href="mailto:info@ncbi.nlm.nih.gov">info@ncbi.nlm.nih.gov</a> or use the <a href="http://www.ncbi.nlm.nih.gov/projects/RefSeq/update.cgi">feedback form</a> on the RefSeq page.</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
</div>
|
|
<!-- Index Terms: -->
|
|
|
|
|
|
|
|
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<div class="footer">
|
|
<p class="footerLeft"><span class="footerissn"><strong>ISSN 2161-2986 (Online)</strong> Content not copyrighted; freely reproducible.</span><br/>
|
|
<a href="/">National Library of Medicine</a> 8600 Rockville Pike, Bethesda, MD 20894
|
|
<br>
|
|
<a href="//www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a>,
|
|
<a href="//www.nlm.nih.gov/web_policies.html">Web Policies</a>,
|
|
<a href="//www.nlm.nih.gov/careers/jobopenings.html">Careers</a>,
|
|
<a href="//www.nlm.nih.gov/accessibility.html">Accessibility</a>,
|
|
<a href="//www.usa.gov/">USA.gov</a>,
|
|
<a href="//www.hhs.gov/vulnerability-disclosure-policy/index.html">HHS Vulnerability Disclosure</a>
|
|
<br>
|
|
<a href="//www.nih.gov/">NIH</a>,
|
|
<a href="https://www.hhs.gov/">HHS</a>,
|
|
<a href="//www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office">FOIA</a>,
|
|
<!-- Opens in a new browsing context; rel="noopener" keeps the opened page from reaching back through window.opener. NOTE(review): the empty "from=" value is presumably filled in by a script; confirm. -->
<a class="supportLink" href="//support.nlm.nih.gov?from=" target="_blank" rel="noopener">NLM Support Center</a>
|
|
|
|
</p>
|
|
|
|
<p class="footerRight">
|
|
<strong>Last updated:</strong> 11 May 2015</p>
|
|
</div>
|
|
</div>
|
|
<map id="nlm_masthead_113" name="nlm_masthead_113">
|
|
<area shape="rect" alt="NLM Technical Bulletin" coords="1,15,396,45" href="//www.nlm.nih.gov/pubs/techbull/tb.html" title="NLM Technical Bulletin" />
|
|
<area shape="rect" alt="National Library of Medicine" coords="0,47,203,62" href="//www.nlm.nih.gov/" title="National Library of Medicine" />
|
|
<!-- NIH hotspot: title mirrors the alt text, matching the other two areas in this map. -->
<area shape="rect" coords="207,47,396,62" href="//www.nih.gov/" alt="National Institutes of Health" title="National Institutes of Health" />
|
|
</map>
|
|
<!--*****************************Content end*******************************-->
|
|
<!-- START OF SmartSource Data Collector TAG -->
|
|
<!-- Copyright (c) 1996-2010 WebTrends Inc. All rights reserved. -->
|
|
<!-- Version: 9.3.0 -->
|
|
<!-- Tag Builder Version: 3.1 -->
|
|
<!-- Created: 12/14/2010 4:50:39 PM -->
|
|
<script src="/scripts/support.js"></script>
|
|
<!-- NOTE(review): nlm-notifyExternal.min.js is also referenced from the head of this document; confirm whether this second inclusion is intentional. -->
<script src="/core/nlm-notifyExternal/1.0/nlm-notifyExternal.min.js"></script>
|
|
</body>
|
|
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|