nih-gov/www.ncbi.nlm.nih.gov/refseq/annotation_euk/release_notes

1213 lines
66 KiB
XML
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<!-- AppResources meta begin -->
<meta name="paf-app-resources" content="" />
<!-- AppResources meta end -->
<!-- TemplateResources meta begin -->
<meta name="paf_template" content="StdNCol" />
<!-- TemplateResources meta end -->
<!-- Page meta begin -->
<!-- Page meta end -->
<!-- Logger begin -->
<meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_app" content="refseq" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_pdid" content="static" />
<!-- Logger end -->
<title>Software release notes for the NCBI Eukaryotic Genome Annotation Pipeline</title>
<!-- PageFixtures headcontent begin -->
<link type="text/css" rel="stylesheet" href="/core/assets/genome/css/genome.css" /><link type="text/css" rel="stylesheet" href="/core/assets/genome/css/genome_links.css" />
<meta name="cms-local-nav-url" content="http://cms.ncbi.nlm.nih.gov//refseq/annotation_euk/_nav.xml" />
<!-- PageFixtures headcontent end -->
<!-- AppResources external_resources begin -->
<script type="text/javascript" src="/core/jig/1.15.6/js/jig.min.js"></script>
<!-- AppResources external_resources end -->
<!-- Page headcontent begin -->
<meta name="subsite" content="refseq" />
<meta name="path" content="refseq/annotation_euk/release_notes" />
<meta name="modified" content="2024-06-20T16:56:08Z" />
<!-- Page headcontent end -->
<!-- PageFixtures resources begin -->
<link xmlns="http://www.w3.org/1999/xhtml" type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218191/css/4207974/4206132.css" xml:base="http://127.0.0.1/sites/static/header_footer" />
<!-- PageFixtures resources end -->
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8B50AE7C8B92C100000000010600D8.m_6" /><script type="text/javascript"><!--
var ScriptPath = '/portal/';
var objHierarchy = {"name":"PAFAppLayout","type":"Layout","realname":"PAFAppLayout",
"children":[{"name":"PAFAppLayout.AppController","type":"Cluster","realname":"PAFAppLayout.AppController",
"children":[{"name":"PAFAppLayout.AppController.AppResources","type":"Portlet","realname":"PAFAppLayout.AppController.AppResources","shortname":"AppResources"},
{"name":"PAFAppLayout.AppController.RequestProcessor","type":"Portlet","realname":"PAFAppLayout.AppController.RequestProcessor","shortname":"RequestProcessor"},
{"name":"PAFAppLayout.AppController.Controller","type":"Cluster","realname":"PAFAppLayout.AppController.Controller",
"children":[{"name":"PAFAppLayout.AppController.Controller.AnnotStatusStaticContentController","type":"Portlet","realname":"PAFAppLayout.AppController.Controller.AnnotStatusStaticContentController","shortname":"AnnotStatusStaticContentController"}]},
{"name":"PAFAppLayout.AppController.Page","type":"Cluster","realname":"PAFAppLayout.AppController.Page",
"children":[{"name":"PAFAppLayout.AppController.Page.PAFPageSelectorData","type":"Portlet","realname":"PAFAppLayout.AppController.Page.PAFPageSelector.PAFPageSelectorData","shortname":"PAFPageSelectorData"},
{"name":"PAFAppLayout.AppController.Page.PAFStaticPage","type":"Cluster","realname":"PAFAppLayout.AppController.Page.PAFPageSelector.PAFStaticPage",
"children":[{"name":"PAFAppLayout.AppController.Page.PAFStaticPage.MainPortlet","type":"Portlet","realname":"PAFAppLayout.AppController.Page.PAFPageSelector.PAFStaticPage.MainPortlet","shortname":"MainPortlet"}]}]},
{"name":"PAFAppLayout.AppController.PageFixtures","type":"Cluster","realname":"PAFAppLayout.AppController.PageFixtures",
"children":[{"name":"PAFAppLayout.AppController.PageFixtures.PageFixturesP","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.PageFixturesP","shortname":"PageFixturesP"},
{"name":"PAFAppLayout.AppController.PageFixtures.SearchBar","type":"Cluster","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar",
"children":[{"name":"PAFAppLayout.AppController.PageFixtures.SearchBar.SearchBarChooser","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar.SearchBarChooser","shortname":"SearchBarChooser"},
{"name":"PAFAppLayout.AppController.PageFixtures.SearchBar.PAFSearchBar","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar.PAFSearchBar","shortname":"PAFSearchBar"}]},
{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter","type":"Cluster","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter",
"children":[{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIBreadcrumbs","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIBreadcrumbs","shortname":"NCBIBreadcrumbs"},
{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIHelpDesk","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIHelpDesk","shortname":"NCBIHelpDesk"},
{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIApplog_NoScript_Ping","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIApplog_NoScript_Ping","shortname":"NCBIApplog_NoScript_Ping"}]},
{"name":"PAFAppLayout.AppController.PageFixtures.LocalNavPortlet","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.LocalNavPortlet","shortname":"LocalNavPortlet"}]},
{"name":"PAFAppLayout.AppController.TemplateResources","type":"Cluster","realname":"PAFAppLayout.AppController.TemplateResources",
"children":[{"name":"PAFAppLayout.AppController.TemplateResources.StdNColResources","type":"Portlet","realname":"PAFAppLayout.AppController.TemplateResources.PAFTemplateResources.StdNColResources","shortname":"StdNColResources"}]},
{"name":"PAFAppLayout.AppController.Logger","type":"Portlet","realname":"PAFAppLayout.AppController.Logger","shortname":"Logger"},
{"name":"PAFAppLayout.AppController.DebugConsole","type":"Portlet","realname":"PAFAppLayout.AppController.DebugConsole","shortname":"DebugConsole"}]}]};
--></script>
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218658/css/4121862/3974050/3917732/251717/4218659/4218660/14534/45193/3534283/4128070/4005757/4062871.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218658/css/3529741.css" media="print" /><script type="text/javascript">
var ObjectLinks=[{i:0, ename: "p$ExL", esid:"*", sname: "p$ExL", ssid:"*", dname:"p$el", dsid:"0",m:"CopyValue",p:[],f: function(src, dst) {fn_CopyValue(src, dst);}}]
var ActiveNames = {"p$ExL":1};
</script></head>
<body class=" static">
<div class="grid">
<div class="col twelve_col nomargin shadow">
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
<div class="sysmessages">
<noscript>
<p class="nojs">
<strong>Warning:</strong>
The NCBI web site requires JavaScript to function.
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">more...</a>
</p>
</noscript>
</div>
<!--/.sysmessage-->
<div class="wrap">
<div class="page">
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<div xmlns="http://www.w3.org/1999/xhtml" id="universal_header" xml:base="http://127.0.0.1/sites/static/header_footer">
<section class="usa-banner">
<div class="usa-accordion">
<header class="usa-banner-header">
<div class="usa-grid usa-banner-inner">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
<p>An official website of the United States government</p>
<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
<span class="usa-banner-button-text">Here's how you know</span>
</button>
</div>
</header>
<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
<div class="usa-banner-guidance-gov usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
<div class="usa-media_block-body">
<p>
<strong>The .gov means it's official.</strong>
<br />
Federal government websites often end in .gov or .mil. Before
sharing sensitive information, make sure you're on a federal
government site.
</p>
</div>
</div>
<div class="usa-banner-guidance-ssl usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
<div class="usa-media_block-body">
<p>
<strong>The site is secure.</strong>
<br />
The <strong>https://</strong> ensures that you are connecting to the
official website and that any information you provide is encrypted
and transmitted securely.
</p>
</div>
</div>
</div>
</div>
</section>
<div class="usa-overlay"></div>
<header class="ncbi-header" role="banner" data-section="Header">
<div class="usa-grid">
<div class="usa-width-one-whole">
<div class="ncbi-header__logo">
<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
</a>
</div>
<div class="ncbi-header__account">
<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
<span class="fa fa-user" aria-hidden="true">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
<g style="fill: #fff">
<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
</g>
</svg>
</span>
<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
<span class="sr-only">Show account info</span>
</button>
</div>
<div class="ncbi-popup-anchor">
<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
<div class="ncbi-popup-head">
<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
<span class="fa fa-times">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
</svg>
</span>
<span class="usa-sr-only">Close</span></button>
<h4>Account</h4>
</div>
<div class="account-user-info">
Logged in as:<br />
<b><span class="username" id="uname_long">username</span></b>
</div>
<div class="account-links">
<ul class="usa-unstyled-list">
<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
</ul>
</div>
</div>
</div>
</div>
</div>
</header>
<div role="navigation" aria-label="access keys">
<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
</div>
<section data-section="Alerts">
<div class="ncbi-alerts-placeholder"></div>
</section>
</div>
</div>
<!--/.header-->
<div class="header">
<div class="res_logo"><h1 class="res_name"><a href="/refseq/" title="RefSeq home">RefSeq</a></h1><h2 class="res_tagline">Integrated reference sequences</h2></div>
<div class="search"><form method="get" action="/refseq/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="refseq" selected="selected">RefSeq</option><option value="biosample">BioSample</option><option value="nuccore">Nucleotide</option><option value="taxonomy" class="last">Taxonomy</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="refseq">RefSeq</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option 
value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search RefSeq" value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="isEnabled:false,disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'yes'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div><input type="hidden" name="p$a" id="p$a" /><input type="hidden" name="p$l" id="p$l" value="PAFAppLayout" /><input type="hidden" name="p$st" id="p$st" value="refseq" /><input name="SessionId" id="SessionId" value="CE8B5AF87C7FFCB1_0191SID" disabled="disabled" type="hidden" /><input name="Snapshot" id="Snapshot" value="/projects/refseq/refseq@1.21" disabled="disabled" type="hidden" /></form></div>
</div>
<div class="nav_and_browser">
<div class="localnav"><ul class="jig-ncbilocalnav">
<li><a href="/refseq/annotation_euk/">Eukaryotic Annotation Home</a></li>
<li><a href="#">Documentation</a><ul>
<li><a href="/refseq/annotation_euk/process">Annotation Process</a></li>
<li><a href="/refseq/annotation_euk/gnomon">Gnomon</a></li>
<li><a href="/books/NBK169439/">NCBI Handbook Chapter</a></li>
<li><a href="/refseq/annotation_euk/release_notes">Software Release Notes</a></li>
</ul>
</li>
<li><a href="#">Annotated Genomes</a><ul>
<li><a href="/refseq/annotation_euk/all">All Annotated Genomes</a></li>
<li><a href="/refseq/annotation_euk/status/#recent">Recently Annotated Genomes</a></li>
<li><a href="/refseq/annotation_euk/status/">Annotation Runs In Progress</a></li>
<li><a href="/refseq/annotation_euk/#graphs">Annotations Per Year Graphs</a></li>
</ul>
</li>
<li><a href="/refseq/annotation_euk/policy">Annotation Policy</a></li>
<li><span id="euk-annot-request-navbar"></span><a href="https://support.nlm.nih.gov/support/create-case/">Request Annotation</a></li>
</ul></div>
</div>
<!-- was itemctrl -->
<div class="container">
<div id="maincontent" class="content col twelve_col last">
<div class="col1">
<h1 id="software-release-notes-for-the-n">Software release notes for the NCBI Eukaryotic Genome Annotation Pipeline</h1>
<p>The software used for the NCBI annotation pipelines is under active development. This page provides a list of the major changes incorporated in releases of the Eukaryotic Genome Annotation Pipeline software.</p>
<h2 id="version-103">Version 10.3<a id="version10.3" href=""></a></h2>
<p><strong>Release date: June 20 2024</strong></p>
<h3 id="process">Process</h3>
<ul>
<li>Automated computation of maximum allowed intron length and applying that value across multiple alignment tools</li>
<li>Computation of normalized gene expression counts for all RNA-seq datasets aligned as part of the annotation pipeline</li>
<li>Introduction of <a href="https://www.nature.com/articles/s41592-021-01101-x">Diamond</a> alignment tool to generate protein-to-protein alignments</li>
<li>Ortholog calculation for an expanded set of arthropod genomes</li>
<li>Various bug fixes and improvements</li>
<li>Updates to versions of third party software and data:<ul>
<li>RepeatMasker v4.1.5</li>
<li>Rfam v14.10</li>
<li>tRNAscan-SE v2.0.12</li>
<li>BUSCO v5.7.1</li>
<li>samtools v1.20</li>
</ul>
</li>
</ul>
<h3 id="reporting">Reporting</h3>
<ul>
<li>Addition of a new downloadable file to the <a href="https://ftp.ncbi.nlm.nih.gov/genomes/">FTP site</a>, in connection with the addition of normalized gene expression data:<ul>
<li><em>*_normalized_gene_expression_counts.txt.gz</em>: tab-delimited text file with counts of normalized RNA-Seq reads mapped to each gene</li>
</ul>
</li>
</ul>
<h2 id="version-102">Version 10.2<a id="version10.2" href=""></a></h2>
<p><strong>Release date: September 6 2023</strong></p>
<h3 id="process_1">Process</h3>
<ul>
<li>Assignment of Gene Ontology terms to annotated proteins using <a href="https://pubmed.ncbi.nlm.nih.gov/24451626/">InterProScan</a></li>
<li>Improvements in the handling of cross-species RNA-Seq alignments with STAR</li>
<li>Calculation of expression per RNA-Seq run and per gene using <a href="https://pubmed.ncbi.nlm.nih.gov/24227677/">Subread featureCounts software</a></li>
<li>Improved filtering of PacBio and ONT RNA alignments used for model generation</li>
<li>Incremental improvements to internal processing and performance</li>
</ul>
<h3 id="reporting_1">Reporting</h3>
<ul>
<li>Addition of new downloadable files to our <a href="https://ftp.ncbi.nlm.nih.gov/genomes/">FTP site</a>, in connection with the features above<ul>
<li>Gene Ontology annotation of the annotated genes in GO Annotation File (GAF) format. See files <em>*_gene_ontology.gaf.gz</em></li>
<li>Addition of featureCounts output files. These files provide the counts of reads per RNA-Seq run per gene, for all RNA-Seq runs used in the annotation, and some metadata:<ul>
<li><em>*_gene_expression_counts.txt.gz</em>: tab-delimited text file with counts of RNA-Seq reads mapped to each gene</li>
<li><em>*_rnaseq_runs.txt</em>: tab-delimited text file containing information about RNA-Seq runs used for gene expression analyses</li>
<li><em>*_rnaseq_alignment_summary.txt</em> files: tab-delimited text file containing information about assignment of the aligned reads to genes</li>
</ul>
</li>
<li>Addition of RNA-Seq coverage graph in <a href="https://genome.ucsc.edu/goldenPath/help/bigWig.html">UCSC bigWig format</a>, for each SRA run aligned to the genome. See <em>*_graph.bw</em> in the RNASeq_coverage_graphs directory.</li>
</ul>
</li>
</ul>
<h2 id="version-101">Version 10.1<a id="version10.1" href=""></a></h2>
<p><strong>Release date: December 14 2022</strong></p>
<h3 id="process_2">Process</h3>
<ul>
<li>Better identification and removal of chimeric alignments by STAR for more accurate predictions of paralogous genes</li>
<li>Trimming of low-entropy terminal exons identified by minimap2</li>
<li>Revised annotation of RefSeq NM_/NR_ features with large inserts (e.g. an Alu repeat found in the transcript and not the genome) to use a single exon rather than two abutting exons</li>
<li>Improvements for PAR annotation and gene placement when annotating multiple assemblies (e.g. human GRCh38.p14 and CHM13_T2Tv2.0)</li>
<li>Added support for annotation of human GenBank assemblies using curated RefSeq data, available under <a href="https://ftp.ncbi.nlm.nih.gov/genomes/all/pilot/">genomes/all/pilot</a></li>
<li>Incremental improvements to internal processing and performance</li>
</ul>
<h3 id="reporting_2">Reporting</h3>
<ul>
<li>New nomenclature for annotations. Starting with this release, annotations will be named after the assembly accession and date on which the annotation was started. For example, the name of the annotation for assembly GCF_016801865.2 started in December 2022, is GCF_016801865.2-RS_2022_12.</li>
</ul>
<h2 id="version-100">Version 10.0<a id="version10.0" href=""></a></h2>
<p><strong>Release date: June 14 2022</strong></p>
<h3 id="process_3">Process</h3>
<ul>
<li>Aligner change for RNA-Seq reads from Splign to STAR (Dobin A, et al. <a href="/pubmed/23104886/"><em>Bioinformatics</em> 2013, <strong>29</strong>(1):15-21</a>)</li>
<li>Upgrade of RFAM library to RFAM 14.6, for the prediction of small non-coding RNAs (rRNAs, snRNAs and snoRNAs)</li>
<li>Incremental improvements to internal processing and performance</li>
</ul>
<h2 id="version-90">Version 9.0<a id="version9.0" href=""></a></h2>
<p><strong>Release date: June 8 2021</strong></p>
<h3 id="process_4">Process</h3>
<ul>
<li>Addition of a module for the refinement of transcription start sites with Cap analysis gene expression (CAGE) data. (Applied only in the annotation of species with public CAGE data in SRA.)</li>
</ul>
<h3 id="reporting_3">Reporting</h3>
<ul>
<li>Addition of cap and/or polyA sites information on genomic and transcript records, when experimental support is available (CAGE for cap and RNA-Seq for polyA sites).<ul>
<li>On genomic records, cap and polyA_site evidence are in the /experiment field of the .gbk files, as
<code>/experiment="COORDINATES: polyA evidence [ECO:0006239]"</code> or <code>/experiment="COORDINATES: cap analysis [ECO:0007248] and polyA evidence [ECO:0006239]"</code></li>
<li>On transcript records, cap evidence is represented as misc_features and polyA as polyA_site features. See for example <a href="https://www.ncbi.nlm.nih.gov/nuccore/XM_027966739.2">XM_027966739.2</a>:</li>
</ul>
</li>
</ul>
<pre><code class="language-markdown"> misc_feature 1
/gene="D2HGDH"
/experiment="COORDINATES: cap analysis [ECO:0007248]"
/note="transcription start site"
[...]
polyA_site 2524
/gene="D2HGDH"
/experiment="COORDINATES: polyA evidence [ECO:0006239]"
</code></pre>
<ul>
<li>The cap and polyA sites are present in column 9 of the GFF3 file. </li>
</ul>
<h2 id="version-86">Version 8.6<a id="version8.6" href=""></a></h2>
<p><strong>Release date: February 24 2021</strong></p>
<h3 id="process_5">Process</h3>
<ul>
<li>Change in masking of genome repeats prior to alignments of transcripts and protein evidence:<ul>
<li>Use of WindowMasker for all organisms but human and mouse </li>
<li>For human and mouse, switched RepeatMasker to using Dfam HMMs rather than RepBase libraries</li>
</ul>
</li>
<li>Normalization of the 5' and 3'-UTR ends of RefSeq model transcripts (with XM or XR prefix) with the curated RefSeq transcripts (NM or NR prefix) of the same gene with the same terminal exon</li>
</ul>
<h3 id="reporting_4">Reporting</h3>
<ul>
<li>Addition to the web and XML annotation reports of:<ul>
<li>BUSCO results, calculated on the annotated gene set using the longest protein from each gene </li>
<li>Per-run alignment statistics of long RNA-Seq reads, generated with long-read sequencing technologies such as PacBio or Oxford Nanopore </li>
</ul>
</li>
<li>Removal from the FTP site of files reporting genomic spans masked by RepeatMasker (*rm.out.gz files) </li>
</ul>
<h2 id="version-85">Version 8.5<a id="version8.5" href=""></a></h2>
<p><strong>Release date: July 9 2020</strong></p>
<h3 id="process_6">Process</h3>
<ul>
<li>Upgrade of minimap2 to version 2.17, for aligning SRA long read transcriptomes</li>
<li>Upgrade of tRNAscan-SE to version 2.0.4, for prediction of tRNAs</li>
<li>Incremental improvements to internal processing and performance</li>
</ul>
<h2 id="version-84">Version 8.4<a id="version8.4" href=""></a></h2>
<p><strong>Release date: March 17 2020</strong></p>
<h3 id="process_7">Process</h3>
<ul>
<li>Improvement in the naming process for fish genes. We have switched to primarily applying gene symbols and names from zebrafish, which are mostly provided by the Zebrafish Information Network (<a href="https://zfin.org/">ZFIN</a>), instead of human, to other fish orthologs. The end result is more ortholog connections, and better nomenclature.</li>
</ul>
<h2 id="version-83">Version 8.3<a id="version8.3" href=""></a></h2>
<p><strong>Release date: November 25 2019</strong></p>
<h3 id="process_8">Process</h3>
<ul>
<li>Aligner change for SRA long read transcriptomes (PacBio IsoSeq, Oxford Nanopore technologies, etc.) from Splign to Minimap2 (Li H <a href="/pubmed/29750242"><em>Bioinformatics</em> 2018, <strong>34</strong>(18):3094-3100</a>)</li>
<li>Incremental improvements to internal processing and performance</li>
</ul>
<h3 id="reporting_5">Reporting</h3>
<ul>
<li>Addition of annotated transcripts in BAM format to the files available for download</li>
<li>Files for the annotated assemblies now available under <a href="https://ftp.ncbi.nlm.nih.gov/genomes/refseq">genomes/refseq</a>. Files in <a href="https://ftp.ncbi.nlm.nih.gov/genomes/">genomes</a>/Genus_species will be archived on February 1, 2020 <a href="https://ncbiinsights.ncbi.nlm.nih.gov/2019/12/05/new-assem-downloads/">as announced December 5, 2019</a></li>
</ul>
<h2 id="version-82">Version 8.2<a id="version8.2" href=""></a></h2>
<p><strong>Release date: March 8 2019</strong></p>
<h3 id="process_9">Process</h3>
<ul>
<li>Upgrade of RepeatMasker to version 4.0.8 and RepBase-20181026</li>
<li>Incremental improvements to internal processing and performance</li>
</ul>
<h2 id="version-81">Version 8.1<a id="version8.1" href=""></a></h2>
<p><strong>Release date: June 21 2018</strong></p>
<h3 id="process_10">Process</h3>
<ul>
<li>Incremental improvements to internal processing and performance</li>
</ul>
<h2 id="version-80">Version 8.0<a id="version8.0" href=""></a></h2>
<p><strong>Release date: November 20 2017</strong></p>
<h3 id="process_11">Process</h3>
<ul>
<li>Addition of a module to the pipeline to annotate small non-coding RNAs (rRNAs, snRNAs and snoRNAs), using <em>cmsearch</em> from the Infernal package and RFAM 12.0 HMMs for eukaryotes (Nawrocki EP, et al. <a href="/pubmed/25392425"><em>Nucleic Acids Research</em> 2015, <strong>43</strong>(Database issue):D130-7</a>).</li>
</ul>
<h3 id="reporting_6">Reporting</h3>
<ul>
<li>Changes in the web annotation reports. These result in higher consistency with the NCBI GFFs and other downloadable files. Note that web reports for annotations executed with software older than version 8.0 were not updated to the new format.<ul>
<li>Features annotated on organelles are now included in the 'Gene and Feature statistics' section</li>
<li>Changes in the break-down of reported features:<ul>
<li>Immunoglobulin/T-cell receptor gene segments are reported separately from protein-coding genes.</li>
<li>Pseudogenes are reported as two categories, transcribed and non-transcribed pseudogenes.</li>
</ul>
</li>
</ul>
</li>
</ul>
<h2 id="version-74">Version 7.4<a id="version7.4" href=""></a></h2>
<p><strong>Release date: April 19 2017</strong></p>
<h3 id="process_12">Process</h3>
<ul>
<li>Incremental improvements to internal processing and performance</li>
</ul>
<h3 id="reporting_7">Reporting</h3>
<ul>
<li>In compliance with a <a href="https://www.ncbi.nlm.nih.gov/news/03-02-2016-phase-out-of-GI-numbers/">NCBI-wide change</a>, gi numbers are no longer included in FASTA and GenBank format files (.fa, .mfa, .gbk and .gbs) provided on <a href="https://ftp.ncbi.nih.gov/genomes/">our FTP site</a>.</li>
<li>In the RNA-Seq alignments section of the annotation reports, report of the 'Percent of aligned reads with introns' instead of the 'Percent spliced reads'. The 'Percent of aligned reads with introns' is the proportion of reads with spliced alignments out of all aligned reads.</li>
<li>In the RNA-Seq alignments section of the annotation reports, correction in the calculation of the 'Percent aligned reads'. In some reports generated prior to version 7.4, the denominator included the count of reads from a small number of SRA runs that were not used in the annotation.</li>
</ul>
<h2 id="version-73">Version 7.3<a id="version7.3" href=""></a></h2>
<p><strong>Release date: February 9 2017</strong></p>
<h3 id="process_13">Process</h3>
<ul>
<li>Improvements in the alignment process for curated RefSeq sequences in masked regions of the genome</li>
<li>Improvements in the global alignment process of protein evidence to the genome</li>
<li>Incremental improvements to internal processing and performance</li>
</ul>
<h3 id="reporting_8">Reporting</h3>
<ul>
<li>In the eukaryotic annotation status page, addition of links to the Genome Data Viewer (GDV) for genomes assembled to the level of chromosomes</li>
<li>In the RNA-Seq alignments section of the annotation reports, addition of publications associated with RNA-Seq data</li>
</ul>
<h2 id="version-72">Version 7.2<a id="version7.2" href=""></a></h2>
<p><strong>Release date: September 27 2016</strong></p>
<h3 id="process_14">Process</h3>
<ul>
<li>Added option to include in the final annotation Gnomon models with up to 99% ab initio sequence and no BlastP hit. This option may be used for annotating organisms distant from reference genomes, and for which little long-ranging same or cross-species primary evidence is publicly available and aligns to the genome (e.g. some invertebrates or fungi).</li>
<li>Refinements to pairwise orthology calculations to be more conservative when there are multiple paralogs and no supporting synteny information</li>
<li>Incremental improvements to internal processing</li>
</ul>
<h3 id="reporting_9">Reporting</h3>
<ul>
<li>Changes to GFF3 files. ncRNA features are now represented in the type field (column 3) with specific SO terms associated with their ncRNA_class (lnc_RNA, SRP_RNA, snRNA, RNase_MRP_RNA, etc). The "ncrna_class" attribute is no longer provided in the attributes field (column 9).</li>
</ul>
<h2 id="version-71">Version 7.1<a id="version7.1" href=""></a></h2>
<p><strong>Release date: June 8 2016</strong></p>
<h3 id="process_15">Process</h3>
<ul>
<li>Upgrade of RepeatMasker to version 4.0.6, along with RepBase Update 20150807 and RM database version 20150807</li>
<li>Incremental improvements to internal processing</li>
</ul>
<h2 id="version-70">Version 7.0<a id="version7.0" href=""></a></h2>
<p><strong>Release date: April 8 2016</strong></p>
<h3 id="process_16">Process</h3>
<ul>
<li>Execution of the annotation process on top-level sequences (chromosomes, and unplaced and unlocalized scaffolds) instead of scaffolds. This change improves the annotation of features spanning gaps between adjacent scaffolds. For the near future, SNP annotation will remain on scaffolds.</li>
<li>Assignment of unique GeneIDs to tRNAs annotated at different locations. Note that tRNAs with the same anticodon are assigned the same Gene symbol. This change increases consistency with other gene types.</li>
<li>Bug fix in the handling of coding models with a high proportion of ab initio sequence (&gt;50%)</li>
<li>Restriction in the generation of alternative variants for <a href="/assembly/model/#asmb_def">alternate loci units</a>. If a gene with a <a href="/refseq/about/">known RefSeq transcript</a> (NM_ or NR_ prefix) is placed on an alternate locus, no alternate variant model (XM_ or XR_ prefix) is created for the gene on this alternate locus. Given sufficient evidence, alternative variants for genes with known RefSeq will continue being generated on the primary assembly unit. This change will affect the annotation of alternate loci units in human and mouse.</li>
<li>Incremental improvements to internal processing and annotation consistency</li>
</ul>
<h3 id="reporting_10">Reporting</h3>
<ul>
<li>In Nucleotide:<ul>
<li>GenBank, Graphics and ASN views of RefSeq placed scaffolds no longer show any annotation (see for example <a href="/nuccore/NW_001594469.1">NW_001594469.1</a>)</li>
<li>ASN view of RefSeq chromosomes now includes the annotation.</li>
</ul>
</li>
<li>On the FTP site (see for example the <a href="https://ftp.ncbi.nih.gov/genomes/Ornithorhynchus_anatinus/">recent re-annotation of platypus</a>)<ul>
<li>GFF files are now only provided for top-level sequences.</li>
<li>Files in the CHR_* directories for nuclear chromosomes no longer include annotation on placed scaffolds.</li>
<li>Masked spans (masking_coordinates.gz) are now in top-level coordinates.</li>
<li>Comparisons of current to previous annotation (comparison directory) are now in top-level coordinates.</li>
</ul>
</li>
</ul>
<h2 id="version-65">Version 6.5<a id="version6.5" href=""></a></h2>
<p><strong>Release date: November 23 2015</strong></p>
<h3 id="process_17">Process</h3>
<ul>
<li>Due to low usage of the STS (Sequence Tagged Sites) placement information on annotated sequences, the process that maps STSs has been discontinued. STS annotation will not be produced for new RefSeq sequences, but will remain available for sequences last annotated before November 20, 2015.</li>
<li>Better handling of stranded RNA-seq reads</li>
<li>Incremental improvements to internal processing and annotation consistency</li>
</ul>
<h3 id="reporting_11">Reporting</h3>
<ul>
<li>Addition of a section to the HTML annotation reports, "Comparison of current and previous annotations", for organisms that are re-annotated (see <a href="/genome/annotation_euk/Ceratotherium_simum_simum/101/#AnnotationComparisonStats">this example</a>). This new section indicates how much of the annotation on each assembly has changed between the current and the previous annotation releases and provides links to downloadable full reports. The full reports (in tabular and <a href="/tools/gbench/">Genome Workbench</a> formats) are on our FTP site and contain the mappings of current to previous genes and transcripts. Summary counts by category of change are available in the XML annotation report, annotation_report.xml file (<span><span>&lt;AnnotationComparison&gt;</span></span> section), also in the FTP directory.</li>
<li>Addition to the annotation_report.xml &lt;RnaseqAlignReport&gt; section of the &lt;Stranded&gt; tag to the individual SRA runs that were generated with a strand-specific isolation technique</li>
<li>Changes to GFF3-formatted files:<ul>
<li>Transcript features for model RefSeqs now contain the attribute "model_evidence" in column 9, listing the source and number of supporting evidence and percent coverage by RNA-Seq samples, similar to reporting in the flatfile format.</li>
<li>GFF3 output has been changed to only use small gaps (1-2 bp) (aka micro-introns) to correct for frameshifts, even if the RefSeq product has an insertion. Earlier files from software releases 6.3 and 6.4 used small overlaps to represent insertions according to INSDC specifications, but these overlaps weren't compatible with some external software.</li>
</ul>
</li>
</ul>
<h2 id="version-64">Version 6.4<a id="version6.4" href=""></a></h2>
<p><strong>Release date: July 22 2015</strong></p>
<h3 id="process_18">Process</h3>
<ul>
<li>Improvement in the RNA-Seq alignment process. Prior to alignment to the genome, SRA runs are now evaluated for strandedness and reads of stranded runs are aligned in the sense orientation only. Unstranded runs are aligned in both orientations and logic to determine the best strand is applied downstream as before.</li>
<li>Incremental improvements to internal processing and annotation consistency</li>
</ul>
<h3 id="reporting_12">Reporting</h3>
<ul>
<li>Changes to GFF3-formatted files. Genes in the GFF files for the final annotation now contain the attribute "gene_biotype" in column 9, making explicit whether a gene is coding, non-coding, pseudogene, etc. See more details in the <a href="https://ftp.ncbi.nlm.nih.gov/genomes/README_GFF3.txt">GFF3 documentation</a>.</li>
</ul>
<h2 id="version-63">Version 6.3<a id="version6.3" href=""></a></h2>
<p><strong>Release date: April 21 2015</strong></p>
<h3 id="process_19">Process</h3>
<ul>
<li>Improvement in the annotation of model proteins containing selenocysteine residues (see for example <a href="/nuccore/XM_012546481.1">XM_012546481.1</a>)<ul>
<li>Selenocysteine residues are now represented with a "U" (instead of a code-breaking "X") in protein sequences.</li>
<li>Titles of selenocysteine-containing proteins are not prefixed any more with "LOW QUALITY PROTEIN" unless the protein contains corrections for the genome.</li>
<li>Transcripts and annotation of the parent genomic sequences contain a <span>/transl_except</span> that explicitly provides the location of the selenocysteine residue in the sequence.</li>
</ul>
</li>
<li>Refinement in the logic that weighs alignments of same-species transcript versus cross-species <a href="/refseq/about/">validated RefSeq proteins</a> to favor same-species transcripts. This change results in a smaller number of models with frameshifts or code-breaks.</li>
<li>Improvement of models bordering assembly gaps<ul>
<li>Better handling of alignments of protein evidence affected by assembly gaps</li>
<li>Generation of alternative variants of gap-filled models, if alternative variants are supported by the evidence and if the gap-filled portion is identical in all variants</li>
<li>Trimming of UTRs in gap-filled portion of a transcript if shorter than 100 bases</li>
</ul>
</li>
</ul>
<h3 id="reporting_13">Reporting</h3>
<ul>
<li>Change in the reporting of RNA-Seq alignment statistics in the "Short read transcript alignments" section of the annotation reports. Raw counts of aligned and spliced reads are estimates and are subject to small variations (within 1%) from run to run, therefore only percentages rounded to the nearest integer are now reported.</li>
</ul>
<h2 id="version-62">Version 6.2<a id="version6.2" href=""></a></h2>
<p><strong>Release date: December 3 2014</strong></p>
<h3 id="process_20">Process</h3>
<ul>
<li>Improvements to alignments and model generation algorithms</li>
<li>Exclusion of low-entropy RNA-Seq reads from the set of reads aligned to the genome</li>
</ul>
<h3 id="reporting_14">Reporting</h3>
<ul>
<li>Addition of a section to the annotation reports, "Alignment of the annotated proteins to a set of high-quality proteins", providing the counts of annotated proteins with BlastP hits against a database of high-confidence proteins (e.g. UniProtKB/Swiss-Prot), at several coverage thresholds. For comparison purposes the data is also provided for a selection of related organisms that were recently annotated.</li>
<li>Bug fix in the calculation of the number of RNA-Seq reads aligned to the genome presented in the "Short read transcript alignments" section of the annotation reports. Statistics in reports pre-dating the 6.2 release may be off by a few percent.</li>
<li>Modification of the representation of multi-interval non-trans-spliced tRNA features in GFF3 files. Each multi-interval non-trans-spliced tRNA feature is now represented by a single feature (line) of type tRNA and multiple nested features of type exon (one for each interval).</li>
<li>Modification of the representation of transcripts with indels compared to the genome in GFF3 files. Insertions in transcripts within the coding region are now represented by a small overlap between the two halves of a split exon, and deletions within the coding region are represented by very short introns between the two halves of an exon. This allows software to properly interpret the reading frame. Note that the conceptual sequence of the feature can still differ from the transcript or protein sequence because of mismatches, gaps, and when overlapping genome sequence does not match the sequence of an insertion.</li>
</ul>
<h2 id="version-61">Version 6.1<a id="version6.1" href=""></a></h2>
<p><strong>Release date: August 4 2014</strong></p>
<h3 id="process_21">Process</h3>
<ul>
<li>Addition of a post tRNAscan-SE filter to limit probable noise in tRNA predictions</li>
<li>Bug fix in the unique hit exon coverage track displayed in Gene, that caused reads with multiple placements to be included</li>
</ul>
<h3 id="reporting_15">Reporting</h3>
<ul>
<li>In the "Short read transcript alignments" section of the annotation reports, addition of the alignment statistics per RNA-Seq SRA run, in addition to the alignment statistics per sample</li>
</ul>
<h2 id="version-60">Version 6.0<a id="version6.0" href=""></a></h2>
<p><strong>Release date: April 17 2014</strong></p>
<h3 id="process_22">Process</h3>
<ul>
<li>For model RefSeqs extending into assembly gaps, construction of transcript (XM_/XR_) and protein (XP_) products using a combination of genomic and transcript sequence (<a href="/refseq/about/">RefSeq</a>, <a href="http://www.insdc.org/">INSDC</a> or <a href="/genbank/tsa">TSA</a>) to compensate for missing genomic sequence.</li>
<li>Improvements to identification of orthologs compared to a reference taxon, including more robust analysis of protein BLAST alignments. These changes result in more ortholog calls, especially for more distantly related taxa, with lower false-match rates. The results are used for gene naming, and are reported in Gene.</li>
<li>Redesign of the code for categorizing genes by type (protein-coding, pseudogene, non-coding) and assigning names to genes and products (transcript and protein RefSeqs). These changes allow for more automation and higher throughput, as well as improve the identification of pseudogenes and low-quality protein-coding genes.</li>
<li>Change in the naming of model RefSeq variants and isoforms to use the same isoform name for multiple variants that differ only in the UTRs, and to use the same variant and isoform names for equivalent model RefSeqs annotated on multiple assemblies.</li>
</ul>
<h3 id="reporting_16">Reporting</h3>
<ul>
<li>For model RefSeqs extending into assembly gaps, addition to the nucleotide records of the source of the model spans. For example, <a href="/nuccore/XM_007659754.1">XM_007659754.1</a> is a model with three exons annotated on genomic sequence <a href="/nuccore/AAPN01287557.1">AAPN01287557.1</a> and was allowed to extend at the 5-prime end into an assembly gap based on the alignment of transcript <a href="/nuccore/JQ350810.1">JQ350810.1</a>. The flat file for this record contains the following three indicators of the origin of the model:</li>
</ul>
<p><em>A comment:</em></p>
<p><img src="/core/assets/genome/images/gap_filling_comment.png" alt="gap_filling_comment" /></p>
<p><em>An assembly gap attribute:</em></p>
<p><img src="/core/assets/genome/images/gap_filling_attribute.png" alt="gap_filling_attribute" /></p>
<p><em>A PRIMARY block providing the spans of the RefSeq model on the genomic or transcript (primary) sequence:</em></p>
<p><img src="/core/assets/genome/images/gap_filling_composition.png" alt="gap_filling_composition" /></p>
<ul>
<li>For model RefSeqs extending into assembly gaps, annotation of the genomic mRNA and CDS features with partial features (&lt; or &gt; in the flatfile view), either at internal intervals or at the 5-prime or 3-prime end, to indicate the location of the missing sequence.</li>
<li>Addition of a structured comment of RefSeq attributes to the nucleotide and protein records of model RefSeqs with <em>ab initio</em> span(s) and/or with corrections (see <a href="/nuccore/XM_007529441">XM_007529441.1</a> for example). The comment indicates the following, as appropriate for each model:<ul>
<li>Ab initio span(s): % bases not supported by evidence and produced by the <em>ab initio</em> component of Gnomon</li>
<li>Frameshift(s): number of indels corrected</li>
<li>Internal stop codon(s): number of genomic stop codons corrected</li>
<li>Assembly gap(s): number of transcript bases added to fill a genome assembly gap (see above)</li>
</ul>
</li>
<li>Addition of keyword "corrected model" to models with frameshifts, internal stop codons or assembly gaps; and keyword "includes ab initio" to models with <em>ab initio</em> spans.</li>
<li>Addition to the annotation reports of the number of model RefSeqs with genomic gaps filled with transcript sequence.</li>
<li>Change in the annotation reports for the calculation of the number of corrected model RefSeqs. The new count, "model RefSeq with major corrections", includes all model RefSeq proteins with major corrections (CDSs with correction for internal stop-codons, frameshifts or internal gaps).</li>
<li>Changes to GFF3-formatted files:<ul>
<li>Incorporation of the start_range and end_range attributes from the GVF specification to indicate partial features. The GFF3 specification currently does not include any formal mechanism to indicate partial features, so these attributes are borrowed from GVF with non-official (lowercase) tags. In NCBI's annotation files, presence of a start_range attribute can simply be interpreted as column 4 is partial, and an end_range attribute as column 5 is partial, regardless of strand, without further analysis of the tag value. Further details about the attributes are available in the <a href="http://www.sequenceontology.org/resources/gvf_1.05.html">GVF specifications</a>.</li>
<li>Reduced usage of URL escaping in attribute values.</li>
</ul>
</li>
</ul>
<h2 id="version-52">Version 5.2<a id="version5.2" href=""></a></h2>
<p><strong>Release date: November 19 2013</strong></p>
<h3 id="process_23">Process</h3>
<ul>
<li>Exclusion of spans in protein alignments from use by gene prediction if the spans contain an intron with much lower RNA-Seq support than the rest of the alignment.</li>
<li>Classification of model RefSeqs (XR_) for predicted non-coding genes as ncRNA of type lncRNA rather than misc_RNA.</li>
<li>Improvements to RNA-Seq filtering criteria in regions of alternative splicing.</li>
<li>Improvements to model predictions in regions of closely-spaced or overlapping genes.</li>
<li>Improvements to the assembly-assembly alignment process, used for tracking genes across assemblies.</li>
<li>Performance improvements.</li>
</ul>
<h3 id="reporting_17">Reporting</h3>
<ul>
<li>Production of a report with each annotation run summarizing the features annotated and the alignments used for gene prediction. This report is available in HTML (see URL in the README_CURRENT_RELEASE file) and in XML on the <a href="https://ftp.ncbi.nih.gov/genomes/">FTP site</a>.</li>
<li>Change in the format of the README_CURRENT_RELEASE file distributed on the <a href="https://ftp.ncbi.nih.gov/genomes/">FTP site</a>.</li>
<li>Phase-out of the production of RefSeq scaffold BLAST databases. Top-level (chromosomes, unplaced and unlocalized scaffolds) BLAST databases are now the default on the organism-specific BLAST pages.</li>
<li>Increased stringency for the CpG islands displayed in Map Viewer. Only islands meeting the "strict" definition of 500bp or more in length, 50% or higher in GC content and 0.60 or higher observed CpG / expected CpG are now shown in the CpG island map.</li>
</ul>
<h2 id="version-51">Version 5.1<a id="version5.1" href=""></a></h2>
<p><strong>Release date: July 19 2013</strong></p>
<h3 id="process_24">Process</h3>
<ul>
<li>Exclusion of spans in EST or mRNA alignments from use by gene prediction if the spans contain an intron with much lower RNA-Seq support than the rest of the alignment.</li>
<li>Allowed co-existence of known RefSeq (NM/NR/NP_ accessions) and model RefSeq (XM/XR/XP_ accessions) on the same gene, resulting in an increase in the number of alternate variants for organisms with a large amount of evidence (e.g. RNA-Seq).</li>
</ul>
<h2 id="version-50">Version 5.0<a id="version5.0" href=""></a></h2>
<p><strong>Release date: April 11 2013</strong></p>
<h3 id="process_25">Process</h3>
<ul>
<li>Addition of a process to align RNA-Seq short reads from SRA to the genome.</li>
<li>Incorporation of RNA-Seq alignments in gene prediction.</li>
<li>Performance improvements.</li>
</ul>
<h3 id="reporting_18">Reporting</h3>
<ul>
<li>Production of RNA-Seq coverage graphs and intron feature tracks.</li>
<li>Addition of BioSamples in the annotated features' evidence support summary on the model RefSeq records.</li>
</ul>
<h2 id="version-41">Version 4.1<a id="version4.1" href=""></a></h2>
<p><strong>Release date: January 8 2013</strong></p>
<h3 id="process_26">Process</h3>
<ul>
<li>Classification of model RefSeqs (XR_) for predicted non-coding genes as misc_RNA.</li>
<li>Performance improvements.</li>
</ul>
<h3 id="reporting_19">Reporting</h3>
<ul>
<li>Addition of a /note on RNA and CDS features describing differences between the annotation product and the genome.</li>
<li>Addition of the BioProject ID on model RefSeq records (XM/XR/XP_).</li>
</ul>
<h2 id="version-40">Version 4.0<a id="version4.0" href=""></a></h2>
<p><strong>Release date: May 21 2012</strong></p>
<h3 id="process_27">Process</h3>
<ul>
<li>For some genomes, addition of <em>ab initio</em> predictions to the model RefSeq set if these have high-quality BLAST hits to known proteins.</li>
<li>Improvements to the assembly-assembly alignment process, used for tracking genes across assemblies.</li>
<li>Improvements to the alignment of genomic sequence to the genome. Alignments with long gaps are now split in the Map Viewer display.</li>
<li>Performance improvements.</li>
</ul>
<h3 id="reporting_20">Reporting</h3>
<ul>
<li>Addition of annotation files in GFF3 format to the FTP site.</li>
<li>Addition of BLAST databases of top-level molecules (chromosomes, unplaced and unlocalized scaffolds) to the set of BLAST databases displayed in the organism-specific BLAST pages.</li>
</ul>
<p><span id="shared-content-1"></span></p>
</div>
<!--/.col1-->
<div class="col2">
</div>
<!--/.col2-->
<div class="col3">
</div>
<!--/.col3-->
<div class="col4">
</div>
<!--/.col4-->
<div class="col5">
</div>
<div class="col6">
</div>
<div class="col7">
</div>
<div class="col8">
</div>
<div class="col9">
</div>
</div><!--/.content-->
</div><!--/.container-->
<div id="NCBIFooter_dynamic">
<!--<component id="NCBIBreadcrumbs"/>
<component id="NCBIHelpDesk"/>-->
<noscript><img alt="" src="/stat?jsdisabled=true&amp;ncbi_app=refseq&amp;ncbi_db=&amp;ncbi_pdid=static&amp;ncbi_phid=CE8B50AE7C8B92C100000000010600D8" /></noscript>
</div>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<div xmlns="http://www.w3.org/1999/xhtml" class="footer" id="footer" xml:base="http://127.0.0.1/sites/static/header_footer">
<section class="icon-section">
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
<div class="grid-container container">
<div class="icon-section_container">
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter">
<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40" fill="none">
<title>Twitter</title>
<g id="twitterx1008">
<path id="path1008" d="M6.06736 7L16.8778 20.8991L6.00001 32.2H10.2L18.6 23.1L25.668 32.2H34L22.8 17.5L31.9 7H28.4L20.7 15.4L14.401 7H6.06898H6.06736ZM9.66753 8.73423H12.9327L29.7327 30.4658H26.5697L9.66753 8.73423Z" fill="#5B616B"></path>
</g>
</svg>
</a>
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>Facebook</title>
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>LinkedIn</title>
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<defs>
<style>
.cls-11,
.cls-12 {
fill: #737373;
}
.cls-11 {
fill-rule: evenodd;
}
</style>
</defs>
<title>GitHub</title>
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
</path>
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
</path>
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
</path>
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
</path>
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
</path>
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
</path>
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
</svg></a>
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
<defs><style>.cls-1{fill:#737373;}</style></defs>
<title>NCBI Insights Blog</title>
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
</svg>
</a>
</div>
</div>
</section>
<section class="container-fluid bg-primary">
<div class="container pt-5">
<div class="row mt-3">
<div class="col-lg-3 col-12">
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
<ul class="list-inline social_media">
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
<title>Twitter</title>
<g id="twitterx1009" clip-path="url(#clip0_65276_3946)">
<path id="Vector_Twitter" d="M17.5006 34.6565C26.9761 34.6565 34.6575 26.9751 34.6575 17.4996C34.6575 8.02416 26.9761 0.342773 17.5006 0.342773C8.02514 0.342773 0.34375 8.02416 0.34375 17.4996C0.34375 26.9751 8.02514 34.6565 17.5006 34.6565Z" fill="#205493" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
<path id="path1009" d="M8.54811 8.5L16.2698 18.4279L8.50001 26.5H11.5L17.5 20L22.5486 26.5H28.5L20.5 16L27 8.5H24.5L19 14.5L14.5007 8.5H8.54927H8.54811ZM11.1197 9.73873H13.4519L25.4519 25.2613H23.1926L11.1197 9.73873Z" fill="white"></path>
</g>
<defs>
<clipPath id="clip0_65276_3946">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
<title>Facebook</title>
<g id="Facebook" clip-path="url(#clip0_1717_1086)">
<path id="Vector_Facebook" d="M15.1147 29.1371C15.1147 29.0822 15.1147 29.0296 15.1147 28.9747V18.9414H11.8183C11.6719 18.9414 11.6719 18.9414 11.6719 18.8018C11.6719 17.5642 11.6719 16.3289 11.6719 15.0937C11.6719 14.9793 11.7062 14.9518 11.816 14.9518C12.8683 14.9518 13.9206 14.9518 14.9751 14.9518H15.1215V14.8329C15.1215 13.8057 15.1215 12.774 15.1215 11.7492C15.1274 10.9262 15.3148 10.1146 15.6706 9.37241C16.1301 8.38271 16.9475 7.60378 17.9582 7.19235C18.6492 6.90525 19.3923 6.76428 20.1405 6.7783C21.0029 6.79202 21.8653 6.83091 22.7278 6.86065C22.8879 6.86065 23.048 6.89496 23.2082 6.90182C23.2974 6.90182 23.3271 6.94071 23.3271 7.02993C23.3271 7.54235 23.3271 8.05477 23.3271 8.5649C23.3271 9.16882 23.3271 9.77274 23.3271 10.3767C23.3271 10.4819 23.2974 10.5139 23.1921 10.5116C22.5379 10.5116 21.8814 10.5116 21.2271 10.5116C20.9287 10.5184 20.6316 10.5528 20.3395 10.6146C20.0822 10.6619 19.8463 10.7891 19.6653 10.9779C19.4842 11.1668 19.3672 11.4078 19.3307 11.6669C19.2857 11.893 19.2612 12.1226 19.2575 12.3531C19.2575 13.1904 19.2575 14.0299 19.2575 14.8695C19.2575 14.8946 19.2575 14.9198 19.2575 14.9564H23.0229C23.1807 14.9564 23.183 14.9564 23.1624 15.1074C23.0778 15.7662 22.9885 16.425 22.9039 17.0816C22.8322 17.6321 22.7636 18.1827 22.698 18.7332C22.6729 18.9437 22.6797 18.9437 22.4693 18.9437H19.2644V28.8992C19.2644 28.9793 19.2644 29.0593 19.2644 29.1394L15.1147 29.1371Z" fill="white"></path>
<path id="Vector_2_Facebook" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
</g>
<defs>
<clipPath id="clip0_1717_1086">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="Youtube" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
<title>Youtube</title>
<g id="YouTube" clip-path="url(#clip0_1717_1101)">
<path id="Vector_Youtube" d="M26.2571 11.4791C25.9025 11.1589 25.5709 10.9576 24.228 10.834C22.5512 10.6785 20.2797 10.6556 18.564 10.6533H16.4365C14.7208 10.6533 12.4493 10.6785 10.7725 10.834C9.43196 10.9576 9.09798 11.1589 8.7434 11.4791C7.81464 12.321 7.6202 14.6268 7.59961 16.8938C7.59961 17.3178 7.59961 17.741 7.59961 18.1635C7.62706 20.4121 7.82837 22.686 8.7434 23.521C9.09798 23.8412 9.42967 24.0425 10.7725 24.1661C12.4493 24.3216 14.7208 24.3445 16.4365 24.3468H18.564C20.2797 24.3468 22.5512 24.3216 24.228 24.1661C25.5686 24.0425 25.9025 23.8412 26.2571 23.521C27.1722 22.6929 27.3735 20.451 27.4009 18.2206C27.4009 17.7402 27.4009 17.2599 27.4009 16.7795C27.3735 14.5491 27.1699 12.3072 26.2571 11.4791ZM15.5604 20.5311V14.652L20.561 17.5001L15.5604 20.5311Z" fill="white"></path>
<path id="Vector_2_Youtube" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
</g>
<defs>
<clipPath id="clip0_1717_1101">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
</ul>
</div>
<div class="col-lg-3 col-12">
<p class="address_footer text-white">National Library of Medicine<br />
<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
Bethesda, MD 20894</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<p><a href="https://www.nlm.nih.gov/web_policies.html" class="text-white">Web Policies</a><br />
<a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white">FOIA</a><br />
<a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white" id="vdp">HHS Vulnerability Disclosure</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
<a href="https://www.nlm.nih.gov/accessibility.html" class="text-white">Accessibility</a><br />
<a href="https://www.nlm.nih.gov/careers/careers.html" class="text-white">Careers</a></p>
</div>
</div>
<div class="row">
<div class="col-lg-12 centered-lg">
<nav class="bottom-links">
<ul class="mt-3">
<li>
<a class="text-white" href="//www.nlm.nih.gov/">NLM</a>
</li>
<li>
<a class="text-white" href="https://www.nih.gov/">NIH</a>
</li>
<li>
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
</li>
<li>
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
</section>
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
</div>
</div>
<!--/.footer-->
<p class="last-updated small">Last updated: 2024-06-20T16:56:08Z</p>
</div>
<!--/.page-->
</div>
<!--/.wrap-->
<span class="PAFAppResources"></span>
</div><!-- /.twelve_col -->
</div>
<!-- /.grid -->
<!-- usually for JS scripts at page bottom -->
<span class="pagefixtures"></span>
<!-- CE8B5AF87C7FFCB1_0191SID /projects/refseq/refseq@1.21 portal104 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
<script type='text/javascript' src='/portal/js/portal.js'></script><script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4218658/js/3879255/4121861/4218656/4087685.js" snapshot="refseq"></script></body>
</html>