1204 lines
56 KiB
HTML
1204 lines
56 KiB
HTML
<?xml version="1.0" encoding="utf-8"?>
|
||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||
|
||
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||
<!-- AppResources meta begin -->
|
||
<meta name="paf-app-resources" content="" />
|
||
<!-- AppResources meta end -->
|
||
|
||
<!-- TemplateResources meta begin -->
|
||
<meta name="paf_template" content="StdNCol" />
|
||
|
||
<!-- TemplateResources meta end -->
|
||
|
||
<!-- Page meta begin -->
|
||
|
||
<!-- Page meta end -->
|
||
|
||
<!-- Logger begin -->
|
||
<meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_app" content="genbank" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_pdid" content="custom-page" />
|
||
<!-- Logger end -->
|
||
|
||
<title>Common Discrepancy Reports</title>
|
||
|
||
<!-- PageFixtures headcontent begin -->
|
||
|
||
<meta name="cms-local-nav-url" content="https://cms.ncbi.nlm.nih.gov//genbank/_nav" />
|
||
|
||
<!-- PageFixtures headcontent end -->
|
||
|
||
<!-- AppResources external_resources begin -->
|
||
<script type="text/javascript" src="/core/jig/1.15.6/js/jig.min.js"></script>
|
||
|
||
<!-- AppResources external_resources end -->
|
||
|
||
<!-- Page headcontent begin -->
|
||
<meta name="subsite" content="genbank" />
|
||
<meta name="path" content="genbank/new_asndisc_examples" />
|
||
<meta name="modified" content="2024-06-13T12:04:55Z" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="cms-edit-aux-url" content="http://cms.ncbi.nlm.nih.gov/node//edit" />
|
||
<!-- Page headcontent end -->
|
||
<!-- PageFixtures resources begin -->
|
||
<link xmlns="http://www.w3.org/1999/xhtml" type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218191/css/4207974/4206132.css" xml:base="http://127.0.0.1/sites/static/header_footer" />
|
||
|
||
<!-- PageFixtures resources end -->
|
||
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8DE79A7C81E7C100000000007B005C.m_6" />
|
||
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218137/css/4121862/3974050/3917732/251717/4108189/14534/45193/3534283/4128070/3407145/4005757/4062871.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218137/css/3529741/3529739.css" media="print" /></head>
|
||
<body class=" col2 custom-page">
|
||
<div class="grid">
|
||
<div class="col twelve_col nomargin shadow">
|
||
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
|
||
<div class="sysmessages">
|
||
<noscript>
|
||
<p class="nojs">
|
||
<strong>Warning:</strong>
|
||
The NCBI web site requires JavaScript to function.
|
||
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">more...</a>
|
||
</p>
|
||
</noscript>
|
||
</div>
|
||
<!--/.sysmessage-->
|
||
<div class="wrap">
|
||
<div class="page">
|
||
<div xmlns:xi="http://www.w3.org/2001/XInclude">
|
||
<div xmlns="http://www.w3.org/1999/xhtml" id="universal_header" xml:base="http://127.0.0.1/sites/static/header_footer">
|
||
<section class="usa-banner">
|
||
<div class="usa-accordion">
|
||
<header class="usa-banner-header">
|
||
<div class="usa-grid usa-banner-inner">
|
||
<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
|
||
<p>An official website of the United States government</p>
|
||
<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
|
||
<span class="usa-banner-button-text">Here's how you know</span>
|
||
</button>
|
||
</div>
|
||
</header>
|
||
<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
|
||
<div class="usa-banner-guidance-gov usa-width-one-half">
|
||
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
|
||
<div class="usa-media_block-body">
|
||
<p>
|
||
<strong>The .gov means it's official.</strong>
|
||
<br />
|
||
Federal government websites often end in .gov or .mil. Before
|
||
sharing sensitive information, make sure you're on a federal
|
||
government site.
|
||
</p>
|
||
</div>
|
||
</div>
|
||
<div class="usa-banner-guidance-ssl usa-width-one-half">
|
||
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
|
||
<div class="usa-media_block-body">
|
||
<p>
|
||
<strong>The site is secure.</strong>
|
||
<br />
|
||
The <strong>https://</strong> ensures that you are connecting to the
|
||
official website and that any information you provide is encrypted
|
||
and transmitted securely.
|
||
</p>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<div class="usa-overlay"></div>
|
||
<header class="ncbi-header" role="banner" data-section="Header">
|
||
|
||
<div class="usa-grid">
|
||
<div class="usa-width-one-whole">
|
||
|
||
<div class="ncbi-header__logo">
|
||
<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
|
||
<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
|
||
</a>
|
||
</div>
|
||
|
||
<div class="ncbi-header__account">
|
||
<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
|
||
<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
|
||
<span class="fa fa-user" aria-hidden="true">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
|
||
<g style="fill: #fff">
|
||
<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
|
||
<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
|
||
</g>
|
||
</svg>
|
||
</span>
|
||
<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
|
||
<span class="sr-only">Show account info</span>
|
||
</button>
|
||
</div>
|
||
|
||
<div class="ncbi-popup-anchor">
|
||
<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
|
||
<div class="ncbi-popup-head">
|
||
<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
|
||
<span class="fa fa-times">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
|
||
<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
|
||
</svg>
|
||
</span>
|
||
<span class="usa-sr-only">Close</span></button>
|
||
<h4>Account</h4>
|
||
</div>
|
||
<div class="account-user-info">
|
||
Logged in as:<br />
|
||
<b><span class="username" id="uname_long">username</span></b>
|
||
</div>
|
||
<div class="account-links">
|
||
<ul class="usa-unstyled-list">
|
||
<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
|
||
<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
|
||
<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
|
||
<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
</header>
|
||
<div role="navigation" aria-label="access keys">
|
||
<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
|
||
<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
|
||
<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
|
||
<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
|
||
<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
|
||
</div>
|
||
<section data-section="Alerts">
|
||
<div class="ncbi-alerts-placeholder"></div>
|
||
</section>
|
||
</div>
|
||
</div>
|
||
<!--/.header-->
|
||
<div class="header">
|
||
<div class="res_logo"><h1 class="res_name"><a href="/genbank/" title="GenBank home">GenBank</a></h1><h2 class="res_tagline">Public nucleic acid sequence repository</h2></div>
|
||
<div class="search"><form method="get" action="/nuccore/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="nuccore" selected="selected">Nucleotide</option><option value="gquery">All Databases</option><option value="books">Books</option><option value="sra" class="last">SRA</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label 
for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search Nucleotide" value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="isEnabled:false,disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'yes'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form></div>
|
||
|
||
</div>
|
||
<div class="nav_and_browser">
|
||
<div class="localnav"><ul class="jig-ncbilocalnav">
|
||
<li><a href="#">GenBank</a><ul>
|
||
<li><a href="/genbank/">About GenBank</a></li>
|
||
<li><a href="/genbank/submit_types">Submission Types</a></li>
|
||
<li><a href="/genbank/submit">Submission Tools</a></li>
|
||
<li><a href="/genbank/update">Update GenBank Records</a></li>
|
||
<li><a href="/nuccore/">Search</a></li>
|
||
<li><a href="/BLAST/Blast.cgi?CMD=Web&amp;PAGETYPE=BLASTHome">BLAST</a></li>
|
||
<li><a href="/genbank/statistics">Statistics</a></li>
|
||
<li><a href="/genbank/samplerecord/">Sample Record</a></li>
|
||
<li><a href="/genbank/sequencerevisionhistory/">Revision History</a></li>
|
||
<li><a href="/genbank/sequenceids/">Sequence IDs</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">Submit</a><ul>
|
||
<li><a href="/genbank/submit">Submission Tools</a></li>
|
||
<li><a href="/genbank/submit_types">Submission Types</a></li>
|
||
<li><a href="/WebSub/?tool=genbank">BankIt</a></li>
|
||
<li><a href="/genbank/table2asn">table2asn</a></li>
|
||
<li><a href="https://www.ncbi.nlm.nih.gov/sra/docs/sequence-data-processing">Sequence Data Processing</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">Genomes</a><ul>
|
||
<li><a href="/genbank/genomesubmit">Complete Genome Submission Guide</a></li>
|
||
<li><a href="/genbank/genomesubmit_annotation">Prokaryotic Genome Annotation Guide</a></li>
|
||
<li><a href="/genbank/eukaryotic_genome_submission_annotation">Eukaryotic Genome Annotation Guide</a></li>
|
||
<li><a href="/genbank/examples.wgs">Annotation Examples</a></li>
|
||
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/wgs/">Genome Submission Portal</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a title="Whole Genome Shotgun sequences and submissions" href="#">WGS</a><ul>
|
||
<li><a href="/genbank/wgs">About WGS</a></li>
|
||
<li><a href="/Traces/wgs">WGS Project List</a></li>
|
||
<li><a href="/genbank/wgs.submit">WGS Submission Guide</a></li>
|
||
<li><a href="/genbank/wgsfaq/">FAQ</a></li>
|
||
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/wgs/">Genome Submission Portal</a></li>
|
||
<li><a href="/genbank/eukaryotic_genome_submission_annotation">Eukaryotic Annotation Guide</a></li>
|
||
<li><a href="/genbank/genomesubmit_annotation">Prokaryotic Annotation Guide</a></li>
|
||
<li><a href="/genbank/asndisc">Discrepancy Report</a></li>
|
||
<li><a href="/assembly/agp/AGP_Specification/">AGP format</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">Metagenomes</a><ul>
|
||
<li><a href="/genbank/metagenome">About Metagenomes</a></li>
|
||
<li><a href="/genbank/structuredcomment">Structured Comment</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">TPA</a><ul>
|
||
<li><a href="/genbank/TPA">About TPA</a></li>
|
||
<li><a href="/genbank/tpafaq">FAQ</a></li>
|
||
<li><a href="/genbank/TPA-Exp">TPA-Exp</a></li>
|
||
<li><a href="/genbank/TPA-Inf">TPA-Inf</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">TSA</a><ul>
|
||
<li><a href="/genbank/TSA">About TSA</a></li>
|
||
<li><a href="/genbank/TSAguide">TSA Submission Guide</a></li>
|
||
<li><a href="/genbank/TSAfaq">FAQ</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">INSDC</a><ul>
|
||
<li><a href="/genbank/collab">About INSDC</a></li>
|
||
<li><a href="/genbank/collab/country">Geographic Location Name List</a></li>
|
||
<li><a href="/genbank/collab/db_xref">db_xref List</a></li>
|
||
<li><a href="http://www.insdc.org/documents/feature_table.html">Feature Table</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">Documentation</a><ul>
|
||
<li><a href="https://www.ncbi.nlm.nih.gov/sra/docs/sequence-data-processing/">Sequence Data Processing</a></li>
|
||
<li><a href="/genbank/submission_brokers">Submission Brokers</a></li>
|
||
<li><a href="/genbank/acc_prefix">Accession Number Prefixes</a></li>
|
||
<li><a href="/genbank/organelle_submit/">Organelle Submission Guide</a></li>
|
||
<li><a href="/genbank/monkeypox_submission/">Monkeypox Submission Guide</a></li>
|
||
<li><a href="/genbank/validation/">Common Submission Errors</a> </li>
|
||
<li><a href="/genbank/sequencecheck/">Ribosomal Submission Errors</a></li>
|
||
<li><a href="/genbank/sequencecheck/virus">Common Sequence Errors</a></li>
|
||
<li><a href="https://support.nlm.nih.gov/knowledgebase/category/?id=CAT-01240">Submission FAQs</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">Other</a><ul>
|
||
<li><a href="/genbank/htgs">About HTGs</a></li>
|
||
<li><a href="/genbank/dbest">About EST</a></li>
|
||
<li><a href="/genbank/dbgss">About GSS</a></li>
|
||
<li><a href="/genbank/tls">About TLS</a></li>
|
||
<li><a href="/genbank/tlsguide">Submit TLS</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul></div>
|
||
</div>
|
||
|
||
<!-- was itemctrl -->
|
||
<div class="container">
|
||
<div id="maincontent" class="content col twelve_col last">
|
||
<div class="col1">
|
||
<h1 id="common-discrepancy-reports">Common Discrepancy Reports</h1>
|
||
|
||
|
||
<h3 id="introduction">Introduction</h3>
|
||
|
||
|
||
<p>The Discrepancy Report is an evaluation of a single or multiple ASN.1
|
||
files, looking for suspicious annotation or annotation discrepancies
|
||
that NCBI staff has noticed commonly occur in genome submissions, both
|
||
complete and incomplete (WGS). A few of the problems that this
|
||
function was written to find include inconsistent locus_tag prefixes,
|
||
missing gene features, and suspect product names. </p>
|
||
|
||
|
||
<p>This page shows common reports generated by table2asn or the newest version of the command-line program asndisc. See <a href="/genbank/asndisc">more information about the Discrepancy Report</a> and those tools.</p>
|
||
|
||
|
||
<p>If you have questions about the Discrepancy Report, please contact us by email at <a href="mailto:genomes@ncbi.nlm.nih.gov">genomes@ncbi.nlm.nih.gov</a> prior to sending us your submission.</p>
|
||
|
||
|
||
<h2 id="common-reports">Common Reports</h2>
|
||
|
||
|
||
<h3 id="10_percentn">10_PERCENTN</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : The sequence specified has >10% N’s.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : If your sequence has a lot of gaps, it would be expected to give this warning. The warning is more to notify the user that there is a low-quality sequence that should be checked. Gap features may be needed if not already present.</p>
|
||
|
||
|
||
<p>Examples:</p>
|
||
|
||
|
||
<pre><code> 10_PERCENTN: 1 sequence has > 10% Ns
|
||
|
||
Contig9.33 (length 4226, 715 other)
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="all_seqs_circular">ALL_SEQS_CIRCULAR</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : FATAL error for when the technique is WGS, location is genomic, and all sequences are circular. </p>
|
||
|
||
|
||
<p><em>Suggestion</em> : If these are contigs of a draft genome, please remove the circular indication. If the sequences are the chromosome and plasmids of a genome where there is one sequence per chromosome, please change the locations to match the sequence.</p>
|
||
|
||
|
||
<p>Examples: </p>
|
||
|
||
|
||
<pre><code>ALL_SEQS_CIRCULAR: FATAL! ALL (10) sequences are circular
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="all_seqs_shorter_than_20kb">ALL_SEQS_SHORTER_THAN_20kb</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : The set of sequences in the genome are all &lt;20 kb, which is unexpected for a genome assembly. It may indicate that some sequences are missing, or that this is a transcriptome assembly or some other sort of submission rather than a genome assembly. </p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Check for missing sequences. If you are updating a genome by adding plasmids, please include the chromosome too as the submission portal will expect a chromosome. If this is a transcriptome assembly, then submit it to the <a href="https://submit.ncbi.nlm.nih.gov/subs/tsa/">TSA submission portal</a>.</p>
|
||
|
||
|
||
<p>Examples:</p>
|
||
|
||
|
||
<pre><code>ALL_SEQS_SHORTER_THAN_20kb: No sequences longer than 20,000 nt found.
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="bacterial_joined_features_no_exc">BACTERIAL_JOINED_FEATURES_NO_EXCEPTION</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : Bacteria do not have exons and introns in general, so there should not be joined features that do not have exceptions. Coding regions that are translated using ribosomal slippage should have the exception ‘ribosomal slippage’. One case where this message can be ignored is a feature that crosses the sequence origin, where a join is necessary.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Ignore if these cross the sequence origin. Add an exception if these are translated by ribosomal slippage. Do not use for any other cases (like annotation across a stop codon generated by sequence error)</p>
|
||
|
||
|
||
<p>Examples: </p>
|
||
|
||
|
||
<p>This is an example of one that is ok.</p>
|
||
|
||
|
||
<pre><code>BACTERIAL_JOINED_FEATURES_NO_EXCEPTIONS: 2 coding regions with joined locations have no exceptions
|
||
|
||
2 coding regions over the origin of circular DNA
|
||
</code></pre>
|
||
|
||
|
||
<p>This is an example that is not correct.</p>
|
||
|
||
|
||
<pre><code>BACTERIAL_JOINED_FEATURES_NO_EXCEPTIONS: FATAL! 1 coding regions with joined locations have no exceptions
|
||
|
||
FATAL! 1 coding region not over the origin of circular DNA
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="bacterial_partial_nonextendable_">BACTERIAL_PARTIAL_NONEXTENDABLE_EXCEPTION</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : All bacterial features that are internal to a sequence must be complete unless directly abutting a gap. If the feature is close to the end of a contig, the feature should be extended to the end. The software can detect when the feature can be extended a few bases to the end of the sequence.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : If an internally partial coding region lacks a valid start or stop, please make it a nonfunctional gene without a translation. If features are close to the end of a partial sequence or contig but too far away from the end for the software to fix, please extend to the end of the sequence. If the feature is internal and there are gaps, change the span so that it abuts the gap, even if that is not the end of a codon.</p>
|
||
|
||
|
||
<p>Examples:</p>
|
||
|
||
|
||
<pre><code>BACTERIAL_PARTIAL_NONEXTENDABLE_EXCEPTION_PROBLEMS: FATAL! 1 feature has partial ends that do not abut
|
||
the end of the sequence or a gap and cannot be extend by 3 or fewer nucleotides to do so.
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="bacteria_should_not_have_mrna">BACTERIA_SHOULD_NOT_HAVE_MRNA</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : Since bacteria have many polycistronic mRNAs and we annotate each gene and coding region separately, individual mRNAs for each gene are incorrect. </p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Remove the incorrect mRNA features. Rarely, the mRNAs are annotated for the complete polycistronic transcript</p>
|
||
|
||
|
||
<p>Examples: </p>
|
||
|
||
|
||
<pre><code>BACTERIA_SHOULD_NOT_HAVE_MRNA: 1 bacterial sequence has mRNA features
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="bad_gene_name">BAD_GENE_NAME</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : A gene symbol contains suspect phrases or characters. The gene symbol is longer than expected or has unusual characters.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Check the gene symbols. Do not use protein names for gene symbols. If in doubt, remove the gene symbol.</p>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>BAD_GENE_NAME: 1 gene contains suspect phrase or characters
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="bad_bacterial_gene_name">BAD_BACTERIAL_GENE_NAME</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : There are gene symbols that do not meet the correct format for bacterial genes</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Correct the gene symbols if possible (generally three lower case letters followed by capital letters as necessary)</p>
|
||
|
||
|
||
<p>Examples:</p>
|
||
|
||
|
||
<pre><code>BAD_BACTERIAL_GENE_NAME: 1 bacterial gene does not start with a lowercase letter
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="check_auth_name">CHECK_AUTH_NAME</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : One or more authors are missing their first or last name.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Correct the author names so that the family name is last, and the given name is first.</p>
|
||
|
||
|
||
<p>Examples:</p>
|
||
|
||
|
||
<pre><code>CHECK_AUTH_NAME: 2 pubs missing author’s first or last name
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="dup_genes_opposite_strands">DUP_GENES_OPPOSITE_STRANDS</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : There is a pair of genes with the same span but on different strands</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Remove one of the genes as this is an annotation error.</p>
|
||
|
||
|
||
<p>Examples:</p>
|
||
|
||
|
||
<pre><code>DUP_GENES_OPPOSITE_STRANDS: 2 genes match other genes in the same location, but on the opposite strand
|
||
|
||
gene 93158..93895
|
||
/gene="abcD"
|
||
gene complement(93158..93895)
|
||
/gene="cmk"
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="eukaryote_should_have_mrna">EUKARYOTE_SHOULD_HAVE_MRNA</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : All eukaryotic CDS features should be accompanied by mRNA features. This genome is lacking all mRNA features.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Add appropriate mRNA features for all the CDS features. Note that you should have transcript_IDs and protein_IDs on both the mRNA and the accompanying CDS feature. See <a href="https://www.ncbi.nlm.nih.gov/genbank/eukaryotic_genome_submission_annotation/#protein_id">https://www.ncbi.nlm.nih.gov/genbank/eukaryotic_genome_submission_annotation/#protein_id</a> for information about .tbl files, and the <a href="/genbank/genomes_gff/#formatting">mRNA and CDS features require transcript_id and protein_id qualifiers</a>.</p>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>EUKARYOTE_SHOULD_HAVE_MRNA: FATAL! No mRNA present
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="exon_intron_conflict">EXON_INTRON_CONFLICT</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : The spans of the exons and adjacent introns do not directly abut one another.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : The exon and intron spans must be edited to be directly adjacent to one another. However, rethink the use of these features. Introns and exons can be implied by the CDS and/or mRNA spans on a record, making them redundant and sources of inconsistencies. These features could be removed unless necessary for clarification.</p>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>EXON_INTRON_CONFLICT.asn:exon 1 lcl|ex1:1-10
|
||
|
||
EXON_INTRON_CONFLICT.asn:intron [intron] lcl|ex1:12-20
|
||
|
||
EXON_INTRON_CONFLICT.asn:exon 2 lcl|ex1:22-40
|
||
|
||
EXON_INTRON_CONFLICT.asn:exon 1 lcl|ex2:1-15
|
||
|
||
EXON_INTRON_CONFLICT.asn:intron [intron] lcl|ex2:10-25
|
||
|
||
EXON_INTRON_CONFLICT.asn:exon 2 lcl|ex2:20-30
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="find_badlen_trnas">FIND_BADLEN_TRNAS</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : tRNA is longer than expected. This is usually an annotation error. If the genome is archaeal, it is likely there is an unannotated intron.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Check for annotation errors. Annotate with a joined span if archaeal intron can be identified.</p>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>FIND_BADLEN_TRNAS: 1 tRNA is too long – over 150 nucleotides
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="gaps">GAPS</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : One of the sequences in the genome contains gaps. </p>
|
||
|
||
|
||
<p><em>Suggestion</em> : This can be ignored if gaps are expected; otherwise, check your sequence and annotation.</p>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>GAPS: 1 sequence contains gaps
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="gene_product_conflict">GENE_PRODUCT_CONFLICT</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : There are coding regions that have the same gene name as other coding regions, but the product name is different.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Check the pairs to see if the gene symbols and/or products are correct. Since there is no unified system for gene symbol naming, it is possible that the conflict is expected. The submitter must decide whether to ignore the warning or not.</p>
|
||
|
||
|
||
<p>Example: </p>
|
||
|
||
|
||
<pre><code>GENE_PRODUCT_CONFLICT: 2 coding regions have the same gene name as another coding region but a different product
|
||
|
||
2 coding regions have the same gene name (lptF) as another coding region but a different product
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="inconsistent_dblink">INCONSISTENT_DBLINK</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : All parts of a genome should have the same BioProject and BioSample pair. This test will tell you when that is not true.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Check to see which pieces have the incorrect BioProject or BioSample</p>
|
||
|
||
|
||
<p>Example: mismatch of the BioSamples</p>
|
||
|
||
|
||
<h3 id="inconsistent_dblink-dblink-repor">INCONSISTENT_DBLINK: DBLink Report (all present, inconsistent)</h3>
|
||
|
||
|
||
<pre><code>BioSample (all present, inconsistent)
|
||
2 DBLink objects have field BioSample value ‘SAMN01’
|
||
2 DBLink objects have field BioSample value ‘SAMN02’
|
||
BioProject (all present, all same)
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="inconsistent_structured_comments">INCONSISTENT_STRUCTURED_COMMENTS</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : All parts of a genome should have the same assembly structured comment information. One exception is that plasmids of a complete genome can have a different coverage value.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Most of the time this indicates a problem and the wrong pieces should be identified and corrected.</p>
|
||
|
||
|
||
<p>Example: Mismatch of the Assembly Method of the genome assembly structured comment</p>
|
||
|
||
|
||
<pre><code>INCONSISTENT_STRUCTURED_COMMENTS: Structured Comment Report (all present, inconsistent)
|
||
Structured comment field Assembly Method
|
||
(all present, inconsistent)
|
||
Structured comment field Expected Final Version
|
||
(all present, all same)
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="long_no_annotation">LONG_NO_ANNOTATION</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : Test indicates that there is at least one sequence that is greater than 5000 nt in length and has no annotation.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : The test is informational only. If you did not intend to provide annotation, it can be ignored. However, this is valuable in cases where annotation was inadvertently dropped from a sequence.</p>
|
||
|
||
|
||
<p>Example: </p>
|
||
|
||
|
||
<p>1 bioseq is longer than 5000nt and has no features.</p>
|
||
|
||
|
||
<h3 id="low_quality_region">LOW_QUALITY_REGION</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : The sequence contains a region where there are a large number of nucleotides that are not A,C,G,T. This can be correct if it is expected and not N. If there are runs of N’s, gap features should be added and there will be other errors in the discrepancy report for the N’s.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Check to see if these are non A,C,G,T,N IUPAC bases. If the bases in the region are N’s, add gaps.</p>
|
||
|
||
|
||
<p>Example: </p>
|
||
|
||
|
||
<pre><code>LOW_QUALITY_REGION: 1 sequence contains low quality region
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="misc_feature_with_product_qual">MISC_FEATURE_WITH_PRODUCT_QUAL</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : The record has misc_feature features that have a product qualifier. /product is only permitted on CDS and RNA features, since those are the features where something is made, not simply described.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Check these to see if misc_feature is appropriate. If so move the information in /product to /note. If not use the appropriate CDS or RNA feature.</p>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>MISC_FEATURE_WITH_PRODUCT_QUAL: 15 features have a product qualifier
|
||
|
||
misc_feature 7760..7894
|
||
/product="Truncated periplasmic divalent cation tolerance protein"
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="mrna_should_have_protein_transcr">MRNA_SHOULD_HAVE_PROTEIN_TRANSCRIPT_IDS</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : Eukaryotic mRNAs and the corresponding CDS features should have matching transcript_IDs and protein_IDs so that the pairing of each mRNA and CDS is exact. In this case there are no protein_ids or transcript_ids on the mRNA features. The same error message will be given if only one of the two IDs is missing (e.g., no transcript_IDs).</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Add the protein_ids and transcript_ids to the mRNA features. See <a href="https://www.ncbi.nlm.nih.gov/genbank/eukaryotic_genome_submission_annotation/#protein_id">https://www.ncbi.nlm.nih.gov/genbank/eukaryotic_genome_submission_annotation/#protein_id</a></p>
|
||
|
||
|
||
<p>Example: </p>
|
||
|
||
|
||
<pre><code>MRNA_SHOULD_HAVE_PROTEIN_TRANSCRIPT_IDS: no protein_id and transcript_id present
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="multiple_cds_on_mrna">MULTIPLE_CDS_ON_MRNA</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : All CDS features on eukaryotic genomes must have their own mRNA, even if the mRNA has an identical span (in the case of alternate start sites)</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : If the multiple CDS features are correct for the gene, then add mRNA along with transcript_ids to the second mRNA/CDS pair.
|
||
Remove any incorrect features. If the genome has reciprocal transcript_ids and protein_ids correctly added, this should not be a problem.</p>
|
||
|
||
|
||
<p>Example: </p>
|
||
|
||
|
||
<pre><code>MULTIPLE_CDS_ON_MRNA.asn:ex2 (length 247)
|
||
|
||
MULTIPLE_CDS_ON_MRNA.asn:ex3 (length 247)
|
||
|
||
MULTIPLE_CDS_ON_MRNA.asn:ex5 (length 247)
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="no_locus_tags">NO_LOCUS_TAGS</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : All CDS and RNA features of a genome must have a locus_tag qualifier. This error indicates that none are present.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Add locus_tag qualifiers (and genes, if necessary) to all CDS and RNA features.</p>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>NO_LOCUS_TAGS: FATAL! None of the 1871 genes has locus tag
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="protein_names">PROTEIN_NAMES</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : Message indicates that all proteins have the same name. The name that the proteins have will be given in the message.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : For eukaryotic genomes, this could be okay as many of the genomes are not well characterized. Since there is a lot more data for bacteria, we do expect that most bacterial proteins will have a functional name.</p>
|
||
|
||
|
||
<p>Example: </p>
|
||
|
||
|
||
<pre><code>PROTEIN_NAMES: All proteins have the same name ‘hypothetical protein’
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="required_strain">REQUIRED_STRAIN</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : Genomes of certain classes of organisms (bacteria, fungi) require strain qualifiers. Endosymbionts and metagenomic assemblies should have isolate instead.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Add a strain name for those organisms that require them. For those that require strain, isolate is not appropriate.</p>
|
||
|
||
|
||
<p>Example: </p>
|
||
|
||
|
||
<pre><code>REQUIRED_STRAIN: 7 biosources are missing required strain value
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="rrna_name_conflicts">RRNA_NAME_CONFLICTS</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : rRNA product names should be the standard names for the molecule.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Check for the correct rRNA naming policy for your organism, eg 16S ribosomal RNA. The list of expected names is:</p>
|
||
|
||
|
||
<ul>
|
||
<li>4.5S ribosomal RNA</li>
|
||
<li>5S ribosomal RNA</li>
|
||
<li>5.8S ribosomal RNA</li>
|
||
<li>12S ribosomal RNA</li>
|
||
<li>15S ribosomal RNA</li>
|
||
<li>16S ribosomal RNA</li>
|
||
<li>18S ribosomal RNA</li>
|
||
<li>21S ribosomal RNA</li>
|
||
<li>23S ribosomal RNA</li>
|
||
<li>25S ribosomal RNA</li>
|
||
<li>26S ribosomal RNA</li>
|
||
<li>28S ribosomal RNA</li>
|
||
<li>large subunit ribosomal RNA</li>
|
||
<li>small subunit ribosomal RNA</li>
|
||
</ul>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>FATAL! 4 rRNA product names are not standard. Correct the names to the standard format, eg 16S ribosomal RNA.
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="seq_shorter_than_200bp">SEQ_SHORTER_THAN_200bp</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : We expect that contigs of draft genomes be at least 200 bp in length. If you think that you need to keep a short contig, please contact genome staff to explain the situation.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Remove those sequences that are less than 200 nt in length.</p>
|
||
|
||
|
||
<p>Example: </p>
|
||
|
||
|
||
<pre><code>SEQ_SHORTER_THAN_200bp: 2 contigs are shorter than 200 nt
|
||
Contig69641.1 (length 100)
|
||
Contig72501.1 (length 187)
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="seq_shorter_than_50bp">SEQ_SHORTER_THAN_50bp</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : There are sequences that are smaller than 50 nt in length. These should be removed from your draft genome. These sequences will also get the SEQ_SHORTER_THAN_200bp warning.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Remove these sequences from your draft genome.</p>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>SEQ_SHORTER_THAN_50bp: 3 sequences are shorter than 50 nt
|
||
Contig9.22 (length 46)
|
||
Contig9.23 (length 12)
|
||
Contig9.24 (length 6)
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="short_lncrna">SHORT_LNCRNA</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : We expect that these features are &gt;200 bases in length. lncRNAs are long non-coding RNAs; such molecules are generally defined as having a length greater than 200bp and as not fitting into any other ncRNA class.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Check to see if the ncRNA should be defined by a different class. See the following list of classes:
|
||
<a href="http://www.insdc.org/rna_vocab.html">http://www.insdc.org/rna_vocab.html</a></p>
|
||
|
||
|
||
<p>Example: </p>
|
||
|
||
|
||
<pre><code>SHORT_LNCRNA: 1 lncRNA feature is suspiciously short
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="short_rrna">SHORT_RRNA</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : The rRNA is not partial at either end and is shorter than expected. This could be because the location is not correct or this is just a bit of rRNA-like sequence.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : There are several possibilities, depending upon what the problem is. Adjust the rRNA to the right location, or make the end partial if it is at the end of a sequence or abuts a gap, or mark the gene as "pseudo", as appropriate. These are the current conditions that will trigger this test:</p>
|
||
|
||
|
||
<ul>
|
||
<li>18S, 26S, 25S, 16S, small subunit and large subunit rRNAs that are less than 1000 nt and not partial at either end.</li>
|
||
<li>23S rRNA that is less than 2000bp and not partial at either end.</li>
|
||
<li>28S rRNA that is less than 3300bp and not partial at either end.</li>
|
||
<li>5.8S rRNA that is less than 130 nt and not partial at either end.</li>
|
||
<li>5S rRNA that is less than 90 nt and not partial at either end.</li>
|
||
</ul>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>FATAL: SHORT_RRNA: 1 rRNA feature is too short
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="show_transl_except">SHOW_TRANSL_EXCEPT</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : There are coding regions in the genome that have a transl_except qualifier to make a valid translation. The common example is a selenocysteine-containing protein. </p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Correct if it is a valid transl_except and not one that is an attempt to annotate across a stop codon due to sequencing error.</p>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>SHOW_TRANSL_EXCEPT: 3 coding regions have a translation exception
|
||
/transl_except=(pos:complement(333705..333707),aa:Sec)
|
||
/product="formate dehydrogenase-N subunit alpha"
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="source_quals">SOURCE_QUALS</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : Tests whether all the source qualifiers match on all sequences in the particular editing session. The source qualifiers should match for all pieces of the same genome. Qualifiers such as chromosome and plasmid_name are ignored in this test. There will be a separate test for each qualifier. This is flagged FATAL when not all match.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Fix the piece of the genome that does not match.</p>
|
||
|
||
|
||
<p>Examples:</p>
|
||
|
||
|
||
<p>This is example text where all match ‘SOURCE_QUALS: collection-date (all present, all same)’</p>
|
||
|
||
|
||
<p>This is example text where there is a FATAL because not all match: </p>
|
||
|
||
|
||
<pre><code>SOURCE_QUALS: FATAL! Strain (all present, some duplicates)
|
||
3 sources have strain = A
|
||
3 sources have strain = B
|
||
2 sources have strain = C
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="title_ends_with_sequence">TITLE_ENDS_WITH_SEQUENCE</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : There are apparent nucleotide sequence characters in the definition line of the record.</p>
|
||
|
||
|
||
<p><em>Suggestion</em> : This happens because there is no carriage return after the seqID of the sequence. Go back to the FASTA file and make sure the sequence begins on the second line.</p>
|
||
|
||
|
||
<p>Example:</p>
|
||
|
||
|
||
<pre><code>TITLE_ENDS_WITH_SEQUENCE:
|
||
2 deflines appear to end with sequence characters
|
||
|
||
DEFINITION 7_quiver GTCTTGTAGTTGATGGCCATATTTACCTGCATAGACTTGATTGACTT
|
||
TTTTAGGCACACCTTTGATATAG.
|
||
|
||
DEFINITION TCTTGTAGTTGATGGCCATATTTACCTGCATAGACTTGATTGACTTTTTTAGGCACACC
|
||
TTTGATATAG.
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="unpub_pub_without_title">UNPUB_PUB_WITHOUT_TITLE</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : Test is FATAL when there is an unpublished pub and no title. </p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Add the title of your prospective publication. It does not have to be the final title. </p>
|
||
|
||
|
||
<p>Example: This is the text you will see when there is no title</p>
|
||
|
||
|
||
<pre><code>UNPUB_PUB_WITHOUT_TITLE: FATAL! Unpublished pubs have no title
|
||
</code></pre>
|
||
|
||
|
||
<h3 id="unusual_nt">UNUSUAL_NT</h3>
|
||
|
||
|
||
<p><em>Explanation</em> : A base other than the most common IUPAC bases (A,C,G,T,N) is present in the sequence. </p>
|
||
|
||
|
||
<p><em>Suggestion</em> : Can be correct if any of the ambiguous bases other than N are used. Can be wrong if text words were included at the beginning of your FASTA sequence. Check your sequence to see if it is correct. </p>
|
||
|
||
|
||
<p>Example: </p>
|
||
|
||
|
||
<pre><code>UNUSUAL_NT: 1 sequence contains nucleotides that are not ATCG or N
|
||
</code></pre>
|
||
|
||
|
||
</div>
|
||
<!--/.col1-->
|
||
<div class="col2">
|
||
<div class="rightnav">
|
||
<h2 id="genome-resources">Genome Resources</h2>
|
||
<ul>
|
||
<li><a href="/genbank/wgs/">About WGS</a></li>
|
||
<li><a href="https://www.ncbi.nlm.nih.gov/Traces/wgs/?view=wgs">WGS Browser</a></li>
|
||
<li><a href="/genbank/genomesubmit/">Genome Submission Guide</a></li>
|
||
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/genome/">Genome Submission Portal</a></li>
|
||
<li><a href="/genbank/wgs_update/">Update Genome Records</a></li>
|
||
<li><a href="/genbank/wgsfaq/">FAQ</a></li>
|
||
<li><a href="/genbank/table2asn">table2asn</a></li>
|
||
<li><a href="/genbank/diploid_haps">Submitting Multiple Haplotype Assemblies</a></li>
|
||
<li><a href="/WebSub/template.cgi/">Create Submission Template</a></li>
|
||
<li><a href="/genbank/eukaryotic_genome_submission/">Eukaryotic Annotation Guide</a></li>
|
||
<li><a href="/genbank/genomesubmit_annotation/">Prokaryotic Annotation Guide</a></li>
|
||
<li><a href="/genbank/examples.wgs/">Annotation Example Files</a></li>
|
||
<li><a href="/genbank/genomes_gff">Annotating Genomes with GFF3 or GTF files</a></li>
|
||
<li><a href="/genbank/genome_validation">Validation Error Explanations for Genomes</a></li>
|
||
<li><a href="/genbank/asndisc/">Discrepancy Report</a></li>
|
||
<li><a href="https://www.ncbi.nlm.nih.gov/genome/annotation_prok/">NCBI Prokaryotic Genome Annotation Pipeline</a></li>
|
||
<li><a href="https://www.ncbi.nlm.nih.gov/assembly/agp/AGP_Specification/">AGP Format</a></li>
|
||
<li><a href="/genbank/metagenome/">Metagenome Submission Guide</a></li>
|
||
<li><a href="/genbank/structuredcomment/">Structured Comment</a></li>
|
||
<li><a href="/bioproject/">BioProject</a></li>
|
||
<li><a href="/biosample/">BioSample</a></li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
<!--/.col2-->
|
||
<div class="col3">
|
||
|
||
</div>
|
||
<!--/.col3-->
|
||
<div class="col4">
|
||
|
||
</div>
|
||
<!--/.col4-->
|
||
<div class="col5">
|
||
|
||
</div>
|
||
<div class="col6">
|
||
|
||
</div>
|
||
<div class="col7">
|
||
|
||
</div>
|
||
<div class="col8">
|
||
|
||
</div>
|
||
<div class="col9">
|
||
|
||
</div>
|
||
</div><!--/.content-->
|
||
</div><!--/.container-->
|
||
<div id="NCBIFooter_dynamic">
|
||
<div class="breadcrumbs">You are here:
|
||
<span id="breadcrumb_text"><a href="/guide/">NCBI</a></span></div>
|
||
<a id="help-desk-link" class="help_desk" href="https://support.ncbi.nlm.nih.gov/ics/support/default.asp?Time=2025-03-05T05:21:40-05:00&amp;Snapshot=%2Fprojects%2Fstaticsites%2Fgenbank%2Fgenbank@2.21&amp;Host=portal106&amp;ncbi_phid=CE8DE79A7C81E7C100000000007B005C&amp;ncbi_session=CE8B5AF87C7FFCB1_0191SID&amp;from=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fgenbank%2Fnew_asndisc_examples%2F&amp;Ncbi_App=genbank&amp;Page=custom-page&amp;style=classic&amp;deptID=28049" target="_blank">Support Center</a>
|
||
<noscript><img alt="" src="/stat?jsdisabled=true&amp;ncbi_app=genbank&amp;ncbi_db=&amp;ncbi_pdid=custom-page&amp;ncbi_phid=CE8DE79A7C81E7C100000000007B005C" /></noscript>
|
||
</div>
|
||
|
||
|
||
<div xmlns:xi="http://www.w3.org/2001/XInclude">
|
||
<div xmlns="http://www.w3.org/1999/xhtml" class="footer" id="footer" xml:base="http://127.0.0.1/sites/static/header_footer">
|
||
<section class="icon-section">
|
||
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
|
||
<div class="grid-container container">
|
||
<div class="icon-section_container">
|
||
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter">
|
||
<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40" fill="none">
|
||
<title>Twitter</title>
|
||
<g id="twitterx1008">
|
||
<path id="path1008" d="M6.06736 7L16.8778 20.8991L6.00001 32.2H10.2L18.6 23.1L25.668 32.2H34L22.8 17.5L31.9 7H28.4L20.7 15.4L14.401 7H6.06898H6.06736ZM9.66753 8.73423H12.9327L29.7327 30.4658H26.5697L9.66753 8.73423Z" fill="#5B616B"></path>
|
||
</g>
|
||
</svg>
|
||
</a>
|
||
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<title>Facebook</title>
|
||
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
|
||
</path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<title>LinkedIn</title>
|
||
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
|
||
</path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<defs>
|
||
<style>
|
||
.cls-11,
|
||
.cls-12 {
|
||
fill: #737373;
|
||
}
|
||
|
||
.cls-11 {
|
||
fill-rule: evenodd;
|
||
}
|
||
</style>
|
||
</defs>
|
||
<title>GitHub</title>
|
||
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
|
||
</path>
|
||
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
|
||
</path>
|
||
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
|
||
</path>
|
||
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
|
||
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
|
||
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
|
||
<defs><style>.cls-1{fill:#737373;}</style></defs>
|
||
<title>NCBI Insights Blog</title>
|
||
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
|
||
</svg>
|
||
</a>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section class="container-fluid bg-primary">
|
||
<div class="container pt-5">
|
||
<div class="row mt-3">
|
||
<div class="col-lg-3 col-12">
|
||
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
|
||
<ul class="list-inline social_media">
|
||
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer">
|
||
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
|
||
<title>Twitter</title>
|
||
<g id="twitterx1009" clip-path="url(#clip0_65276_3946)">
|
||
<path id="Vector_Twitter" d="M17.5006 34.6565C26.9761 34.6565 34.6575 26.9751 34.6575 17.4996C34.6575 8.02416 26.9761 0.342773 17.5006 0.342773C8.02514 0.342773 0.34375 8.02416 0.34375 17.4996C0.34375 26.9751 8.02514 34.6565 17.5006 34.6565Z" fill="#205493" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
|
||
<path id="path1009" d="M8.54811 8.5L16.2698 18.4279L8.50001 26.5H11.5L17.5 20L22.5486 26.5H28.5L20.5 16L27 8.5H24.5L19 14.5L14.5007 8.5H8.54927H8.54811ZM11.1197 9.73873H13.4519L25.4519 25.2613H23.1926L11.1197 9.73873Z" fill="white"></path>
|
||
</g>
|
||
<defs>
|
||
<clipPath id="clip0_65276_3946">
|
||
<rect width="35" height="35" fill="white"></rect>
|
||
</clipPath>
|
||
</defs>
|
||
</svg>
|
||
</a></li>
|
||
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
|
||
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
|
||
<title>Facebook</title>
|
||
<g id="Facebook" clip-path="url(#clip0_1717_1086)">
|
||
<path id="Vector_Facebook" d="M15.1147 29.1371C15.1147 29.0822 15.1147 29.0296 15.1147 28.9747V18.9414H11.8183C11.6719 18.9414 11.6719 18.9414 11.6719 18.8018C11.6719 17.5642 11.6719 16.3289 11.6719 15.0937C11.6719 14.9793 11.7062 14.9518 11.816 14.9518C12.8683 14.9518 13.9206 14.9518 14.9751 14.9518H15.1215V14.8329C15.1215 13.8057 15.1215 12.774 15.1215 11.7492C15.1274 10.9262 15.3148 10.1146 15.6706 9.37241C16.1301 8.38271 16.9475 7.60378 17.9582 7.19235C18.6492 6.90525 19.3923 6.76428 20.1405 6.7783C21.0029 6.79202 21.8653 6.83091 22.7278 6.86065C22.8879 6.86065 23.048 6.89496 23.2082 6.90182C23.2974 6.90182 23.3271 6.94071 23.3271 7.02993C23.3271 7.54235 23.3271 8.05477 23.3271 8.5649C23.3271 9.16882 23.3271 9.77274 23.3271 10.3767C23.3271 10.4819 23.2974 10.5139 23.1921 10.5116C22.5379 10.5116 21.8814 10.5116 21.2271 10.5116C20.9287 10.5184 20.6316 10.5528 20.3395 10.6146C20.0822 10.6619 19.8463 10.7891 19.6653 10.9779C19.4842 11.1668 19.3672 11.4078 19.3307 11.6669C19.2857 11.893 19.2612 12.1226 19.2575 12.3531C19.2575 13.1904 19.2575 14.0299 19.2575 14.8695C19.2575 14.8946 19.2575 14.9198 19.2575 14.9564H23.0229C23.1807 14.9564 23.183 14.9564 23.1624 15.1074C23.0778 15.7662 22.9885 16.425 22.9039 17.0816C22.8322 17.6321 22.7636 18.1827 22.698 18.7332C22.6729 18.9437 22.6797 18.9437 22.4693 18.9437H19.2644V28.8992C19.2644 28.9793 19.2644 29.0593 19.2644 29.1394L15.1147 29.1371Z" fill="white"></path>
|
||
<path id="Vector_2_Facebook" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
|
||
</g>
|
||
<defs>
|
||
<clipPath id="clip0_1717_1086">
|
||
<rect width="35" height="35" fill="white"></rect>
|
||
</clipPath>
|
||
</defs>
|
||
</svg>
|
||
</a></li>
|
||
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="Youtube" target="_blank" rel="noopener noreferrer">
|
||
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
|
||
<title>Youtube</title>
|
||
<g id="YouTube" clip-path="url(#clip0_1717_1101)">
|
||
<path id="Vector_Youtube" d="M26.2571 11.4791C25.9025 11.1589 25.5709 10.9576 24.228 10.834C22.5512 10.6785 20.2797 10.6556 18.564 10.6533H16.4365C14.7208 10.6533 12.4493 10.6785 10.7725 10.834C9.43196 10.9576 9.09798 11.1589 8.7434 11.4791C7.81464 12.321 7.6202 14.6268 7.59961 16.8938C7.59961 17.3178 7.59961 17.741 7.59961 18.1635C7.62706 20.4121 7.82837 22.686 8.7434 23.521C9.09798 23.8412 9.42967 24.0425 10.7725 24.1661C12.4493 24.3216 14.7208 24.3445 16.4365 24.3468H18.564C20.2797 24.3468 22.5512 24.3216 24.228 24.1661C25.5686 24.0425 25.9025 23.8412 26.2571 23.521C27.1722 22.6929 27.3735 20.451 27.4009 18.2206C27.4009 17.7402 27.4009 17.2599 27.4009 16.7795C27.3735 14.5491 27.1699 12.3072 26.2571 11.4791ZM15.5604 20.5311V14.652L20.561 17.5001L15.5604 20.5311Z" fill="white"></path>
|
||
<path id="Vector_2_Youtube" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
|
||
</g>
|
||
<defs>
|
||
<clipPath id="clip0_1717_1101">
|
||
<rect width="35" height="35" fill="white"></rect>
|
||
</clipPath>
|
||
</defs>
|
||
</svg>
|
||
</a></li>
|
||
</ul>
|
||
</div>
|
||
<div class="col-lg-3 col-12">
|
||
<p class="address_footer text-white">National Library of Medicine<br />
|
||
<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
|
||
Bethesda, MD 20894</a></p>
|
||
</div>
|
||
<div class="col-lg-3 col-12 centered-lg">
|
||
<p><a href="https://www.nlm.nih.gov/web_policies.html" class="text-white">Web Policies</a><br />
|
||
<a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white">FOIA</a><br />
|
||
<a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white" id="vdp">HHS Vulnerability Disclosure</a></p>
|
||
</div>
|
||
<div class="col-lg-3 col-12 centered-lg">
|
||
<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
|
||
<a href="https://www.nlm.nih.gov/accessibility.html" class="text-white">Accessibility</a><br />
|
||
<a href="https://www.nlm.nih.gov/careers/careers.html" class="text-white">Careers</a></p>
|
||
</div>
|
||
</div>
|
||
<div class="row">
|
||
<div class="col-lg-12 centered-lg">
|
||
<nav class="bottom-links">
|
||
<ul class="mt-3">
|
||
<li>
|
||
<a class="text-white" href="//www.nlm.nih.gov/">NLM</a>
|
||
</li>
|
||
<li>
|
||
<a class="text-white" href="https://www.nih.gov/">NIH</a>
|
||
</li>
|
||
<li>
|
||
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
|
||
</li>
|
||
<li>
|
||
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
|
||
</li>
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
|
||
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
|
||
</div>
|
||
</div>
|
||
<!--/.footer-->
|
||
<p class="last-updated small">Last updated: 2024-06-13T12:04:55Z</p>
|
||
</div>
|
||
<!--/.page-->
|
||
</div>
|
||
<!--/.wrap-->
|
||
<span class="PAFAppResources"></span>
|
||
|
||
|
||
</div><!-- /.twelve_col -->
|
||
</div>
|
||
<!-- /.grid -->
|
||
|
||
|
||
|
||
<!-- usually for JS scripts at page bottom -->
|
||
<span class="pagefixtures"></span>
|
||
|
||
|
||
<!-- CE8B5AF87C7FFCB1_0191SID /projects/staticsites/genbank/genbank@2.21 portal106 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
|
||
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
|
||
|
||
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4218137/js/3879255/4121861/1490097/4087685.js" snapshot="genbank"></script></body>
|
||
</html>
|
||
|