1140 lines
77 KiB
HTML
1140 lines
77 KiB
HTML
<?xml version="1.0" encoding="utf-8"?>
|
||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||
|
||
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||
<!-- AppResources meta begin -->
|
||
<meta name="paf-app-resources" content="" />
|
||
<!-- AppResources meta end -->
|
||
|
||
<!-- TemplateResources meta begin -->
|
||
<meta name="paf_template" content="StdNCol" />
|
||
|
||
<!-- TemplateResources meta end -->
|
||
|
||
<!-- Page meta begin -->
|
||
|
||
<!-- Page meta end -->
|
||
|
||
<!-- Logger begin -->
|
||
<meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_app" content="genbank" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_pdid" content="custom-page" />
|
||
<!-- Logger end -->
|
||
|
||
<title>Prokaryotic and Eukaryotic Genomes Submission Guide</title>
|
||
|
||
<!-- PageFixtures headcontent begin -->
|
||
|
||
<meta name="cms-local-nav-url" content="https://cms.ncbi.nlm.nih.gov//genbank/_nav" />
|
||
|
||
<!-- PageFixtures headcontent end -->
|
||
|
||
<!-- AppResources external_resources begin -->
|
||
<script type="text/javascript" src="/core/jig/1.15.6/js/jig.min.js"></script>
|
||
|
||
<!-- AppResources external_resources end -->
|
||
|
||
<!-- Page headcontent begin -->
|
||
<meta name="subsite" content="genbank" />
|
||
<meta name="path" content="genbank/genomesubmit" />
|
||
<meta name="modified" content="2024-03-22T14:10:13Z" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="cms-edit-aux-url" content="http://cms.ncbi.nlm.nih.gov/node//edit" />
|
||
<!-- Page headcontent end -->
|
||
<!-- PageFixtures resources begin -->
|
||
<link xmlns="http://www.w3.org/1999/xhtml" type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218191/css/4207974/4206132.css" xml:base="http://127.0.0.1/sites/static/header_footer" />
|
||
|
||
<!-- PageFixtures resources end -->
|
||
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8C99E57C812441000000000021001A.m_7" />
|
||
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218137/css/4121862/3974050/3917732/251717/4108189/14534/45193/3534283/4128070/3407145/4005757/4062871.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218137/css/3529741/3529739.css" media="print" /></head>
|
||
<body class=" col2 custom-page">
|
||
<div class="grid">
|
||
<div class="col twelve_col nomargin shadow">
|
||
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
|
||
<div class="sysmessages">
|
||
<noscript>
|
||
<p class="nojs">
|
||
<strong>Warning:</strong>
|
||
The NCBI web site requires JavaScript to function.
|
||
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">more...</a>
|
||
</p>
|
||
</noscript>
|
||
</div>
|
||
<!--/.sysmessage-->
|
||
<div class="wrap">
|
||
<div class="page">
|
||
<div xmlns:xi="http://www.w3.org/2001/XInclude">
|
||
<div xmlns="http://www.w3.org/1999/xhtml" id="universal_header" xml:base="http://127.0.0.1/sites/static/header_footer">
|
||
<section class="usa-banner">
|
||
<div class="usa-accordion">
|
||
<header class="usa-banner-header">
|
||
<div class="usa-grid usa-banner-inner">
|
||
<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
|
||
<p>An official website of the United States government</p>
|
||
<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
|
||
<span class="usa-banner-button-text">Here's how you know</span>
|
||
</button>
|
||
</div>
|
||
</header>
|
||
<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
|
||
<div class="usa-banner-guidance-gov usa-width-one-half">
|
||
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
|
||
<div class="usa-media_block-body">
|
||
<p>
|
||
<strong>The .gov means it's official.</strong>
|
||
<br />
|
||
Federal government websites often end in .gov or .mil. Before
|
||
sharing sensitive information, make sure you're on a federal
|
||
government site.
|
||
</p>
|
||
</div>
|
||
</div>
|
||
<div class="usa-banner-guidance-ssl usa-width-one-half">
|
||
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
|
||
<div class="usa-media_block-body">
|
||
<p>
|
||
<strong>The site is secure.</strong>
|
||
<br />
|
||
The <strong>https://</strong> ensures that you are connecting to the
|
||
official website and that any information you provide is encrypted
|
||
and transmitted securely.
|
||
</p>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<div class="usa-overlay"></div>
|
||
<header class="ncbi-header" role="banner" data-section="Header">
|
||
|
||
<div class="usa-grid">
|
||
<div class="usa-width-one-whole">
|
||
|
||
<div class="ncbi-header__logo">
|
||
<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
|
||
<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
|
||
</a>
|
||
</div>
|
||
|
||
<div class="ncbi-header__account">
|
||
<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
|
||
<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
|
||
<span class="fa fa-user" aria-hidden="true">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
|
||
<g style="fill: #fff">
|
||
<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
|
||
<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
|
||
</g>
|
||
</svg>
|
||
</span>
|
||
<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
|
||
<span class="sr-only">Show account info</span>
|
||
</button>
|
||
</div>
|
||
|
||
<div class="ncbi-popup-anchor">
|
||
<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
|
||
<div class="ncbi-popup-head">
|
||
<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
|
||
<span class="fa fa-times">
|
||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
|
||
<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
|
||
</svg>
|
||
</span>
|
||
<span class="usa-sr-only">Close</span></button>
|
||
<h4>Account</h4>
|
||
</div>
|
||
<div class="account-user-info">
|
||
Logged in as:<br />
|
||
<b><span class="username" id="uname_long">username</span></b>
|
||
</div>
|
||
<div class="account-links">
|
||
<ul class="usa-unstyled-list">
|
||
<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
|
||
<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
|
||
<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
|
||
<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
</div>
|
||
</div>
|
||
</header>
|
||
<div role="navigation" aria-label="access keys">
|
||
<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
|
||
<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
|
||
<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
|
||
<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
|
||
<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
|
||
</div>
|
||
<section data-section="Alerts">
|
||
<div class="ncbi-alerts-placeholder"></div>
|
||
</section>
|
||
</div>
|
||
</div>
|
||
<!--/.header-->
|
||
<div class="header">
|
||
<div class="res_logo"><h1 class="res_name"><a href="/genbank/" title="GenBank home">GenBank</a></h1><h2 class="res_tagline">Public nucleic acid sequence repository</h2></div>
|
||
<div class="search"><form method="get" action="/nuccore/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="nuccore" selected="selected">Nucleotide</option><option value="books">Books</option><option value="pmc">PMC</option><option value="pubmed" class="last">PubMed</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label 
for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search Nucleotide" value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="isEnabled:false,disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'yes'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form></div>
|
||
|
||
</div>
|
||
<div class="nav_and_browser">
|
||
<div class="localnav"><ul class="jig-ncbilocalnav">
|
||
<li><a href="#">GenBank</a><ul>
|
||
<li><a href="/genbank/">About GenBank</a></li>
|
||
<li><a href="/genbank/submit_types">Submission Types</a></li>
|
||
<li><a href="/genbank/submit">Submission Tools</a></li>
|
||
<li><a href="/genbank/update">Update GenBank Records</a></li>
|
||
<li><a href="/nuccore/">Search</a></li>
|
||
<li><a href="/BLAST/Blast.cgi?CMD=Web&amp;PAGETYPE=BLASTHome">BLAST</a></li>
|
||
<li><a href="/genbank/statistics">Statistics</a></li>
|
||
<li><a href="/genbank/samplerecord/">Sample Record</a></li>
|
||
<li><a href="/genbank/sequencerevisionhistory/">Revision History</a></li>
|
||
<li><a href="/genbank/sequenceids/">Sequence IDs</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">Submit</a><ul>
|
||
<li><a href="/genbank/submit">Submission Tools</a></li>
|
||
<li><a href="/genbank/submit_types">Submission Types</a></li>
|
||
<li><a href="/WebSub/?tool=genbank">BankIt</a></li>
|
||
<li><a href="/genbank/table2asn">table2asn</a></li>
|
||
<li><a href="https://www.ncbi.nlm.nih.gov/sra/docs/sequence-data-processing">Sequence Data Processing</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">Genomes</a><ul>
|
||
<li><a href="/genbank/genomesubmit">Complete Genome Submission Guide</a></li>
|
||
<li><a href="/genbank/genomesubmit_annotation">Prokaryotic Genome Annotation Guide</a></li>
|
||
<li><a href="/genbank/eukaryotic_genome_submission_annotation">Eukaryotic Genome Annotation Guide</a></li>
|
||
<li><a href="/genbank/examples.wgs">Annotation Examples</a></li>
|
||
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/wgs/">Genome Submission Portal</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a title="Whole Genome Shotgun sequences and submissions" href="#">WGS</a><ul>
|
||
<li><a href="/genbank/wgs">About WGS</a></li>
|
||
<li><a href="/Traces/wgs">WGS Project List</a></li>
|
||
<li><a href="/genbank/wgs.submit">WGS Submission Guide</a></li>
|
||
<li><a href="/genbank/wgsfaq/">FAQ</a></li>
|
||
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/wgs/">Genome Submission Portal</a></li>
|
||
<li><a href="/genbank/eukaryotic_genome_submission_annotation">Eukaryotic Annotation Guide</a></li>
|
||
<li><a href="/genbank/genomesubmit_annotation">Prokaryotic Annotation Guide</a></li>
|
||
<li><a href="/genbank/asndisc">Discrepancy Report</a></li>
|
||
<li><a href="/assembly/agp/AGP_Specification/">AGP format</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">Metagenomes</a><ul>
|
||
<li><a href="/genbank/metagenome">About Metagenomes</a></li>
|
||
<li><a href="/genbank/structuredcomment">Structured Comment</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">TPA</a><ul>
|
||
<li><a href="/genbank/TPA">About TPA</a></li>
|
||
<li><a href="/genbank/tpafaq">FAQ</a></li>
|
||
<li><a href="/genbank/TPA-Exp">TPA-Exp</a></li>
|
||
<li><a href="/genbank/TPA-Inf">TPA-Inf</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">TSA</a><ul>
|
||
<li><a href="/genbank/TSA">About TSA</a></li>
|
||
<li><a href="/genbank/TSAguide">TSA Submission Guide</a></li>
|
||
<li><a href="/genbank/TSAfaq">FAQ</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">INSDC</a><ul>
|
||
<li><a href="/genbank/collab">About INSDC</a></li>
|
||
<li><a href="/genbank/collab/country">Geographic Location Name List</a></li>
|
||
<li><a href="/genbank/collab/db_xref">db_xref List</a></li>
|
||
<li><a href="http://www.insdc.org/documents/feature_table.html">Feature Table</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">Documentation</a><ul>
|
||
<li><a href="https://www.ncbi.nlm.nih.gov/sra/docs/sequence-data-processing/">Sequence Data Processing</a></li>
|
||
<li><a href="/genbank/submission_brokers">Submission Brokers</a></li>
|
||
<li><a href="/genbank/acc_prefix">Accession Number Prefixes</a></li>
|
||
<li><a href="/genbank/organelle_submit/">Organelle Submission Guide</a></li>
|
||
<li><a href="/genbank/monkeypox_submission/">Monkeypox Submission Guide</a></li>
|
||
<li><a href="/genbank/validation/">Common Submission Errors</a> </li>
|
||
<li><a href="/genbank/sequencecheck/">Ribosomal Submission Errors</a></li>
|
||
<li><a href="/genbank/sequencecheck/virus">Common Sequence Errors</a></li>
|
||
<li><a href="https://support.nlm.nih.gov/knowledgebase/category/?id=CAT-01240">Submission FAQs</a></li>
|
||
</ul>
|
||
</li>
|
||
<li><a href="#">Other</a><ul>
|
||
<li><a href="/genbank/htgs">About HTGs</a></li>
|
||
<li><a href="/genbank/dbest">About EST</a></li>
|
||
<li><a href="/genbank/dbgss">About GSS</a></li>
|
||
<li><a href="/genbank/tls">About TLS</a></li>
|
||
<li><a href="/genbank/tlsguide">Submit TLS</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul></div>
|
||
</div>
|
||
|
||
<!-- was itemctrl -->
|
||
<div class="container">
|
||
<div id="maincontent" class="content col twelve_col last">
|
||
<div class="col1">
|
||
<h1 id="prokaryotic-and-eukaryotic-genom">Prokaryotic and Eukaryotic Genomes Submission Guide</h1>
|
||
|
||
|
||
<p>Both WGS and non-WGS genomes, including gapless complete bacterial
|
||
chromosomes, can be submitted via the Submission Portal. You will be
|
||
asked to choose whether the genome being submitted is considered WGS
|
||
or not. The differences for GenBank purposes are:</p>
|
||
|
||
|
||
<p><img src="/core/assets/genbank/images/WGSorNot.png" alt="Each chromosome in a non-WGS genome is in a single piece and there are no extra sequences. A WGS genome may still have chromosomes in multiple pieces and/or unplaced sequences" /></p>
|
||
|
||
|
||
<p><strong>non-WGS</strong></p>
|
||
|
||
|
||
<ul>
|
||
<li>Each chromosome is in a single sequence and there are no extra sequences</li>
|
||
<li>Each sequence in the genome must be assigned to a chromosome or plasmid or organelle</li>
|
||
<li>Plasmids and organelles can still be in multiple pieces.</li>
|
||
</ul>
|
||
|
||
|
||
<p><strong>WGS</strong></p>
|
||
|
||
|
||
<ul>
|
||
<li>One or more chromosomes are in multiple pieces and/or some sequences are not assembled into chromosomes</li>
|
||
</ul>
|
||
|
||
|
||
<p><strong>In both cases</strong></p>
|
||
|
||
|
||
<ul>
|
||
<li>There can still be gaps within the sequences; you will supply that information in the submission</li>
|
||
<li>Plasmids and organelles can still be in multiple pieces.</li>
|
||
<li>Internal sequences must be arranged in the correct order and orientation.</li>
|
||
<li>Sequences concatenated in unknown order are not allowed.</li>
|
||
</ul>
|
||
|
||
|
||
<h2 id="table-of-contents">Table of Contents</h2>
|
||
|
||
|
||
<ul>
|
||
<li><a href="#types">Type of submission</a></li>
|
||
<li><a href="#events">Events</a></li>
|
||
<li><a href="#files">Submission files</a>: <a href="#fasta">fasta</a>, <a href="#sqn">.sqn</a>, <a href="#agp">AGP</a>, <a href="#genomeinfo">Genome Info</a></li>
|
||
<li><a href="#metadata">Common metadata for all genomes</a></li>
|
||
<li><a href="#submitting_genomes">Submitting genomes</a></li>
|
||
<li><a href="#pgap">Requesting Prokaryotic Genome Annotation Pipeline (PGAP) annotation of prokaryotic genomes</a></li>
|
||
<li><a href="#run_pgap">Running PGAP yourself</a></li>
|
||
</ul>
|
||
|
||
|
||
<h2 id="types">Type of submission</h2>
|
||
|
||
|
||
<ul>
|
||
<li>No annotation or requesting PGAP annotation? Submit the fasta sequences in <a href="#single">single</a> or <a href="#batch">Batch</a> mode</li>
|
||
<li>Annotated genome? Create a <a href="#sqn">.sqn file in ASN format</a> and submit it in <a href="#single">single</a> or <a href="#batch">Batch</a> mode</li>
|
||
<li>Lots of genomes in the same BioProject? Use the <a href="#batch">Batch</a> mode</li>
|
||
<li>Individual assemblies of the haplotypes of a diploid or polyploid genome? See <a href="/genbank/diploid_haps">Submitting Multiple Haplotype Assemblies</a></li>
|
||
</ul>
|
||
|
||
|
||
<h2 id="single">Submit a single genome</h2>
|
||
|
||
|
||
<p>This is the simplest submission route because you just fill in a web form in the <a href="https://submit.ncbi.nlm.nih.gov/subs/genome">Submission Portal</a> and upload fasta (or sqn) files of the genome sequences. You will need to:</p>
|
||
|
||
|
||
<ul>
|
||
<li>Provide the BioProject created for this research effort, e.g., during submission of the reads to SRA OR register a new BioProject during the genome submission.</li>
|
||
<li>Provide the BioSample created during submission of the reads to SRA OR register a BioSample during the genome submission</li>
|
||
<li>Assert whether this is a WGS or non-WGS genome assembly</li>
|
||
<li>Upload <a href="#fasta">fasta</a> sequences of the genome (or <a href="#sqn">.sqn</a> file if the genome is annotated)</li>
|
||
<li>Upload optional <a href="#agp">AGP</a> file(s) to assemble scaffolds (unplaced or unlocalized) and/or chromosomes from the submitted sequences. This is for WGS only. Remember that you can submit the gapped scaffolds themselves instead of submitting contigs plus an AGP file</li>
|
||
<li>Provide information in response to prompts during the genome submission (see the <a href="#metadata">common metadata</a> section):<ul>
|
||
<li>Genome Assembly Data and other information about this genome assembly</li>
|
||
<li>Gap Information (What the Ns represent)</li>
|
||
<li>Chromosome and plasmid assignments. Every sequence in a non-WGS genome must have a chromosome or plasmid assignment and every chromosome must be submitted as a single sequence.</li>
|
||
<li>Authors and a title (for fasta submissions)</li>
|
||
<li>Release date (immediately after processing OR a specific date. Release will be on that date or upon publication, whichever is first)</li>
|
||
<li>Optional request for annotation of prokaryotic genomes by <a href="#pgap">PGAP</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
|
||
|
||
<h2 id="batch">Submit a batch of genomes</h2>
|
||
|
||
|
||
<p>This submission route allows you to submit as many as 400 WGS or non-WGS genomes in a single batch submission. In this route you choose Batch/multiple in the <a href="https://submit.ncbi.nlm.nih.gov/subs/genome">Genome Submission Portal</a>, fill in the web form, upload a Genome Info file with genome metadata, and upload or preload fasta files (or sqn files if there is annotation) of the genome sequences. All the genomes within a batch must:</p>
|
||
|
||
|
||
<ul>
|
||
<li>Be part of the same BioProject</li>
|
||
<li>Be either WGS or non-WGS, not a mix of both types</li>
|
||
<li>Have the same (initial) release date</li>
|
||
<li>Have the same gap/Ns information</li>
|
||
<li>Contain either <a href="#fasta">fasta</a> files or ASN (<a href="#sqn">.sqn</a>) files, not a mix of file types. We recommend submitting fasta files unless the submission needs to include annotation or the Genome-Assembly-Data structured comment</li>
|
||
<li>Have a single file for each genome, including any plasmid or organelle sequences</li>
|
||
<li>Have a separate file for each genome, not all the genomes together</li>
|
||
<li>Request <a href="#pgap">PGAP</a> annotation or not (only relevant for prokaryotic genomes)</li>
|
||
<li>Be just a single layer (= no AGP file(s))</li>
|
||
</ul>
|
||
|
||
|
||
<p>You will need to:</p>
|
||
|
||
|
||
<ul>
|
||
<li>Provide the BioProject created for this research effort, e.g., during submission of the reads to SRA OR register a new BioProject during the genome submission.</li>
|
||
<li>Provide the BioSamples that were preregistered, e.g., during submission of the reads to SRA OR register BioSamples during the genome submission</li>
|
||
<li>Include assignment (i.e., chromosome, plasmid or organelle) information about the sequence in the fasta files (see the <a href="#batch_assignment">Additional requirements for batch submissions</a> section)</li>
|
||
<li>Upload or <a href="/genbank/preloadfiles">preload</a> fasta sequences (or sqn files for annotated genomes) of the genomes. Each genome is in a separate single file, uniquely named, but the files can be archived together<ul>
|
||
<li>An option for batch submission is to preload the files of genome assemblies before beginning the submission, rather than uploading them in the browser during the submission. You can preload using Aspera, the FTP protocol or Filezilla. Detailed instructions for using the preload option for genome submissions are at <a href="/genbank/preloadfiles">How to preload files</a>.</li>
|
||
</ul>
|
||
</li>
|
||
<li>Upload a <a href="#genomeinfo">Genome Info table</a> with information specific to each genome</li>
|
||
<li>Provide this information in response to prompts on the web pages during the genome submission (see the <a href="#metadata">common metadata</a> section):<ul>
|
||
<li>Gap Information (What the Ns represent)</li>
|
||
<li>Authors and a title (for fasta submissions)</li>
|
||
<li>Release date (immediately after processing OR a specific date. Release will be on that date or upon publication, whichever is first)</li>
|
||
<li>Optional request for annotation of prokaryotic genomes by <a href="#pgap">PGAP</a></li>
|
||
</ul>
|
||
</li>
|
||
</ul>
|
||
|
||
|
||
<h3 id="events">Events</h3>
|
||
|
||
|
||
<ol>
|
||
<li>Only if you will be submitting a genome with annotation and you have not yet registered a BioProject and BioSample for this genome, then you will register the genome sequencing project with the <a href="https://submit.ncbi.nlm.nih.gov/subs/bioproject/">BioProject</a> and <a href="https://submit.ncbi.nlm.nih.gov/subs/biosample/">BioSample</a> databases so that a locus_tag prefix will be assigned to the BioProject:BioSample pair. If you have already registered a BioProject and BioSample for this genome, eg when submitting the reads to SRA, then a locus_tag prefix should have already been assigned. A file of the locus_tag prefix(es) for the BioSamples within a BioProject is linked to the <a href="https://submit.ncbi.nlm.nih.gov/subs/bioproject/">BioProject submission</a>. Write to <a href="mailto:genomes@ncbi.nlm.nih.gov">genomes@ncbi.nlm.nih.gov</a> if you did not receive a locus_tag prefix. Do not register a duplicate BioProject or BioSample for the same genome. Provide these preregistered BioProject and BioSample accessions in the genome submission. Remember that annotation is optional for genome submissions. If you are submitting a genome without annotation, even if you will be requesting PGAP annotation, then you'll create the BioSample (and BioProject, if necessary) during the genome submission. Genomes sequenced as part of the same research effort can belong to a single BioProject, so it's common to create a BioProject during the submission of one genome and then include that BioProject during the submission of additional genomes.</li>
|
||
<li>
|
||
<p>Make the genome assembly <a href="#files">data files</a>.</p>
|
||
<ul>
|
||
<li>Unannotated genomes just need <a href="#fasta">fasta</a> files</li>
|
||
<li>Annotated genomes need to make <a href="#sqn">.sqn</a> file submissions by running the command line program table2asn (the replacement of tbl2asn), and then fixing Errors and Fatals that are indicated in the .val and .dr files. Failure to do this will cause serious delays in processing.</li>
|
||
</ul>
|
||
</li>
|
||
<li>
|
||
<p>If you have higher-level assembly information, scaffolds and/or chromosomes, then generate an <a href="#agp">AGP file</a> to build those objects from the wgs-contigs.</p>
|
||
</li>
|
||
<li>If you are submitting a batch of genomes (maximum of 400 per batch), then create a <a href="#genomeinfo">Genome Info</a> file. Note that for batch submissions all chromosome and plasmid assignment information must be included in the header of the relevant fasta sequence, as described in the 'see details' section of the <a href="#batch_assignment">Additional requirements for batch submissions</a>.</li>
|
||
<li><a href="#submitting_genomes">Submit</a> via the <a href="https://submit.ncbi.nlm.nih.gov/subs/genome">Genome Submission Portal</a>.</li>
|
||
<li><a href="#whathappensnext">What happens after submission</a></li>
|
||
</ol>
|
||
|
||
|
||
<h3 id="files">Submission Files</h3>
|
||
|
||
|
||
<h4 id="fasta">Fasta files</h4>
|
||
|
||
|
||
<h6 id="put-the-sequences-file-into-fast"><em>Put the sequences file into fasta format</em></h6>
|
||
|
||
|
||
<ul>
|
||
<li>These files have the suffix .fsa.</li>
|
||
<li>Each sequence has a definition line beginning with a '>' and a unique identifier (SeqID), e.g., contig001, contig002. The SeqIDs must:<ul>
|
||
<li>Be &lt;50 characters</li>
|
||
<li>Include only letters, digits, hyphens (-), underscores (_), periods (.), colons (:), asterisks (*), and number signs (#).</li>
|
||
<li>Be unique within a genome</li>
|
||
</ul>
|
||
</li>
|
||
<li>Include in the definition line the organism and the relevant strain, breed, cultivar or isolate, if one exists for the sequenced organism. Any additional source qualifiers will be added from the registered BioSample to the genome during processing.</li>
|
||
<li>Remove any Ns from the beginning or end of each sequence.</li>
|
||
<li>Contigs should be >199nt, unless they are part of multi-component scaffolds in an AGP file</li>
|
||
</ul>
|
||
|
||
|
||
<h6 id="batch_assignment"><em>IMPORTANT Additional requirements for batch submissions</em></h6>
|
||
|
||
|
||
<p>[1] All the sequences of a single genome must be in one file</p>
|
||
|
||
|
||
<p>[2] The chromosome, plasmid, and organelle assignment information must be encoded in the input files of a batch submission, as described in these details:</p>
|
||
|
||
|
||
<ul>
|
||
<li>To indicate that a single gapped or ungapped sequence represents the
|
||
chromosome, include [location=chromosome] in the fasta definition line. <strong>This must be true for at
|
||
least one sequence when "option 1" (non-wgs genome) is selected.</strong></li>
|
||
<li>Sequences that are a complete circular chromosome or plasmid need to
|
||
have the circular topology and the completeness included. If a gapped
|
||
or nongapped circular chromosome has a gap at the end or has not been
|
||
circularized, please also include that information unbracketed. Examples:<ul>
|
||
<li>[topology=circular] [completeness=complete]</li>
|
||
<li>[topology=circular] gap at end, not circularized</li>
|
||
</ul>
|
||
</li>
|
||
<li>Sequences that are part of a plasmid, or an organellar chromosome, or specific nuclear chromosomes
|
||
need to have that information included in the fasta definition line, in these formats:<ul>
|
||
<li>[plasmid-name=pBR322]</li>
|
||
<li>[plasmid-name=unnamed] (when the plasmid name is not known. However, be sure that each plasmid has a unique name, e.g., unnamed1 and unnamed2.)</li>
|
||
<li>[location=mitochondrion]</li>
|
||
<li>[location=chloroplast]</li>
|
||
<li>[chromosome=2]</li>
|
||
</ul>
|
||
</li>
|
||
<li>Follow the <a href="#chr_names">Plasmid and chromosome names rules</a></li>
|
||
<li>
|
||
<p>Here is an example of the definition line for the complete plasmid of
|
||
a bacterial submission (all the text must be in a single line):</p>
|
||
<p>>contig02 [organism=Clostridium difficile] [strain=ABDC] [plasmid-name=pABDC1] [topology=circular] [completeness=complete]</p>
|
||
</li>
|
||
<li>
|
||
<p>Here is an example of a gapped sequence that represents chromosome 2
|
||
of a eukaryotic genome (all the text must be in a single line), so both the chromosome location and chromosome
|
||
name are included:</p>
|
||
<p>>Seq001 [organism=Puma concolor] [isolate=ABDC] [location=chromosome] [chromosome=2]</p>
|
||
</li>
|
||
<li>
|
||
<p>Here is an example of sequences that belong to chromosome 5 so only the chromosome name is included
|
||
(all the text must be in a single line):</p>
|
||
<p>>Seq001 [organism=Puma concolor] [isolate=ABDC] [chromosome=5]</p>
|
||
<p>>Seq002 [organism=Puma concolor] [isolate=ABDC] [chromosome=5]</p>
|
||
</li>
|
||
</ul>
|
||
|
||
|
||
<h4 id="sqn">.sqn files</h4>
|
||
|
||
|
||
<p>These are generally required only when the submitter wants to include annotation. Annotation is optional for GenBank genome submissions.</p>
|
||
|
||
|
||
<h6 id="see-details"><a data-jig="ncbitoggler" href="#sqn-details">see details</a></h6>
|
||
|
||
|
||
<div id="sqn-details">
|
||
<p>Prepare a .sqn file for submission using <a href="/genbank/table2asn">table2asn</a>. table2asn reads a template file along with the fasta sequence and annotation table files, and outputs an ASN (.sqn) file for submission to GenBank. Follow these three steps:</p>
|
||
<p>1) <strong>Prepare data files</strong></p>
|
||
<p>Prepare fasta files as above, with one file per genome.</p>
|
||
<p>Prepare these additional files:</p>
|
||
<ul>
|
||
<li>a <a href="https://submit.ncbi.nlm.nih.gov/genbank/template/submission/">template file</a> with submitter and publication information.</li>
|
||
<li>annotation files. These correspond to and have the same basenames as the .fsa files. There are two different file formats:<ul>
|
||
<li>5-column feature table files that have the suffix .tbl. Be sure to read the annotation requirements in the appropriate annotation guidelines:<ul>
|
||
<li><a href="/genbank/genomesubmit_annotation">Prokaryotic Annotation Guidelines</a></li>
|
||
<li><a href="/genbank/eukaryotic_genome_submission">Eukaryotic Annotation Guidelines</a></li>
|
||
</ul>
|
||
</li>
|
||
<li>GFF files in GenBank-specific format that have the suffix .gff. Be sure to read the instructions at <a href="/genbank/genomes_gff">Genome Annotation with GFF or GTF files</a>.</li>
|
||
</ul>
|
||
</li>
|
||
<li>Genome-Assembly-Data Structured Comment. This information can be provided during the genome submission, but if many genomes are being submitted it could be simpler to include this in the .sqn file itself. To do that, use the <a href="https://submit.ncbi.nlm.nih.gov/structcomment/genomes/">Genome-Assembly-Data Structured Comment Template</a> to create the file and then have it included with <code>-w genasm.cmt</code> in the table2asn command line, below.</li>
|
||
<li>quality scores of the sequences. These files correspond to and have the same basenames as the .fsa files, but have the suffix .qvl. The quality scores are optional.</li>
|
||
</ul>
|
||
<p>2) <strong>Run <a href="/genbank/table2asn">table2asn</a></strong></p>
|
||
<p>A. Annotation is in GenBank-specific GFF files: follow the <a href="/genbank/genomes_gff/#run">instructions for GFF files</a>.</p>
|
||
<p>B. Annotation is in .tbl files: follow these instructions. Note that a few of the arguments in table2asn have changed relative to tbl2asn, eg <code>-indir</code> instead of <code>-p</code>. The <a href="/genbank/table2asn">table2asn</a> page provides more details. Here are the instructions for creating annotated genome files when the annotation is in .tbl files: </p>
|
||
<p>Sample command line when the sequences are contigs (overlapping reads with no Ns representing gaps) is</p>
|
||
<p><strong>table2asn -indir path_to_files -t template -M n -Z</strong></p>
|
||
<p>If the sequences contain Ns that represent gaps, then run the appropriate table2asn command line with the <code>-l</code> and <code>-gaps-min</code> arguments, as described in the <a href="/genbank/wgs_gapped/">Gapped Genome Submission</a> page. The command line for the most common situation (runs of 10 or more Ns represent a gap, and there are no gaps of completely unknown size, and the evidence for linkage across the gaps is "paired-ends") is:</p>
|
||
<p><strong>table2asn -indir path_to_files -t template -M n -Z -gaps-min 10 -l paired-ends</strong></p>
|
||
<p>For either case you can include the source information in the definition line of each contig, as described in the fasta defline components section, above. Alternatively, the organism and strain (or breed or isolate) can be included with -j in the table2asn command line. The additional source qualifiers will be obtained from the registered BioSample. However, chromosome, plasmid and organelle assignment information must be included in the fasta definition lines. In addition, if the submission is an annotated prokaryotic genome, then include the genetic code with -j in the command line, for example:</p>
|
||
<p><strong>table2asn -indir path_to_files -t template -M n -Z -j "[organism=Clostridium difficile ABDC] [strain=ABDC] [gcode=11]"</strong></p>
|
||
<p>Here are some commonly used arguments when there is no annotation or when the annotation input is a .tbl file: </p>
|
||
<table>
|
||
<thead>
|
||
<tr>
|
||
<th>Option</th>
|
||
<th>Description</th>
|
||
</tr>
|
||
</thead>
|
||
<tbody>
|
||
<tr>
|
||
<td>-M n</td>
|
||
<td>To run genome-specific functions and validator and to fix some known product name problems</td>
|
||
</tr>
|
||
<tr>
|
||
<td>-Z</td>
|
||
<td>Runs the sequence discrepancy report, which looks for subtle inconsistencies within a set of related records, and outputs a file with the .dr suffix. See the <a href="/genbank/asndisc">Discrepancy Report page</a> for information about its output. NOTE: this argument is changed from tbl2asn because it no longer requires (or accepts) an output file name.</td>
|
||
</tr>
|
||
<tr>
|
||
<td>-t template.sbt</td>
|
||
<td>Specifies the template file (.sbt), which can be created at <a href="https://submit.ncbi.nlm.nih.gov/genbank/template/submission/">GenBank Submission Template</a>. If the .sbt file is in a different directory the full path must be specified.</td>
|
||
</tr>
|
||
<tr>
|
||
<td>-j</td>
|
||
<td>Allows the addition of source qualifiers that are the same for every sequence in every fasta file being read. Examples:<br /><code>-j "[organism=Mus musculus] [tissue-type=liver]"</code><br /><code>-j "[organism=Escherichia coli] [strain=ABC1] [gcode=11]"</code></td>
|
||
</tr>
|
||
<tr>
|
||
<td>-V b</td>
|
||
<td>Generate GenBank Flatfile with a .gbf suffix. This file is only for viewing; it is not for submission. Adding this could slow table2asn so you may choose to include it only for the first run to make sure that the annotation looks as expected.</td>
|
||
</tr>
|
||
<tr>
|
||
<td>-c s</td>
|
||
<td>Add exception to every CDS with an intron shorter than 11bp. Adds /artificial_location="low-quality sequence region" to the CDS, allowing the CDS to pass the ShortIntron error, and causes the protein definition line to be prefaced with "LOW QUALITY PROTEIN:". This option should only be used if you are confident that the protein translation is correct. Do not use short introns to force a translation containing frameshifts or large deletions.</td>
|
||
</tr>
|
||
<tr>
|
||
<td>-Y File_name</td>
|
||
<td>Import a file that is a text comment</td>
|
||
</tr>
|
||
<tr>
|
||
<td>-w assembly.cmt</td>
|
||
<td>Import Structured Comment Table. This is optional, but can be helpful when there are multiple genomes, because there will be less information to supply on the web form during submission. This file can be created at <a href="https://submit.ncbi.nlm.nih.gov/structcomment/genomes/">Structured Comment Template</a></td>
|
||
</tr>
|
||
</tbody>
|
||
</table>
|
||
<p>3) <strong>Check the output of the validation and discrepancy report and fix problems</strong></p>
|
||
<p>A. Check the .stats file for the number, severity and type of errors that are present in the .val files. All Errors and Rejects need to be fixed. The presence of errors will slow processing. See the <a href="/genbank/genome_validation">genome validation errors</a> for guidance. Contact <a href="mailto:genomes@ncbi.nlm.nih.gov">genomes@ncbi.nlm.nih.gov</a> with any questions about the validation output. During processing there may be some questions about other aspects of the submission.</p>
|
||
<p>B. Check the .dr file for the results of the discrepancy report. Categories prefaced with FATAL are nearly always unacceptable and must be fixed. (The exceptions are FATALs about bacteria when the genome is not bacterial.) Some of the categories are informational, for example <em>PROTEIN_NAMES: All proteins have same name "hypothetical protein"</em>. Reports that are not flagged as fatal should be examined to determine if they represent annotation artifacts that need to be corrected or if they are acceptable due to the biology of the genome. See the <a href="/genbank/asndisc/#evaluating_the_output">discrepancy report examples and explanations</a> and <a href="/genbank/new_asndisc_examples">common discrepancy reports</a> for guidance. Write to <a href="mailto:genomes@ncbi.nlm.nih.gov">genomes@ncbi.nlm.nih.gov</a> and send the .dr file with questions about this report.</p>
|
||
<p>Some common discrepancy reports of which to be aware:</p>
|
||
<ul>
|
||
<li>
|
||
<p><em>NO_ANNOTATION</em> and <em>LONG_NO_ANNOTATION</em>. If either of these is expected, that is fine. However, if not expected, then check that the IDs in the .tbl file definition lines match the SeqIDs of the sequences in the fasta file. When you submit, please let us know when the sequences in the LONG_NO_ANNOTATION report are expected to be unannotated, so that we know to ignore this report.</p>
|
||
</li>
|
||
<li>
|
||
<p><em>PROTEIN_NAMES: All proteins have same name "hypothetical protein"</em>. If this is expected, that is fine. </p>
|
||
</li>
|
||
<li>
|
||
<p><em>FATAL: BACTERIAL_PARTIAL_NONEXTENDABLE_PROBLEMS</em>. If this is a eukaryotic genome, you can ignore this error. If this is a prokaryotic genome, then every CDS must begin and end with valid start and stop codons, respectively, or be partial and either extend to the end of the sequence or abut a gap within the scaffold sequence. However, in the .tbl file you should annotate with <em>pseudo</em> any genes that are 'broken' but are not thought to be pseudogenes. These are genes that do not encode the expected translation, for example because of internal stop codons or missing start or stop codons, and are often caused by problems with the sequence and/or assembly.</p>
|
||
</li>
|
||
</ul>
|
||
<p>C. Make any necessary fixes to the input .fsa and/or .tbl files and run table2asn again.</p>
|
||
</div>
|
||
|
||
|
||
<h4 id="agp">AGP file (optional)</h4>
|
||
|
||
|
||
<p>AGP files provide the ordering and orientation information to construct scaffolds from contigs, or to construct chromosomes from scaffolds and/or contigs. However, remember that we do accept the gapped scaffolds themselves as the basic sequences of the genome. If you choose to submit a multi-layer submission with an AGP file, then know that the AGP file defines these genome assemblies, so be sure to include all wgs-contigs that are considered to be part of the genome in the AGP file. However, if the sequences in the fasta (or .sqn) files are already the scaffolds or chromosomes, then do not make an AGP file.</p>
|
||
|
||
|
||
<h6 id="see-details_1"><a data-jig="ncbitoggler" href="#agp-file">see details</a></h6>
|
||
|
||
|
||
<div id="agp-file">
|
||
<p>See this page for the <a href="/genbank/genome_agp_specification/">AGP format</a>.</p>
|
||
<p>There are 3 types of AGP files:</p>
|
||
<ul>
|
||
<li>Unplaced scaffolds = scaffolds without chromosome or plasmid assignments</li>
|
||
<li>Chromosome = the objects built in the AGP file represent the nuclear or organellar chromosomes or plasmids</li>
|
||
<li>Unlocalized scaffolds = scaffolds that are known to belong to a nuclear or organellar chromosome or plasmid but are not part of the assembly to build those chromosomes or plasmids</li>
|
||
<li>NOTE: assignments will need to be provided for the objects being assembled in the Chromosome and Unlocalized AGP files, not the sequences in the fasta/sqn files</li>
|
||
</ul>
|
||
<p>Some specific requests are:</p>
|
||
<ul>
|
||
<li>Encode the type of object in the object names in column 1 like this:<ul>
|
||
<li>scaffold01, scaffold02, etc for scaffolds</li>
|
||
<li>chr (for bacteria) OR chr1, chr2, etc for eukaryotic chromosomes</li>
|
||
<li>the plasmid names for plasmids (eg pBR322). If the name is not known, then use 'unnamed'.</li>
|
||
<li>MT for the mitochondrial genome. MT_scaf01, MT_scaf02, etc for mitochondrial scaffolds.</li>
|
||
</ul>
|
||
</li>
|
||
<li>Use "100" as the length and U as the component-type for gaps of unknown size, as that is the GenBank convention. These will appear as gap(unk100) in the flatfile view of the GenBank record.</li>
|
||
<li>Use the same contig identifiers in column 6 (the component-id) that you used in the .fsa files. If the components have already been assigned accession numbers, then you need to use the accession.version numbers as the component identifiers; do not use just the accession number.</li>
|
||
<li>Use different identifiers for the object in column 1 and the component in column 6, even if the object consists of a single component.</li>
|
||
</ul>
|
||
<p>You can validate the basic format of your AGP file at <a href="https://www.ncbi.nlm.nih.gov/projects/genome/assembly/agp/agp_validate.cgi">https://www.ncbi.nlm.nih.gov/projects/genome/assembly/agp/agp_validate.cgi</a>. In addition, the standalone command-line program, <a href="/genbank/genome_agp_validation/">agp_validate</a>, is available by <a href="https://ftp.ncbi.nih.gov/toolbox/ncbi_tools/converters/by_program/agp_validate/">anonymous FTP</a> to validate the AGP file more extensively yourself. The <code>-help</code> option details the arguments and command line format.</p>
|
||
</div>
|
||
|
||
|
||
<h4 id="genomeinfo">Genome Info table</h4>
|
||
|
||
|
||
<p>The Genome Info table is required for batch submissions and is used to provide the <a href="#genome_assembly_data">Genome Assembly Data</a> of each genome. You can either fill in the table during the genome submission or prepare the file ahead of time and upload it during the submission. To prepare it ahead of time, download the <a href="https://submit.ncbi.nlm.nih.gov/templates/">Genome Info file template</a>. The instructions are on the first tab of this file and the template is on the second tab. Complete the second tab (Genome_Data), then save the worksheet as a Text (Tab-delimited) file -- (use 'File, Save as, Save as type: Text (Tab-delimited)' ).</p>
|
||
|
||
|
||
<h6 id="see-details-of-the-required-and-"><a data-jig="ncbitoggler" href="#genome-info-table">see details of the required and optional information</a></h6>
|
||
|
||
|
||
<div id="genome-info-table">
|
||
<p>Each row in the template represents a genome. The required fields are:</p>
|
||
<ul>
|
||
<li>Biosample accession OR sample_name</li>
|
||
<li>Assembly method</li>
|
||
<li>Assembly method version</li>
|
||
<li>Genome coverage</li>
|
||
<li>Sequencing technology</li>
|
||
<li>File name</li>
|
||
</ul>
|
||
<p>Optional fields:</p>
|
||
<ul>
|
||
<li>Assembly date</li>
|
||
<li>Assembly name</li>
|
||
<li>Reference genome</li>
|
||
<li>Update (update_for)</li>
|
||
<li>bacteria_available_from</li>
|
||
</ul>
|
||
<p>Definitions of these fields are in the <a href="#genome_assembly_data">Genome Assembly Data</a> section and also as comments in the template itself.</p>
|
||
<p>Instructions:</p>
|
||
<ul>
|
||
<li>If you created BioSamples previously, provide accessions in the form of SAMN# in the column biosample_accession. See the Example of Genome Info file using BioSample Accessions.</li>
|
||
<li>If you are creating samples during this genome submission, provide the names of samples that you just created in the column 'sample_name'. Please note that sample names must be unique within your entire account. See the Example of Genome Info file using BioSample Names. Do not include both sample_name and biosample_accession for a genome.</li>
|
||
<li>If you archived your files with the tar utility, you must list the names of the files that are contained in the archive, not the name of the tarred file.</li>
|
||
<li>Provide exact file names (including extensions) in the filename column.</li>
|
||
<li>Supported extensions for compressed files: tar, tar.gz, gz, bz2 (do not use zip!).</li>
|
||
<li>File names must be unique.</li>
|
||
</ul>
|
||
</div>
|
||
|
||
|
||
<h3 id="metadata">Metadata required for all genome submissions</h3>
|
||
|
||
|
||
<h4 id="bioproject">BioProject</h4>
|
||
|
||
|
||
<p>The BioProject contains the description of the research effort, relevant grant(s), and has links to the public data for the project. Each genome must belong to a BioProject, and genomes sequenced as part of the same research effort can belong to a single BioProject. Use the same BioProject for the sequence reads and genome assembly made from those reads; do not create duplicate BioProjects. If a new BioProject is necessary for unannotated (or PGAP-annotated) genomes, then registering during the genome submission process is simplest. However, genomes submitted with annotation will need to be <a href="https://submit.ncbi.nlm.nih.gov/subs/bioproject/">pre-registered</a> so that a locus_tag prefix can be assigned to the BioProject/BioSample pair and used to identify each gene within that genome uniquely. A file of the locus_tag prefix(es) for the BioSamples within a BioProject is linked to the <a href="https://submit.ncbi.nlm.nih.gov/subs/bioproject/">BioProject submission</a>. Write to <a href="mailto:genomes@ncbi.nlm.nih.gov">genomes@ncbi.nlm.nih.gov</a> if you did not receive a locus_tag prefix after preregistering a BioSample for your BioProject.</p>
|
||
|
||
|
||
<h4 id="biosample">BioSample</h4>
|
||
|
||
|
||
<p>The BioSample contains the source information of the sample that was sequenced. Use the same BioSample for the sequence reads and genome assembly made from those reads; do not create duplicate BioSamples. Registering a new BioSample can be done during the genome submission process for unannotated (or PGAP-annotated) genomes; however, genomes submitted with annotation will need to be <a href="https://submit.ncbi.nlm.nih.gov/subs/biosample/">pre-registered</a> to get a locus_tag prefix. Include the registered BioProject when you register the BioSample so that a locus_tag prefix is assigned to the pair. You'll find the locus_tag assignment(s) in a file linked to the <a href="https://submit.ncbi.nlm.nih.gov/subs/bioproject/">BioProject submission.</a></p>
|
||
|
||
|
||
<p>During processing of the genome the relevant information from the genome and BioSample will be merged so that they are in agreement. If the genome and BioSample have a conflict in the value of an attribute, we will stop and ask the submitter to clarify what the correct value is. There is some extra validation in GenBank compared to BioSample, eg ‘altitude’ is defined as being in meters in BioSample but it must have ‘m’ present in the GenBank genome. We will fix simple issues like this.</p>
|
||
|
||
|
||
<h4 id="genome_assembly_data">Genome Assembly Data and other information about a genome assembly</h4>
|
||
|
||
|
||
<ul>
|
||
<li><strong>Assembly method</strong> : Name of the assembly algorithm(s)</li>
|
||
<li><strong>Assembly method version or date</strong> : version of the algorithm or date it was run</li>
|
||
<li><strong>Genome coverage</strong> : The estimated base coverage across the genome, eg 12x.</li>
|
||
<li><strong>Sequencing technology</strong> : sequencing platform(s) used</li>
|
||
<li><strong>Assembly date</strong> : Optional. Year, month or day the assembly was made. Date formats: YYYY-MM-DD; YYYY-MM; YYYY</li>
|
||
<li><strong>Assembly name</strong> : Optional and not usually relevant for prokaryotes. This is a short name suitable for display that does not include the organism name eg, LoxAfr_3.0 for a Loxodonta africana assembly, version 3.0</li>
|
||
<li><strong>Full or Partial Genome in the sample</strong> : the answer is nearly always "yes, Full". Choose "no, partial" only if a subset of the sample was deliberately selected, eg just exomes or a single chromosome of a eukaryote or only the non-repetitive regions of the genome</li>
|
||
<li><strong>Reference genome</strong> : If this is NOT a de novo assembly, you will need to provide the accession.version and/or the assembly name of the genome assembly that was used as the reference guide for this assembly</li>
|
||
<li><strong>Update</strong> : accession of the genome being updated, when appropriate</li>
|
||
<li><strong>bacteria_available_from</strong> : Optional. For prokaryotes provide a name and physical address (not email) of the lab or PI, or a culture collection identifier where scientists could obtain this bacterial culture</li>
|
||
</ul>
|
||
|
||
|
||
<h4 id="gap-information-what-the-ns-repr">Gap Information: What the Ns represent</h4>
|
||
|
||
|
||
<ul>
|
||
<li>The minimum number of consecutive Ns that represents a gap (must be 10 or less). Be aware that the assembly statistics are always calculated using 10 or more Ns as a gap, regardless of the presence/absence of gaps in the final genome sequence.</li>
|
||
<li>The number of Ns that represents a gap of completely unknown length (usually 0; sometimes 100 or another value)</li>
|
||
<li>The evidence used to assert that the sequence on either side of the gap is linked (usually paired-ends)</li>
|
||
<li>This information is collected in the submission form for individual and batch submissions. Default answers are those that have been most commonly submitted. Be sure to select the correct answer when the defaults are incorrect for the genome(s) being submitted.</li>
|
||
</ul>
|
||
|
||
|
||
<h4 id="chromosome-and-plasmid-assignmen"><a data-jig="ncbitoggler" href="#chrom-assignments">Chromosome and plasmid assignments</a></h4>
|
||
|
||
|
||
<ul>
|
||
<li>Indicate any sequences that are chromosomes or plasmids, or that belong to chromosomes or plasmids.</li>
|
||
<li>Follow the <a href="#chr_names">chromosome and plasmid names rules</a></li>
|
||
<li id="chrom-assignments">Every sequence in a non-wgs genome must have a chromosome or plasmid assignment and every chromosome must be submitted as a single sequence.</li>
|
||
</ul>
|
||
|
||
|
||
<h4 id="chr_names">Plasmid and chromosome names rules</h4>
|
||
|
||
|
||
<h6 id="see-details_2"><a data-jig="ncbitoggler" href="#chr_names-details">see details</a></h6>
|
||
|
||
|
||
<div id="chr_names-details">
|
||
<p>Chromosome and plasmid names can only contain digits, dots, underscores, and
|
||
ASCII characters in plain text in the standard English alphabet. In
|
||
addition, there are rules specific for each.</p>
|
||
<h5 id="chromosome-names">Chromosome names</h5>
|
||
<ul>
|
||
<li>Can contain only digits, dots, underscores, and ASCII characters in plain text in the standard English alphabet.</li>
|
||
<li>Cannot include "chr" or "chromosome". However, linkage group names should include "LG" as part of the name.</li>
|
||
<li>Are limited to 33 characters</li>
|
||
<li>Cannot include these words: unknown, Un, Unk, 0 (= zero as the full name). This restriction is because each unplaced sequence should be separate, not concatenated with others, and without an assignment.</li>
|
||
</ul>
|
||
<h5 id="plasmid-names">Plasmid names</h5>
|
||
<ul>
|
||
<li>Can contain only digits, dots, underscores, and ASCII characters in plain text in the standard English alphabet.</li>
|
||
<li>Should start with lower case 'p' UNLESS the plasmid name is not known. In that case use 'unnamed', or 'unnamed1' and 'unnamed2' for distinct unnamed plasmids.</li>
|
||
<li>Cannot include the word 'plasmid'</li>
|
||
<li>Are limited to 20 characters</li>
|
||
</ul>
|
||
</div>
|
||
|
||
|
||
<h3 id="submitting_genomes">Submit the genomes to the Genome Submission Portal</h3>
|
||
|
||
|
||
<p>All files must be submitted via the <a href="https://submit.ncbi.nlm.nih.gov/subs/genome/">Genome Submission Portal</a>.
|
||
Choose "Single genome" or "Batch/multiple genomes".
|
||
Answer the questions and upload the necessary files.
|
||
Review the summary page and click the "Submit" button.
|
||
The submission will be given a 'SUB' temporary identifier which you can use in correspondence before an accession number is assigned to the genome submission.</p>
|
||
|
||
|
||
<h3 id="whathappensnext">What happens next</h3>
|
||
|
||
|
||
<p>Once we receive your genome submission, several automated validations are run and a member of our staff conducts an initial review. If no significant issues are found, the genome will be assigned an accession number.</p>
|
||
|
||
|
||
<h6 id="if-there-are-problems"><a data-jig="ncbitoggler" href="#if-problems">If there are problems</a></h6>
|
||
|
||
|
||
<div id="if-problems">
|
||
<p>The submitted files will be marked in the submission portal as "Error"
|
||
and you will receive an email with details of the problems. Errors
|
||
found in the automated validations are automatically reported back to
|
||
the submission portal and an email is automatically sent to the
|
||
submitter with instructions on how to proceed. In addition, a member
|
||
of our staff conducts an initial review of each submission and reviews
|
||
several additional validations. The problems, including those
|
||
described in the Fix problems section, could be:</p>
|
||
<ul>
|
||
<li>Any Error-level errors and some Warning-level errors from the
|
||
validation. For .sqn file submissions you would see these in the
|
||
.val file(s) generated by table2asn.</li>
|
||
<li>Any FATAL or problem categories from the discrepancy report, the
|
||
.dr file generated by table2asn.</li>
|
||
<li>Sequence contamination in the genome sequences</li>
|
||
<li>Bad format of the AGP files or inconsistencies between the AGP and fasta/.sqn files.</li>
|
||
<li>If the genome size is not within the expected range of the median size of the
|
||
genomes of that species already in GenBank. Because this test
|
||
uses the genomes that are already in GenBank, genomes could
|
||
pass it simply because there are not yet enough genomes of
|
||
that species in GenBank. Note that you can run this <a href="/genbank/genome-size-check/">Genome Size Check test</a> yourself before submitting.</li>
|
||
<li>If ANI analysis of a prokaryotic genome indicates that the genome is
|
||
misidentified. This test uses the genomes that are already in
|
||
GenBank as the reference, so genomes could pass this test simply because there are
|
||
not yet enough genomes of that species in GenBank.</li>
|
||
</ul>
|
||
<p>Once you have made the fixes, log back into the
|
||
<a href="https://submit.ncbi.nlm.nih.gov/subs/genome">Genome Submission Portal</a>,
|
||
retrieve that submission by its 'SUB' identifier and click the "FIX" button of that
|
||
submission. You will be back in the original submission and will need to
|
||
delete the files that are marked as having errors, and then upload new
|
||
files in their place.</p>
|
||
<p>Once your submission is assigned an accession number it undergoes a
|
||
thorough review by our staff. This review is critical because we are
|
||
striving to present genome annotation in an accurate and consistent
|
||
manner so that database users can make maximum use of the data. If we
|
||
encounter problems during this review, we will contact you by email.</p>
|
||
</div>
|
||
|
||
|
||
<h6 id="submission-statuses-in-the-submi"><a data-jig="ncbitoggler" href="#portal-statuses">Submission statuses in the submission portal</a></h6>
|
||
|
||
|
||
<ul>
|
||
<li><strong>Queued</strong> : the submission is waiting for initial review</li>
|
||
<li><strong>Error</strong> : one or more genomes has errors in its files, so needs to
|
||
be resubmitted. Use the same file name when resubmitting batch
|
||
submissions.</li>
|
||
<li><strong>Processing, and no accession number</strong> : all of the genomes have
|
||
passed the initial automated validations and are waiting for
|
||
additional review.</li>
|
||
<li><strong>Processing, and accession number</strong> : genome accessions have been
|
||
assigned and the genomes will be processed by NCBI staff. Genomes
|
||
will remain at this status until they are released. We will
|
||
contact you during processing if the submission has issues that
|
||
require additional information.</li>
|
||
<li id="portal-statuses"><strong>Processed</strong> : the genome has been publicly released.</li>
|
||
</ul>
|
||
|
||
|
||
<p>If you elected to hold your genome until a particular date (or
|
||
publication, whichever is first), we ask that you provide us with the
|
||
expected publication date and also notify us in a timely manner of the
|
||
upcoming publication and the relevant citation details. This will
|
||
allow us to coordinate the release of your genome with the appearance
|
||
of the paper. Please provide at least two weeks' notice of any
|
||
upcoming publication.</p>
|
||
|
||
|
||
<p>NOTE: As of January 2017, genomes will be released on their
|
||
release date without additional communication, as is the normal
|
||
GenBank policy. Be sure to request an extension of the release date if
|
||
the genome is not yet published and you wish to continue to keep it
|
||
confidential.</p>
|
||
|
||
|
||
<h4 id="pgap">Requesting PGAP annotation of prokaryotic genomes</h4>
|
||
|
||
|
||
<p>Requesting annotation by the
|
||
<a href="/genome/annotation_prok/">Prokaryotic Genomes Annotation Pipeline</a>
|
||
is a step during submission of the
|
||
genome to GenBank. Prepare a regular GenBank genome submission and
|
||
request PGAP annotation during the submission process by clicking on
|
||
the box "Annotate this prokaryotic genome in the NCBI Prokaryotic
|
||
Annotation Pipeline before being released". The annotated genome will
|
||
be posted back to the Submission Portal for your review. You may edit
|
||
the file and resubmit that to GenBank; however, this is not required
|
||
and is generally not recommended, as it will slow processing and may
|
||
introduce problems that you would need to fix.</p>
|
||
|
||
|
||
<h4 id="run_pgap">Running PGAP yourself</h4>
|
||
|
||
|
||
<p>If you would like to annotate your prokaryotic genome with the NCBI <a href="/genome/annotation_prok/">Prokaryotic Genomes Annotation Pipeline</a> (PGAP) before or without submitting your data to GenBank, NCBI has made an external version available for you to download and run. It will generate a GenBank-compliant annotated genome that is submission-ready. If you are interested in running PGAP yourself, please see the <a href="https://ncbiinsights.ncbi.nlm.nih.gov/2019/05/13/prokaryotic-genome-annotation-pipeline-pgap-now-produces-files-suitable-for-submission-to-genbank/">NCBI Insights announcement</a>
|
||
and find more <a href="https://github.com/ncbi/pgap">details at github</a>, or see this <a href="https://www.youtube.com/watch?v=pNn_-_46lpI">short video</a>.</p>
|
||
|
||
|
||
<p>After your genome is annotated using external PGAP, you may choose to submit it to GenBank: </p>
|
||
|
||
|
||
<ul>
|
||
<li>Submit the ASN output file of your PGAP run to GenBank via the usual <a href="https://submit.ncbi.nlm.nih.gov/subs/genome/">Genome Submission Portal</a>. <ul>
|
||
<li>Information for <a href="/genbank/genomesubmit/#single">single submission</a>.</li>
|
||
<li>Information for <a href="/genbank/genomesubmit/#batch">batch submission</a>.</li>
|
||
</ul>
|
||
</li>
|
||
<li>As usual during the genome submission, you will be asked to provide the BioProject and BioSample that you may have already created for that genome or its sequencing reads, or to create them if they do not already exist. Please do not create duplicates.</li>
|
||
<li>Any locus_tag prefix can be used to run PGAP. However, during GenBank processing that value will be automatically changed to the officially registered locus_tag prefix for the BioProject:BioSample pair of that genome. Be sure to include the official locus_tags in your publication.<ul>
|
||
<li>You have the option to obtain the official locus_tag prefix before running PGAP, but that is not necessary. To do that, you would <a href="https://submit.ncbi.nlm.nih.gov/subs/bioproject/">register the BioProject</a>, then <a href="https://submit.ncbi.nlm.nih.gov/subs/biosample/">register the BioSample(s)</a>, then look for the locus_tag_prefix.txt file posted to the BioProject submission. A separate BioSample is required for each strain, but multiple genomes can, and usually do, belong to a single BioProject.</li>
|
||
</ul>
|
||
</li>
|
||
<li>The submitted genomes will undergo the standard validations, as <a href="#submitting_genomes">described above</a>, including being screened for foreign contaminants and vector sequences and being analyzed to check the organism identification. Any annotated assemblies that do not pass these validations may need to be modified.</li>
|
||
<li>Genomes that pass the validations will be assigned accession numbers and made public on the release date you have selected during submission. Be sure to use the official GenBank-assigned accessions when you publish.</li>
|
||
</ul>
|
||
|
||
|
||
</div>
|
||
<!--/.col1-->
|
||
<div class="col2">
|
||
<div class="rightnav">
|
||
<h2 id="genome-resources">Genome Resources</h2>
|
||
<ul>
|
||
<li><a href="/genbank/wgs/">About WGS</a></li>
|
||
<li><a href="https://www.ncbi.nlm.nih.gov/Traces/wgs/?view=wgs">WGS Browser</a></li>
|
||
<li><a href="/genbank/genomesubmit/">Genome Submission Guide</a></li>
|
||
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/genome/">Genome Submission Portal</a></li>
|
||
<li><a href="/genbank/wgs_update/">Update Genome Records</a></li>
|
||
<li><a href="/genbank/wgsfaq/">FAQ</a></li>
|
||
<li><a href="/genbank/table2asn">table2asn</a></li>
|
||
<li><a href="/genbank/diploid_haps">Submitting Multiple Haplotype Assemblies</a></li>
|
||
<li><a href="/WebSub/template.cgi/">Create Submission Template</a></li>
|
||
<li><a href="/genbank/eukaryotic_genome_submission/">Eukaryotic Annotation Guide</a></li>
|
||
<li><a href="/genbank/genomesubmit_annotation/">Prokaryotic Annotation Guide</a></li>
|
||
<li><a href="/genbank/examples.wgs/">Annotation Example Files</a></li>
|
||
<li><a href="/genbank/genomes_gff">Annotating Genomes with GFF3 or GTF files</a></li>
|
||
<li><a href="/genbank/genome_validation">Validation Error Explanations for Genomes</a></li>
|
||
<li><a href="/genbank/asndisc/">Discrepancy Report</a></li>
|
||
<li><a href="https://www.ncbi.nlm.nih.gov/genome/annotation_prok/">NCBI Prokaryotic Genome Annotation Pipeline</a></li>
|
||
<li><a href="https://www.ncbi.nlm.nih.gov/assembly/agp/AGP_Specification/">AGP Format</a></li>
|
||
<li><a href="/genbank/metagenome/">Metagenome Submission Guide</a></li>
|
||
<li><a href="/genbank/structuredcomment/">Structured Comment</a></li>
|
||
<li><a href="/bioproject/">BioProject</a></li>
|
||
<li><a href="/biosample/">BioSample</a></li>
|
||
</ul>
|
||
</div>
|
||
</div>
|
||
<!--/.col2-->
|
||
<div class="col3">
|
||
|
||
</div>
|
||
<!--/.col3-->
|
||
<div class="col4">
|
||
|
||
</div>
|
||
<!--/.col4-->
|
||
<div class="col5">
|
||
|
||
</div>
|
||
<div class="col6">
|
||
|
||
</div>
|
||
<div class="col7">
|
||
|
||
</div>
|
||
<div class="col8">
|
||
|
||
</div>
|
||
<div class="col9">
|
||
|
||
</div>
|
||
</div><!--/.content-->
|
||
</div><!--/.container-->
|
||
<div id="NCBIFooter_dynamic">
|
||
<div class="breadcrumbs">You are here:
|
||
<span id="breadcrumb_text"><a href="/guide/">NCBI</a></span></div>
|
||
<a id="help-desk-link" class="help_desk" href="https://support.ncbi.nlm.nih.gov/ics/support/default.asp?Time=2025-03-05T04:07:24-05:00&amp;Snapshot=%2Fprojects%2Fstaticsites%2Fgenbank%2Fgenbank@2.21&amp;Host=portal105&amp;ncbi_phid=CE8C99E57C812441000000000021001A&amp;ncbi_session=CE8B5AF87C7FFCB1_0191SID&amp;from=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fgenbank%2Fgenomesubmit%2F&amp;Ncbi_App=genbank&amp;Page=custom-page&amp;style=classic&amp;deptID=28049" target="_blank">Support Center</a>
|
||
<noscript><img alt="" src="/stat?jsdisabled=true&amp;ncbi_app=genbank&amp;ncbi_db=&amp;ncbi_pdid=custom-page&amp;ncbi_phid=CE8C99E57C812441000000000021001A" /></noscript>
|
||
</div>
|
||
|
||
|
||
<div xmlns:xi="http://www.w3.org/2001/XInclude">
|
||
<div xmlns="http://www.w3.org/1999/xhtml" class="footer" id="footer" xml:base="http://127.0.0.1/sites/static/header_footer">
|
||
<section class="icon-section">
|
||
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
|
||
<div class="grid-container container">
|
||
<div class="icon-section_container">
|
||
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter">
|
||
<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40" fill="none">
|
||
<title>Twitter</title>
|
||
<g id="twitterx1008">
|
||
<path id="path1008" d="M6.06736 7L16.8778 20.8991L6.00001 32.2H10.2L18.6 23.1L25.668 32.2H34L22.8 17.5L31.9 7H28.4L20.7 15.4L14.401 7H6.06898H6.06736ZM9.66753 8.73423H12.9327L29.7327 30.4658H26.5697L9.66753 8.73423Z" fill="#5B616B"></path>
|
||
</g>
|
||
</svg>
|
||
</a>
|
||
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<title>Facebook</title>
|
||
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
|
||
</path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<title>LinkedIn</title>
|
||
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
|
||
</path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<defs>
|
||
<style>
|
||
.cls-11,
|
||
.cls-12 {
|
||
fill: #737373;
|
||
}
|
||
|
||
.cls-11 {
|
||
fill-rule: evenodd;
|
||
}
|
||
</style>
|
||
</defs>
|
||
<title>GitHub</title>
|
||
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
|
||
</path>
|
||
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
|
||
</path>
|
||
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
|
||
</path>
|
||
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
|
||
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
|
||
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
|
||
<defs><style>.cls-1{fill:#737373;}</style></defs>
|
||
<title>NCBI Insights Blog</title>
|
||
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
|
||
</svg>
|
||
</a>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section class="container-fluid bg-primary">
|
||
<div class="container pt-5">
|
||
<div class="row mt-3">
|
||
<div class="col-lg-3 col-12">
|
||
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
|
||
<ul class="list-inline social_media">
|
||
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer">
|
||
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
|
||
<title>Twitter</title>
|
||
<g id="twitterx1009" clip-path="url(#clip0_65276_3946)">
|
||
<path id="Vector_Twitter" d="M17.5006 34.6565C26.9761 34.6565 34.6575 26.9751 34.6575 17.4996C34.6575 8.02416 26.9761 0.342773 17.5006 0.342773C8.02514 0.342773 0.34375 8.02416 0.34375 17.4996C0.34375 26.9751 8.02514 34.6565 17.5006 34.6565Z" fill="#205493" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
|
||
<path id="path1009" d="M8.54811 8.5L16.2698 18.4279L8.50001 26.5H11.5L17.5 20L22.5486 26.5H28.5L20.5 16L27 8.5H24.5L19 14.5L14.5007 8.5H8.54927H8.54811ZM11.1197 9.73873H13.4519L25.4519 25.2613H23.1926L11.1197 9.73873Z" fill="white"></path>
|
||
</g>
|
||
<defs>
|
||
<clipPath id="clip0_65276_3946">
|
||
<rect width="35" height="35" fill="white"></rect>
|
||
</clipPath>
|
||
</defs>
|
||
</svg>
|
||
</a></li>
|
||
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
|
||
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
|
||
<title>Facebook</title>
|
||
<g id="Facebook" clip-path="url(#clip0_1717_1086)">
|
||
<path id="Vector_Facebook" d="M15.1147 29.1371C15.1147 29.0822 15.1147 29.0296 15.1147 28.9747V18.9414H11.8183C11.6719 18.9414 11.6719 18.9414 11.6719 18.8018C11.6719 17.5642 11.6719 16.3289 11.6719 15.0937C11.6719 14.9793 11.7062 14.9518 11.816 14.9518C12.8683 14.9518 13.9206 14.9518 14.9751 14.9518H15.1215V14.8329C15.1215 13.8057 15.1215 12.774 15.1215 11.7492C15.1274 10.9262 15.3148 10.1146 15.6706 9.37241C16.1301 8.38271 16.9475 7.60378 17.9582 7.19235C18.6492 6.90525 19.3923 6.76428 20.1405 6.7783C21.0029 6.79202 21.8653 6.83091 22.7278 6.86065C22.8879 6.86065 23.048 6.89496 23.2082 6.90182C23.2974 6.90182 23.3271 6.94071 23.3271 7.02993C23.3271 7.54235 23.3271 8.05477 23.3271 8.5649C23.3271 9.16882 23.3271 9.77274 23.3271 10.3767C23.3271 10.4819 23.2974 10.5139 23.1921 10.5116C22.5379 10.5116 21.8814 10.5116 21.2271 10.5116C20.9287 10.5184 20.6316 10.5528 20.3395 10.6146C20.0822 10.6619 19.8463 10.7891 19.6653 10.9779C19.4842 11.1668 19.3672 11.4078 19.3307 11.6669C19.2857 11.893 19.2612 12.1226 19.2575 12.3531C19.2575 13.1904 19.2575 14.0299 19.2575 14.8695C19.2575 14.8946 19.2575 14.9198 19.2575 14.9564H23.0229C23.1807 14.9564 23.183 14.9564 23.1624 15.1074C23.0778 15.7662 22.9885 16.425 22.9039 17.0816C22.8322 17.6321 22.7636 18.1827 22.698 18.7332C22.6729 18.9437 22.6797 18.9437 22.4693 18.9437H19.2644V28.8992C19.2644 28.9793 19.2644 29.0593 19.2644 29.1394L15.1147 29.1371Z" fill="white"></path>
|
||
<path id="Vector_2_Facebook" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
|
||
</g>
|
||
<defs>
|
||
<clipPath id="clip0_1717_1086">
|
||
<rect width="35" height="35" fill="white"></rect>
|
||
</clipPath>
|
||
</defs>
|
||
</svg>
|
||
</a></li>
|
||
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="YouTube" target="_blank" rel="noopener noreferrer">
|
||
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
|
||
<title>YouTube</title>
|
||
<g id="YouTube" clip-path="url(#clip0_1717_1101)">
|
||
<path id="Vector_Youtube" d="M26.2571 11.4791C25.9025 11.1589 25.5709 10.9576 24.228 10.834C22.5512 10.6785 20.2797 10.6556 18.564 10.6533H16.4365C14.7208 10.6533 12.4493 10.6785 10.7725 10.834C9.43196 10.9576 9.09798 11.1589 8.7434 11.4791C7.81464 12.321 7.6202 14.6268 7.59961 16.8938C7.59961 17.3178 7.59961 17.741 7.59961 18.1635C7.62706 20.4121 7.82837 22.686 8.7434 23.521C9.09798 23.8412 9.42967 24.0425 10.7725 24.1661C12.4493 24.3216 14.7208 24.3445 16.4365 24.3468H18.564C20.2797 24.3468 22.5512 24.3216 24.228 24.1661C25.5686 24.0425 25.9025 23.8412 26.2571 23.521C27.1722 22.6929 27.3735 20.451 27.4009 18.2206C27.4009 17.7402 27.4009 17.2599 27.4009 16.7795C27.3735 14.5491 27.1699 12.3072 26.2571 11.4791ZM15.5604 20.5311V14.652L20.561 17.5001L15.5604 20.5311Z" fill="white"></path>
|
||
<path id="Vector_2_Youtube" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
|
||
</g>
|
||
<defs>
|
||
<clipPath id="clip0_1717_1101">
|
||
<rect width="35" height="35" fill="white"></rect>
|
||
</clipPath>
|
||
</defs>
|
||
</svg>
|
||
</a></li>
|
||
</ul>
|
||
</div>
|
||
<div class="col-lg-3 col-12">
|
||
<p class="address_footer text-white">National Library of Medicine<br />
|
||
<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
|
||
Bethesda, MD 20894</a></p>
|
||
</div>
|
||
<div class="col-lg-3 col-12 centered-lg">
|
||
<p><a href="https://www.nlm.nih.gov/web_policies.html" class="text-white">Web Policies</a><br />
|
||
<a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white">FOIA</a><br />
|
||
<a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white" id="vdp">HHS Vulnerability Disclosure</a></p>
|
||
</div>
|
||
<div class="col-lg-3 col-12 centered-lg">
|
||
<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
|
||
<a href="https://www.nlm.nih.gov/accessibility.html" class="text-white">Accessibility</a><br />
|
||
<a href="https://www.nlm.nih.gov/careers/careers.html" class="text-white">Careers</a></p>
|
||
</div>
|
||
</div>
|
||
<div class="row">
|
||
<div class="col-lg-12 centered-lg">
|
||
<nav class="bottom-links">
|
||
<ul class="mt-3">
|
||
<li>
|
||
<a class="text-white" href="https://www.nlm.nih.gov/">NLM</a>
|
||
</li>
|
||
<li>
|
||
<a class="text-white" href="https://www.nih.gov/">NIH</a>
|
||
</li>
|
||
<li>
|
||
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
|
||
</li>
|
||
<li>
|
||
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
|
||
</li>
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
|
||
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
|
||
</div>
|
||
</div>
|
||
<!--/.footer-->
|
||
<p class="last-updated small">Last updated: 2024-03-22T14:10:13Z</p>
|
||
</div>
|
||
<!--/.page-->
|
||
</div>
|
||
<!--/.wrap-->
|
||
<span class="PAFAppResources"></span>
|
||
|
||
|
||
</div><!-- /.twelve_col -->
|
||
</div>
|
||
<!-- /.grid -->
|
||
|
||
|
||
|
||
<!-- usually for JS scripts at page bottom -->
|
||
<span class="pagefixtures"></span>
|
||
|
||
|
||
<!-- CE8B5AF87C7FFCB1_0191SID /projects/staticsites/genbank/genbank@2.21 portal105 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
|
||
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
|
||
|
||
<script type="text/javascript" src="https://static.pubmed.gov/portal/portal3rc.fcgi/4218137/js/3879255/4121861/1490097/4087685.js" snapshot="genbank"></script></body>
|
||
</html>
|
||
|