nih-gov/www.ncbi.nlm.nih.gov/sra/docs/submitformats/index.html

1150 lines
60 KiB
HTML
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<!-- AppResources meta begin -->
<meta name="paf-app-resources" content="" />
<!-- AppResources meta end -->
<!-- TemplateResources meta begin -->
<meta name="paf_template" content="StdNCol" />
<!-- TemplateResources meta end -->
<!-- Page meta begin -->
<!-- Page meta end -->
<!-- Logger begin -->
<meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_app" content="sra" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_pdid" content="sra-custom-page" />
<!-- Logger end -->
<title>File Format Guide</title>
<!-- PageFixtures headcontent begin -->
<meta name="cms-local-nav-url" content="https://cms.ncbi.nlm.nih.gov//sra/docs/sratabnav" />
<!-- PageFixtures headcontent end -->
<!-- AppResources external_resources begin -->
<script type="text/javascript" src="/core/jig/1.14.8/js/jig.min.js"></script>
<!-- AppResources external_resources end -->
<!-- Page headcontent begin -->
<meta name="subsite" content="sra" />
<meta name="path" content="sra/docs/submitformats" />
<meta name="modified" content="2019-09-20T18:35:04Z" />
<link type="text/css" rel="stylesheet" href="/core/assets/sra/css/SraDoc.css" />
<!-- Page headcontent end -->
<!-- PageFixtures resources begin -->
<link xmlns="http://www.w3.org/1999/xhtml" type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218191/css/4207974/4206132.css" xml:base="http://127.0.0.1/sites/static/header_footer" />
<!-- PageFixtures resources end -->
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8E9A0D7CA138E100000000008D0077.m_6" />
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4217302/css/4121862/3974050/3917732/251717/4082496/14534/45193/3534283/4128070/4082512/4005757/4062871.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4217302/css/3529741/3529739.css" media="print" /></head>
<body class=" sra-custom-page">
<div class="grid">
<div class="col twelve_col nomargin shadow">
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
<div class="sysmessages">
<noscript>
<p class="nojs">
<strong>Warning:</strong>
The NCBI web site requires JavaScript to function.
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">more...</a>
</p>
</noscript>
</div>
<!--/.sysmessage-->
<div class="wrap">
<div class="page">
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<div xmlns="http://www.w3.org/1999/xhtml" id="universal_header" xml:base="http://127.0.0.1/sites/static/header_footer">
<section class="usa-banner">
<div class="usa-accordion">
<header class="usa-banner-header">
<div class="usa-grid usa-banner-inner">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
<p>An official website of the United States government</p>
<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
<span class="usa-banner-button-text">Here's how you know</span>
</button>
</div>
</header>
<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
<div class="usa-banner-guidance-gov usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
<div class="usa-media_block-body">
<p>
<strong>The .gov means it's official.</strong>
<br />
Federal government websites often end in .gov or .mil. Before
sharing sensitive information, make sure you're on a federal
government site.
</p>
</div>
</div>
<div class="usa-banner-guidance-ssl usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
<div class="usa-media_block-body">
<p>
<strong>The site is secure.</strong>
<br />
The <strong>https://</strong> ensures that you are connecting to the
official website and that any information you provide is encrypted
and transmitted securely.
</p>
</div>
</div>
</div>
</div>
</section>
<div class="usa-overlay"></div>
<header class="ncbi-header" role="banner" data-section="Header">
<div class="usa-grid">
<div class="usa-width-one-whole">
<div class="ncbi-header__logo">
<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
</a>
</div>
<div class="ncbi-header__account">
<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
<span class="fa fa-user" aria-hidden="true">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
<g style="fill: #fff">
<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
</g>
</svg>
</span>
<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
<span class="sr-only">Show account info</span>
</button>
</div>
<div class="ncbi-popup-anchor">
<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
<div class="ncbi-popup-head">
<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
<span class="fa fa-times">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
</svg>
</span>
<span class="usa-sr-only">Close</span></button>
<h4>Account</h4>
</div>
<div class="account-user-info">
Logged in as:<br />
<b><span class="username" id="uname_long">username</span></b>
</div>
<div class="account-links">
<ul class="usa-unstyled-list">
<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
</ul>
</div>
</div>
</div>
</div>
</div>
</header>
<div role="navigation" aria-label="access keys">
<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
</div>
<section data-section="Alerts">
<div class="ncbi-alerts-placeholder"></div>
</section>
</div>
</div>
<!--/.header-->
<div class="header">
<div class="res_logo"><h1 class="res_name"><a href="/sra/" title="SRA home">SRA</a></h1><h2 class="res_tagline">SRA</h2></div>
<div class="search"><form method="get" action="/sra/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="sra" selected="selected">SRA</option><option value="nuccore">Nucleotide</option><option value="books">Books</option><option value="pubmed" class="last">PubMed</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" 
class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search SRA" value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="isEnabled:false,disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'yes'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form><ul class=" inline_list searchlinks"><li>
<a href="/sra/advanced/">Advanced</a>
</li></ul></div>
</div>
<div class="nav_and_browser">
<div class="localnav"><ul class="jig-ncbilocalnav">
<li>
<a title="Getting Started" href="/sra/docs/">Getting Started</a></li>
<li>
<a title="SRA Submission Quick Start" href="/sra/docs/submit">Submission Quick Start</a></li>
<li>
<a title="SRA Search and Download Guide" href="/sra/docs/sradownload">Search and Download</a>
</li>
<li>
<a title="How to use SRA in the cloud" href="/sra/docs/sra-cloud">SRA in the Cloud</a>
</li>
<li>
<a title="Archive Documentation" href="/sra/docs/sra-data-storage-model/">Archive Documentation</a>
<ul>
<li><a title="Data Storage Model" href="/sra/docs/sra-data-storage-model/">Data Storage Model</a></li>
<li><a title="Root Cause Analysis: Summary and Report" href="/sra/docs/rca/">Root Cause Analysis: Summary and Report</a></li>
</ul>
</li>
</ul></div>
</div>
<!-- was itemctrl -->
<div class="container">
<div id="maincontent" class="content col twelve_col last">
<div class="col1">
<h1 id="file-format-guide">File Format Guide</h1>
<div class="toc">
<ul>
<li><a href="#introduction">Introduction</a></li>
<li><a href="#bam-files">BAM files</a></li>
<li><a href="#cram-files">CRAM files</a></li>
<li><a href="#sff-files">SFF files</a></li>
<li><a href="#hdf5-files">HDF5 files</a><ul>
<li><a href="#pacbio">PacBio</a></li>
<li><a href="#minion-oxford-nanopore">MinION Oxford Nanopore</a></li>
<li><a href="#hdf5-tools">HDF5 tools</a></li>
</ul>
</li>
<li><a href="#fastq-files">FASTQ files</a><ul>
<li><a href="#pairedend-fastq">Paired-end FASTQ</a></li>
<li><a href="#platform-specific-fastq-files">Platform specific FASTQ files</a><ul>
<li><a href="#454-fastq">454 fastq</a></li>
<li><a href="#ion-torrent-fastq">Ion Torrent fastq</a></li>
<li><a href="#recent-illumina-fastq">Recent Illumina fastq</a></li>
<li><a href="#older-illumina-fastq">Older Illumina fastq</a></li>
<li><a href="#qiime-demultiplexed-sequences-in">QIIME de-multiplexed sequences in fastq</a></li>
<li><a href="#pacbio-ccs-circular-consensus-se">PacBio CCS (Circular Consensus Sequence) or RoI (Read of Insert) read</a></li>
<li><a href="#pacbio-ccs-subread">PacBio CCS subread</a></li>
<li><a href="#helicos-fastq-with-a-fixed-ascii">Helicos fastq with a fixed ASCII-based Phred value for quality</a></li>
<li><a href="#fasta-files">FASTA files</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#fasta-with-qual-file-pairs">FASTA with QUAL file pairs</a></li>
<li><a href="#csfasta-with-qual-files">CSFASTA with QUAL Files</a></li>
<li><a href="#legacy-formats">Legacy Formats</a><ul>
<li><a href="#srf-files">SRF files</a></li>
<li><a href="#native-illumina">Native Illumina</a></li>
<li><a href="#qseq">QSEQ</a></li>
</ul>
</li>
<li><a href="#machine-specific-information">Machine Specific Information</a><ul>
<li><a href="#illumina">Illumina</a></li>
<li><a href="#solid">SOLiD</a></li>
<li><a href="#roche-454-formerly-life-sciences">Roche 454 (formerly Life Sciences)</a></li>
<li><a href="#iontorrent_1">IonTorrent</a></li>
<li><a href="#pacbio_1">PacBio</a></li>
<li><a href="#minion-oxford-nanopore_1">MinION Oxford Nanopore</a></li>
<li><a href="#helicos_1">Helicos</a></li>
<li><a href="#capillary-sanger">Capillary (Sanger)</a></li>
<li><a href="#completegenomics">CompleteGenomics</a></li>
</ul>
</li>
<li><a href="#contact-sra">Contact SRA</a></li>
</ul>
</div>
<h2 id="introduction">Introduction</h2>
<p>This page reviews the submission file formats currently supported by the Sequence Read Archives (SRA) at NCBI, EBI, and DDBJ, and gives guidance to submitters about current and future file formats and policies regarding SRA submissions.</p>
<p>Some things to keep in mind:</p>
<ul>
<li>The SRA is a <strong>raw data</strong> archive, and requires per-base quality scores for all submitted data. Therefore, FASTA and other sequence-only formats are not sufficient for submission! FASTA can, however, be submitted as a reference sequence(s) for BAM files or as part of a FASTA/QUAL pair (see below).</li>
<li>SRA accepts binary files such as <strong>BAM</strong>, <strong>SFF</strong>, and <strong>HDF5</strong> formats and text formats such as <strong>FASTQ</strong>.</li>
</ul>
<h2 id="bam-files">BAM files</h2>
<p>Binary Alignment/Map files (BAM) represent one of the preferred SRA submission formats. BAM is a compressed version of the Sequence Alignment/Map (SAM)
format (see <a title="SAMv1" href="https://samtools.github.io/hts-specs/SAMv1.pdf">SAMv1</a> (.pdf)). BAM files can be decompressed to a human-readable
text format (SAM) using SAM/BAM-specific utilities
(e.g. <a target="_blank" title="samtools home page" href="http://www.htslib.org">samtools <img src="/core/assets/sra/images/offsite.png" alt="Different site " width="15" /></a>) and can contain unaligned sequences as well. SRA recommends aligning to an unmodified known reference,
if possible, to enable subsequent users to view the alignments in the Sequence Viewer or to compare the alignments with
other alignments on the same reference.</p>
<p>SAM is a tab-delimited format including both the raw read data and information about the
alignment of that read to a known reference sequence(s). There are two main sections in a SAM file, the header and the alignment (sequence read)
sections, each of which is described below. Note that this documentation will focus on a description of the SAM format with respect
to submission of BAM files to the SRA (i.e. SRA does not accept SAM files for submission). A more comprehensive discussion of the
format specifications can be found at the <a target="_blank" title="samtools file formats, new window" href="http://www.htslib.org/doc/#file-formats">samtools <img src="/core/assets/sra/images/offsite.png" alt="Different site " width="15" /></a> website.</p>
<p>SAM Header Example:</p>
<div class="gray-box"><span class="code">
@HD    VN:1.4    SO:coordinate<br />
@SQ    SN:CHROMOSOME_I    LN:15072423<br />
UR:ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/invertebrates/Caenorhabditis_elegans/<br />
WBcel215/Primary_Assembly/assembled_chromosomes/FASTA/chrI.fa.gz    AS:ce10    <br />
SP:Caenorhabditis elegans<br />
 <br />
@SQ    SN:CHROMOSOME_II    LN:15279345    <br />
UR:ftp://ftp.ncbi.nlm.nih.gov/genbank/genomes/Eukaryotes/invertebrates/Caenorhabditis_elegans/<br />
WBcel215/Primary_Assembly/assembled_chromosomes/FASTA/chrII.fa.gz    
AS:ce10    <br />
SP:Caenorhabditis elegans
 <br />
 <br />
@RG    ID:1    PL:ILLUMINA    LB:C_ele_05    DS:WGS of C elegans    PG:BamIndexDecoder<br />
@PG    ID:bwa    PN:bwa    VN:0.5.10-tpx
</span></div>
<p>Ideally, the <strong><code>SN</code></strong> value should be a versioned accession (e.g., <strong><code>NC_003279.7</code></strong>, rather than <strong><code>CHROMOSOME_I</code></strong>). This will allow
the SRA to unambiguously identify the reference sequence(s) and process the BAM file with minimal intervention. Otherwise, submitters
are strongly encouraged to include the "URL/URI" that can be used to obtain the reference sequence(s) and <strong><code>AS</code></strong>
tags to clearly define which assembly has been used (as above).</p>
<p>If the data are instead aligned to a local or submitter-defined set of references (including any modifications to accessioned assemblies),
then the submitter must include a <strong><code>reference fasta</code></strong> along with each submitted bam file. Note: the FASTA header line(s) MUST match
the <strong><code>SN</code></strong> names provided in the BAM file exactly.</p>
<p>Deviation from these recommended practices will require manual intervention by SRA staff
in order to process a BAM file and can delay completion of a submission and acquisition of accession numbers.</p>
<p>SAM Alignment Example: </p>
<div class="gray-box"><span class="code">
3658435    145    CHROMOSOME_I    1    0    100M    CHROMOSOME_II    2716898    0    <br />
GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT<br />
AAGCCT    <br />
@CCC?:CCCCC@CCCEC&gt;AFDFDBEGHEAHCIGIHHGIGEGJGGIIIHFHIHGF@HGGIGJJJJJIJJJJJJJJJJJJJJJJJJJJJHHHHHFF<br />
FFFCCC    RG:Z:1    NH:i:1    NM:i:0<br />
    <br />
5482659    65    CHROMOSOME_I    1    0    100M    CHROMOSOME_II    11954696    0    <br />
GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT<br />
AAGCCT    <br />
CCCFFFFFHHGHGJJGIJHIJIJJJJJIJJJJJIJJGIJJJJJIIJIIJFJJJJJFIJJJJIIIIGIIJHHHHDEEFFFEEEEEDDDDCDCCCA<br />
AA?CC:    RG:Z:1    NH:i:1    NM:i:0
</span></div>
<p>The header and alignment section are internally consistent: each aligned read has an RNAME
(reference sequence name, 3<sup>rd</sup> field) that matches an SN tag value from the header (e.g., <strong><code>CHROMOSOME_I</code></strong>), and,
if provided, the alignment read group optional field (<strong><code>RG:Z:</code></strong>) is consistent with the read group ID in the header (<strong><code>1</code></strong>). It is
also important to ensure that the FLAG fields (2<sup>nd</sup> field in each line) are correctly set for the data.
The SRA pipeline will attempt to resolve incorrect FLAG values, but sufficiently incorrect values can lead to processing errors.
The SRA does not archive optional and non-standard tags/field values contained in the alignment section. However, the entire header
section of the bam file is retained. Additionally, although the SAM format allows for an equal sign (<code>=</code>) in the sequence field to
represent a match to the reference sequence or only an asterisk (<code>*</code>) in both the sequence and quality fields,
the SRA processing software does not recognize either of these formats.</p>
<p>Please note that unexpected notations used to indicate paired reads can lead to failure to recognize the pairs and an
improper SRA archive (i.e. paired reads are treated like fragments). For example, using <strong><code>:0</code></strong> and <strong><code>:1</code></strong> at the end of the
read names is atypical and is currently not recognized as an indication of read 1 and 2 in a pair. It would be better to exclude
these notations and provide the two reads with the same names. Expected notations for particular platforms will work. For example,
appending <strong><code>/1</code></strong> or <strong><code>/2</code></strong> to Illumina read names is an expected notation. Further, neglecting to set the proper bits for paired reads in the
SAM/BAM flags (e.g. multi-segment template 1-bit, first segment 64-bit, and last segment 128-bit) or splitting paired reads into separate
bam files can result in an improper SRA archive or failure to generate the SRA archive.</p>
<div class="warning-message"> <img src="/core/assets/sra/images/tack.png" alt="Tack" height="20" /> When submitting BAM files of
aligned reads to the SRA you must also specify an assembly - the reference genome that your reads were aligned against.
You can identify your reference assembly by its name or accession from
<a title="the NCBI Assembly database, new window" target="_blank" href="/assembly">the NCBI Assembly database</a>.
<a title="University of California Santa Cruz (UCSC) Genome Browser, new window" target="_blank" href="https://genome.ucsc.edu/">UCSC</a> and
<a title="Ensembl, new window" target="_blank" href="http://www.ensembl.org/index.html">Ensembl</a> assembly names may also
be used. If the assembly is not available from a public repository you will need to submit your own (<span class="bold">local</span>)
assembly in FASTA format (<span class="bold">reference_fasta</span>) along with your BAM file.
</div>
<h2 id="cram-files">CRAM files</h2>
<p>Another acceptable SRA submission format is the CRAM format (see <a title="CRAMv3" href="https://samtools.github.io/hts-specs/CRAMv3.pdf">CRAMv3</a> (.pdf)).
Files received in this format are converted to the BAM
format for processing. The references provided in this format are treated in the same manner as
BAM references with the added possibility of a check against the European Nucleotide Archive (ENA)
<a target="_blank" title="CRAM reference registry" href="https://www.ebi.ac.uk/ena/software/cram-reference-registry">CRAM reference registry</a>. </p>
<h2 id="sff-files">SFF files</h2>
<p>In the absence of a BAM file, Standard Flowgram Files or SFF is the preferred input format for 454 Life Sciences (now part of Roche) data; IonTorrent data can also be submitted as SFF. Extensive technical details about the format can be obtained <a target="_blank" title="SFF files, new window" href="/Traces/trace.cgi?view=doc_formats#sffhere">here <img src="/core/assets/sra/images/offsite.png" alt="Different site " width="15" /></a>.</p>
<div class="warning-message"> <img src="/core/assets/sra/images/tack.png" alt="Tack" height="20" /> Submitters of SFF data should ensure
that the data are demultiplexed (if barcoded); barcoding is particularly common in pyrotag / 16S rRNA amplicon sequencing.
</div>
<h2 id="hdf5-files">HDF5 files</h2>
<p>HDF5 is a data model, library, and file format for storing and managing data.
The SRA accepts <code>bas.h5</code> and <code>bax.h5</code> file submissions for PacBio-based submission and <code>.fast5</code> files for submissions related to MinION Oxford Nanopore.</p>
<h3 id="pacbio">PacBio</h3>
<p>Submission of data from the
RS II instrument requires one (1) <code>bas.h5</code> file and three (3) <code>bax.h5</code> files.
Do not link more than one PacBio RS II to an SRA run and please do not change the <code>bax.h5</code> file names from
those indicated in the <code>bas.h5</code> file.</p>
<p>Depending on the platform used for your PacBio sequencing project, the following data files with respective
extensions are produced and required for SRA submission.</p>
<table>
<colgroup>
<col width="50%" />
<col width="50%" />
</colgroup>
<thead>
<tr class="header">
<th>PacBio RS Platform</th>
<th>Data Files Delivered</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>PacBio RS</td>
<td><ol>
<li>xxxx.metadata.xml (optional but desirable)</li>
<li>xxxx.bas.h5</li>
</ol></td>
</tr>
<tr class="even">
<td>PacBio RS II</td>
<td><ol>
<li>xxxx.metadata.xml (optional but desirable)</li>
<li>xxxx.bas.h5 (optional but desirable)</li>
<li>xxxx.1.bax.h5</li>
<li>xxxx.2.bax.h5</li>
<li>xxxx.3.bax.h5</li>
</ol></td>
</tr>
</tbody>
</table>
<p>Please be sure to list the files for each SMRT Cell in a separate Run or on a separate row of your sra_metadata sheet.</p>
<p>PacBio documentation on <strong><code>bax.h5 / bas.h5</code></strong> format: <a target="_blank" title="bas.h5 Reference Guide" href="/core/assets/sra/files/bas.h5ReferenceGuide.pdf">bas.h5ReferenceGuide.pdf</a>.</p>
<h3 id="minion-oxford-nanopore">MinION Oxford Nanopore</h3>
<p>In this case, there are 1-3 sequences per fast5 HDF file (one spot of information) and the entire set of <strong><code>fast5</code></strong>
files should be submitted in a <strong><code>tar.gz</code></strong> file. You must submit the fast5 files generated after base calling.</p>
<p>Learn more about this platform at <a target="_blank" title="Oxford Nanopore Technologies" href="https://www.nanoporetech.com/">Oxford Nanopore Technologies <img src="/core/assets/sra/images/offsite.png" alt="Different site " width="15" /></a> website.</p>
<h3 id="hdf5-tools">HDF5 tools</h3>
<p>HDF5 tools: <a target="_blank" title="HDF5 tools" href="http://www.hdfgroup.org/products/hdf5_tools">http://www.hdfgroup.org/products/hdf5_tools <img src="/core/assets/sra/images/offsite.png" alt="Different site " width="15" /></a></p>
<h2 id="fastq-files">FASTQ files</h2>
<p>Fastq consists of a defline that contains a read identifier and possibly other information, nucleotide
base calls, a second defline, and per-base quality scores, all in text form. There are many variations.</p>
<p>The following terms and formats are defined in general:</p>
<ul>
<li>Identifier and other information: text string terminated by white space.</li>
<li>Bases: fastq sequence should contain standard base calls (ACTGactg) or unknown bases (Nn) and can vary in length.</li>
<li>
<p>Qualities options:</p>
<table>
<thead>
<tr>
<th></th>
<th></th>
</tr>
</thead>
<tbody>
<tr>
<td>Decimal-encoding, space-delimited</td>
<td><code>[0-9]+ | &lt;quality&gt;\s[0-9]+</code></td>
</tr>
<tr>
<td>Phred-33 ASCII</td>
<td><code>[\!\"\#\$\%\&amp;\'\(\)\*\+,\-\.\/0-9:;&lt;=&gt;\?\@A-I]+</code></td>
</tr>
<tr>
<td>Phred-64 ASCII</td>
<td><code>[\@A-Z\[\\\]\^_`a-h]+</code></td>
</tr>
</tbody>
</table>
<p>Quality string length should be equal to sequence length.</p>
<p>In a limited set of cases, log odds or non-ASCII numerical quality values will succeed during an SRA submission.</p>
</li>
</ul>
<p>Files from various platforms employing this format are acceptable:</p>
<div class="gray-box"><span class="code">
@&lt;identifier and expected information&gt;<br />
&lt;sequence&gt;<br />
+&lt;identifier and other information OR empty string&gt;<br />
&lt;quality&gt;
</span></div>
<p>Here, each instance of Identifier, Bases, and Qualities is newline-separated.
Extra information added beyond the <code>&lt;identifier and expected information&gt;</code> examples is likely to be discarded/ignored.</p>
<p>As indicated above, the Qualities string can be space-separated numeric Phred scores or an ASCII string of the Phred
scores with the ASCII character value = Phred score plus an offset constant used to place the ASCII characters in the printable
character range. There are 2 predominant offsets: 33 (0 = !) and 64 (0=@).</p>
<h3 id="pairedend-fastq">Paired-end FASTQ</h3>
<p>Although generally the case, there are some instances where paired reads are not a forward read paired with a reverse read.</p>
<p>Paired-end data submitted in FASTQ format should be submitted in one of two formats:</p>
<ol>
<li>As separate files for forward and reverse reads, in which the reads are in the same order.</li>
<li>As interleaved, or "8-line", FASTQ, in which forward and reverse reads alternate in the file and are in order (i.e., read "1F", followed by read "1R", then read "2F", then "2R").</li>
</ol>
<p>SRA supports the following forward/reverse read indicators: <code>'/1'</code> and <code>'/2'</code> at the end of the read name or newer Illumina style <code>'1:Y:18:ATCACG'</code> and <code>'2:Y:18:ATCACG'</code>.</p>
<div class="warning-message"> <img src="/core/assets/sra/images/tack.png" alt="Tack" height="20" /> Concatenated FASTQ (in which all forward reads are followed by all reverse reads) is not supported.
</div>
<h3 id="platform-specific-fastq-files">Platform specific FASTQ files</h3>
<h4 id="454-fastq"><a data-jig="ncbitoggler" href="#454">454 fastq</a></h4>
<div id="454">
<div class="gray-box"><span class="code">@&lt;454_universal_accession&gt;</span></div>
<p>Under Roche 454, SRA accepts both 'pre-split' and 'post-split' 454 fastq sequences. Paired 'post-split' 454 reads must be provided in separate
files or in the interleaved format. 'Split' means the 454 linker has been located/removed and used to split the sequence into biological
read pairs (and all other technical reads have been removed).</p>
</div>
<h4 id="ion-torrent-fastq"><a data-jig="ncbitoggler" href="#iontorrent">Ion Torrent fastq</a></h4>
<div id="iontorrent">
<div class="gray-box"><span class="code">@&lt;Run_ID&gt;:&lt;Chip_Row_Coordinate&gt;:&lt;Chip_Column_Coordinate&gt;</span></div>
<p>In the same manner as Roche 454, SRA only accepts 'pre-split' Ion Torrent sequences or 'post-split' Ion Torrent single read fragments in a fastq form. Paired 'post-split' Ion Torrent reads will require submission in a BAM file. 'Split' means the Ion Torrent linker has been located/removed and used to split the sequence into biological read pairs (and all other technical reads have been removed).</p>
</div>
<h4 id="recent-illumina-fastq"><a data-jig="ncbitoggler" href="#recentIllum">Recent Illumina fastq</a></h4>
<div id="recentIllum">
<div class="gray-box"><span class="code">
@&lt;instrument&gt;:&lt;run number&gt;:&lt;flowcell ID&gt;:&lt;lane&gt;:&lt;tile&gt;:&lt;x-pos&gt;:&lt;y-pos&gt; &lt;read&gt;:&lt;is filtered&gt;:&lt;control number&gt;:&lt;index&gt;
</span></div>
<p><code>&lt;index&gt;</code> values for Illumina fastq can be barcodes.</p>
</div>
<h4 id="older-illumina-fastq"><a data-jig="ncbitoggler" href="#oldIllum">Older Illumina fastq</a></h4>
<div id="oldIllum">
<div class="gray-box"><span class="code">@&lt;machine_id&gt;:&lt;lane&gt;:&lt;tile&gt;:&lt;x_coord&gt;:&lt;y_coord&gt;#&lt;index&gt;/&lt;read&gt;
</span></div>
<p><code>&lt;index&gt;</code> values for Illumina fastq can be barcodes.</p>
</div>
<h4 id="qiime-demultiplexed-sequences-in"><a data-jig="ncbitoggler" href="#QIIME">QIIME de-multiplexed sequences in fastq</a></h4>
<div id="QIIME">
<div class="gray-box"><span class="code">
@&lt;SampleID-based_identifier&gt; &lt;Original_information&gt; orig_bc=&lt;original_barcode&gt; new_bc=&lt;corrected_barcode&gt; bc_diffs=&lt;0|1&gt;
</span></div>
</div>
<h4 id="pacbio-ccs-circular-consensus-se"><a data-jig="ncbitoggler" href="#CCS">PacBio CCS (Circular Consensus Sequence) or RoI (Read of Insert) read</a></h4>
<div id="CCS">
<div class="gray-box"><span class="code">
@&lt;MovieName&gt;/&lt;ZMW_number&gt;
</span></div>
</div>
<h4 id="pacbio-ccs-subread"><a data-jig="ncbitoggler" href="#CCSsubread">PacBio CCS subread</a></h4>
<div id="CCSsubread">
<div class="gray-box"><span class="code">
@&lt;MovieName&gt;/&lt;ZMW_number&gt;/&lt;subread-start&gt;_&lt;subread-end&gt;
</span></div>
</div>
<h4 id="helicos-fastq-with-a-fixed-ascii"><a data-jig="ncbitoggler" href="#helicos">Helicos fastq with a fixed ASCII-based Phred value for quality</a></h4>
<div id="helicos">
<div class="gray-box"><span class="code">@VHE-242383071011-15-1-0-2
</span></div>
<p>Characteristic use of a quality <code>'/'</code>, which gives a Phred value of 14.</p>
<p>The native format for Helicos is fasta, so converting to fastq requires creating a default quality score. The default value selected by the SRA team is '14'.</p>
</div>
<h4 id="fasta-files"><a data-jig="ncbitoggler" href="#fasta">FASTA files</a></h4>
<div id="fasta">
<p><strong>Fasta</strong> files adhering to the definition lines described in the <strong>fastq</strong> section are acceptable, too, although <strong>fastq</strong> is preferred
(a file type of <strong>fastq</strong> should still be specified). The SRA assigns a default quality value of 30 in this case and expects this format:</p>
<div class="gray-box"><span class="code">
&gt;(identifier and other information)<br />
&lt;sequence&gt;
</span></div>
</div>
<h2 id="fasta-with-qual-file-pairs">FASTA with QUAL file pairs</h2>
<p><strong>Fasta</strong> files may be submitted with corresponding <strong>qual</strong> files, too. These are recognized in the SRA data
processing pipeline as equivalent to <strong>fastq</strong> and should be specified as <strong>fastq</strong> when submitting the data files.</p>
<p>Files from some platforms (mostly older Illumina and Roche 454) employing this format are acceptable and the
entries in the pair of files should look like:</p>
<p>File 1 </p>
<div class="gray-box"><span class="code">
&gt;READNAME<br />
BASES<br />
</span></div>
<p>File 2 </p>
<div class="gray-box"><span class="code">
&gt;READNAME<br />
QUALITIES
</span></div>
<p>Where READNAME must be identical between files for a given read, and QUALITIES are generally whitespace-separated decimal values.</p>
<p>Note the following guidelines for FASTA/QUAL pairs of files:</p>
<p>In a given pair of files, there must be the same number of reads in both. For a given read, there must be the same number of BASES and QUALITIES, i.e., if the BASES are trimmed to remove barcodes, then the same scores must be removed from the QUALITIES, etc.</p>
<h2 id="csfasta-with-qual-files">CSFASTA with QUAL Files</h2>
<p>The files have an optional header that is identified by lines that begin with the hash/pound/number sign (#). The HEADER can be defined as:</p>
<div class="gray-box"><span class="code">
# &lt;date&gt; &lt;path&gt; [--flag]* --tag &lt;tag&gt; --minlength=&lt;length&gt; --prefix=&lt;prefix&gt; &lt;path&gt;<br />
# Cwd: &lt;path&gt;<br />
# Title: &lt;flowcell&gt;
</span></div>
<p>The permissible CSFASTA format is as follows: </p>
<div class="gray-box"><span class="code">
#HEADER (multiple lines)<br />
&gt;TAGNAME<br />
BASES
</span></div>
<p>The permissible QUAL format is as follows: </p>
<div class="gray-box"><span class="code">
#HEADER (multiple lines)<br />
&gt;TAGNAME<br />
QUALITIES
</span></div>
<p>As with FASTA/QUAL pairs, there are several rules for pairs of CSFASTA/QUAL files. TAGNAME must be identical between files for a given read, and QUALITIES are generally whitespace-separated decimal values.</p>
<p>Note the following guidelines for CSFASTA/QUAL pairs of files:</p>
<p>In a given pair of files, there must be the same number of reads in both. For a given read, there must be the same number of color space digits and QUALITIES, i.e., the BASES line is typically 1 character longer than the number of QUALITIES (due to the color space indexing base that begins each BASES string). HEADER must be identical between paired files.</p>
<p>Also see <a title="SOLiD™ Data Format and File Definitions Guide" href="http://tools.thermofisher.com/content/sfs/manuals/cms_058717.pdf">SOLiD™ Data Format and File Definitions Guide</a> (.pdf)</p>
<h2 id="legacy-formats">Legacy Formats</h2>
<p>These formats are still accepted by SRA, but are considered out-of-date and not recommended for submission. If you are able to update your files to a more common format, please do so before submitting to SRA.</p>
<h3 id="srf-files">SRF files</h3>
<p>SRF is a generic format for DNA sequence data. This format has sufficient flexibility to store data from current and future DNA sequencing technologies. This is a single input file format for all downstream applications and a read lookup index enabling downstream formats to reference reads without duplication of all of the read specific information.</p>
<p>Sequence Read Format (SRF) homepage: <a target="_blank" title="Sequence Read Format (SRF) homepage" href="http://srf.sourceforge.net/">http://srf.sourceforge.net/ <img src="/core/assets/sra/images/offsite.png" alt="Different site " width="15" /></a>.</p>
<h3 id="native-illumina">Native Illumina</h3>
<p>Submitters may submit native data from the primary analysis output of the Illumina GA.</p>
<p>The filetype is <strong><code>Illumina_native</code></strong> and constituent files for a run should be tarred together into a single tar file.</p>
<p>Illumina GA readname can be defined as follows: </p>
<div class="gray-box"><span class="code">
&lt;flowcell&gt; = [a-zA-Z0-9_-]{2}+<br />
       &lt;lane&gt; = 1..8<br />
       &lt;tile&gt; = 1..1024<br />
            &lt;x&gt; = 1..4096<br />
            &lt;y&gt; = 1..4096<br />
&lt;sep&gt; ::= [_\t]<br />
READNAME ::= [&lt;flowcell&gt;&lt;sep&gt; | s_]&lt;lane&gt;&lt;sep&gt;&lt;tile&gt;&lt;sep&gt;&lt;x&gt;&lt;sep&gt;&lt;y&gt;
</span></div>
<p>Within a related set of files, reads are grouped by tile. Reads should be fixed length, and the number of quality scores and bases is the same in each.</p>
<p>Allowed characters:</p>
<p>BASES: <code>AaCcTtGgNn</code></p>
<p>QUALITIES: <code>[\!\"\#\$\%\&amp;\'\(\)\*\+,\-\.\/0-9:;&lt;=&gt;\?\@A-I]+</code> or <code>[\@A-Z\[\\\]\^_`a-h]+</code></p>
<h3 id="qseq">QSEQ</h3>
<p>The basecalling program Bustard emits a <code>_qseq.txt</code> file for each lane (two files for mate pairs). Paired-end data are presented in the orientation in which they were sequenced (5'-3' &amp; 3'-5').</p>
<p>Each read is contained on a single line with tab separators in the following format:</p>
<ul>
<li>Machine name: Unique identifier of the sequencer.</li>
<li>Run number: Unique number to identify the run on the sequencer.</li>
<li>Lane number: Positive Integer (currently 1-8).</li>
<li>Tile number: Positive Integer.</li>
<li>X coordinate of the spot: Integer (can be negative).</li>
<li>Y coordinate of the spot: Integer (can be negative).</li>
<li>Index: Positive Integer (no indexing should have a value of 1).</li>
<li>Read Number: 1 for single reads; 1 or 2 for paired-ends.</li>
<li>Sequence (BASES)</li>
<li>Quality: the calibrated quality string (QUALITIES).</li>
<li>Filter: Did the read pass filtering? 0 - No, 1 - Yes.</li>
</ul>
<h2 id="machine-specific-information">Machine Specific Information</h2>
<p>File types accepted by platform in approximate order of preference (formats that are least desirable marked with '*', those with uncertain outcome marked with '?'):</p>
<h3 id="illumina">Illumina</h3>
<p>bam, fastq, qseq, fasta+qual<sup>*?</sup>, native<sup>*</sup>, srf<sup>*?</sup></p>
<h3 id="solid">SOLiD</h3>
<p>bam, csfasta + QV.qual, srf<sup>*?</sup></p>
<h3 id="roche-454-formerly-life-sciences">Roche 454 (formerly Life Sciences)</h3>
<p>bam, sff, fastq, fasta+qual<sup>*?</sup></p>
<h3 id="iontorrent_1">IonTorrent</h3>
<p>bam, sff, fastq, fasta+qual<sup>*?</sup></p>
<h3 id="pacbio_1">PacBio</h3>
<p>bam, hdf5, fastq</p>
<h3 id="minion-oxford-nanopore_1">MinION Oxford Nanopore</h3>
<p>hdf5, fastq</p>
<h3 id="helicos_1">Helicos</h3>
<p>bam, fastq</p>
<h3 id="capillary-sanger">Capillary (Sanger)</h3>
<p>bam, fastq<sup>*?</sup></p>
<h3 id="completegenomics">CompleteGenomics</h3>
<p>native, bam<sup>*</sup></p>
<p>For the Complete Genomics format, see <a target="_blank" title="CG Data File Formats" href="http://www.completegenomics.com/customer-support/documentation/100357139-2">CG Data File Formats <img src="/core/assets/sra/images/offsite.png" alt="Different site " width="15" /></a>. This format requires providing tarred versions of the ASM, LIB, and MAP sub-directories for a successful submission to take place. Additionally, processing of reference sequences occurs in the same manner as for BAM and CRAM files. For this format, please contact SRA prior to submission.</p>
<hr />
<h2 id="contact-sra">Contact SRA</h2>
<p>Contact SRA staff for assistance at <a href="mailto:sra@ncbi.nlm.nih.gov">sra@ncbi.nlm.nih.gov</a></p>
</div>
<!--/.col1-->
<div class="col2">
<div id="shared-content-1">
<h2 data-heading="h2" data-no-toc="true">Getting Started</h2>
<ul>
<li><a title="SRA Getting Started" href="/sra/docs/">Getting Started</a></li>
</ul>
<h2 data-heading="h2" data-no-toc="true">Submitting Data to SRA</h2>
<h3 class="nav" data-heading="h3" data-no-toc="true">General</h3>
<ul>
<li><a title="SRA Submission Quick Start" href="/sra/docs/submit">Quick Start</a></li>
<li><a title="How to submit BioProject and BioSample for SRA" href="/sra/docs/submitbio">BioProject &amp; BioSample</a></li>
<li><a title="SRA file format guide" href="/sra/docs/submitformats">File Format Guide</a></li>
<li><a title="Understanding SRA Metadata and Submission Overview" href="/sra/docs/submitmeta">SRA Metadata Overview</a></li>
<li><a title="SRA File Upload" href="/sra/docs/submitfiles">SRA File Upload</a></li>
<li><a title="SRA Frequently Asked Questions" href="/sra/docs/submitquestions">Frequently Asked Questions</a></li>
</ul>
<h3 class="nav" data-heading="h3" data-no-toc="true">SRA Submission Portal</h3>
<ul>
<li><a title="Submitting Data in Submission Portal" href="/sra/docs/submitportal">Submitting to SRA</a></li>
<li><a title="Troubleshooting SRA submission" href="/sra/docs/submitspfiles">Troubleshooting Submission</a></li>
</ul>
<h3 class="nav" data-heading="h3" data-no-toc="true">Submitting for dbGaP &amp; GEO</h3>
<ul>
<li><a title="Submitting SRA data for dbGaP project" href="/sra/docs/submitdbgap">Submitting for dbGaP</a></li>
<li><a title="Submitting SRA data for GEO" href="/sra/docs/submitgeo">Submitting for GEO</a></li>
</ul>
<h3 class="nav" data-heading="h3" data-no-toc="true">Updating SRA Data</h3>
<ul>
<li><a title="Updating SRA submission" href="/sra/docs/submitupdate">Updating SRA data</a> </li>
<li><a title="Request status change" href="/sra/docs/request-status-change">Request status change</a> </li>
<li><a title="Change release date" href="/sra/docs/submitsra">Change Release Date</a></li>
</ul>
</div>
</div>
<!--/.col2-->
<div class="col3">
</div>
<!--/.col3-->
<div class="col4">
</div>
<!--/.col4-->
<div class="col5">
</div>
<div class="col6">
</div>
<div class="col7">
</div>
<div class="col8">
</div>
<div class="col9">
</div>
</div><!--/.content-->
</div><!--/.container-->
<div id="NCBIFooter_dynamic">
<div class="breadcrumbs">You are here:
<span id="breadcrumb_text"><a href="/guide/">NCBI</a></span></div>
<a id="help-desk-link" class="help_desk" href="https://support.ncbi.nlm.nih.gov/ics/support/default.asp?Time=2025-03-06T16:54:20-05:00&amp;Snapshot=%2Fprojects%2Fstaticsites%2FSraDocs@1.5&amp;Host=portal107&amp;ncbi_phid=CE8E9A0D7CA138E100000000008D0077&amp;ncbi_session=CE8B5AF87C7FFCB1_0191SID&amp;from=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fsra%2Fdocs%2Fsubmitformats%2F&amp;Ncbi_App=sra&amp;Page=sra-custom-page&amp;style=classic&amp;deptID=28049" target="_blank">Support Center</a>
<noscript><img alt="" src="/stat?jsdisabled=true&amp;ncbi_app=sra&amp;ncbi_db=&amp;ncbi_pdid=sra-custom-page&amp;ncbi_phid=CE8E9A0D7CA138E100000000008D0077" /></noscript>
</div>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<div xmlns="http://www.w3.org/1999/xhtml" class="footer" id="footer" xml:base="http://127.0.0.1/sites/static/header_footer">
<section class="icon-section">
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
<div class="grid-container container">
<div class="icon-section_container">
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter">
<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40" fill="none">
<title>Twitter</title>
<g id="twitterx1008">
<path id="path1008" d="M6.06736 7L16.8778 20.8991L6.00001 32.2H10.2L18.6 23.1L25.668 32.2H34L22.8 17.5L31.9 7H28.4L20.7 15.4L14.401 7H6.06898H6.06736ZM9.66753 8.73423H12.9327L29.7327 30.4658H26.5697L9.66753 8.73423Z" fill="#5B616B"></path>
</g>
</svg>
</a>
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>Facebook</title>
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>LinkedIn</title>
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<defs>
<style>
.cls-11,
.cls-12 {
fill: #737373;
}
.cls-11 {
fill-rule: evenodd;
}
</style>
</defs>
<title>GitHub</title>
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
</path>
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
</path>
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
</path>
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
</path>
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
</path>
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
</path>
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
</svg></a>
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
<defs><style>.cls-1{fill:#737373;}</style></defs>
<title>NCBI Insights Blog</title>
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
</svg>
</a>
</div>
</div>
</section>
<section class="container-fluid bg-primary">
<div class="container pt-5">
<div class="row mt-3">
<div class="col-lg-3 col-12">
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
<ul class="list-inline social_media">
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
<title>Twitter</title>
<g id="twitterx1009" clip-path="url(#clip0_65276_3946)">
<path id="Vector_Twitter" d="M17.5006 34.6565C26.9761 34.6565 34.6575 26.9751 34.6575 17.4996C34.6575 8.02416 26.9761 0.342773 17.5006 0.342773C8.02514 0.342773 0.34375 8.02416 0.34375 17.4996C0.34375 26.9751 8.02514 34.6565 17.5006 34.6565Z" fill="#205493" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
<path id="path1009" d="M8.54811 8.5L16.2698 18.4279L8.50001 26.5H11.5L17.5 20L22.5486 26.5H28.5L20.5 16L27 8.5H24.5L19 14.5L14.5007 8.5H8.54927H8.54811ZM11.1197 9.73873H13.4519L25.4519 25.2613H23.1926L11.1197 9.73873Z" fill="white"></path>
</g>
<defs>
<clipPath id="clip0_65276_3946">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
<title>Facebook</title>
<g id="Facebook" clip-path="url(#clip0_1717_1086)">
<path id="Vector_Facebook" d="M15.1147 29.1371C15.1147 29.0822 15.1147 29.0296 15.1147 28.9747V18.9414H11.8183C11.6719 18.9414 11.6719 18.9414 11.6719 18.8018C11.6719 17.5642 11.6719 16.3289 11.6719 15.0937C11.6719 14.9793 11.7062 14.9518 11.816 14.9518C12.8683 14.9518 13.9206 14.9518 14.9751 14.9518H15.1215V14.8329C15.1215 13.8057 15.1215 12.774 15.1215 11.7492C15.1274 10.9262 15.3148 10.1146 15.6706 9.37241C16.1301 8.38271 16.9475 7.60378 17.9582 7.19235C18.6492 6.90525 19.3923 6.76428 20.1405 6.7783C21.0029 6.79202 21.8653 6.83091 22.7278 6.86065C22.8879 6.86065 23.048 6.89496 23.2082 6.90182C23.2974 6.90182 23.3271 6.94071 23.3271 7.02993C23.3271 7.54235 23.3271 8.05477 23.3271 8.5649C23.3271 9.16882 23.3271 9.77274 23.3271 10.3767C23.3271 10.4819 23.2974 10.5139 23.1921 10.5116C22.5379 10.5116 21.8814 10.5116 21.2271 10.5116C20.9287 10.5184 20.6316 10.5528 20.3395 10.6146C20.0822 10.6619 19.8463 10.7891 19.6653 10.9779C19.4842 11.1668 19.3672 11.4078 19.3307 11.6669C19.2857 11.893 19.2612 12.1226 19.2575 12.3531C19.2575 13.1904 19.2575 14.0299 19.2575 14.8695C19.2575 14.8946 19.2575 14.9198 19.2575 14.9564H23.0229C23.1807 14.9564 23.183 14.9564 23.1624 15.1074C23.0778 15.7662 22.9885 16.425 22.9039 17.0816C22.8322 17.6321 22.7636 18.1827 22.698 18.7332C22.6729 18.9437 22.6797 18.9437 22.4693 18.9437H19.2644V28.8992C19.2644 28.9793 19.2644 29.0593 19.2644 29.1394L15.1147 29.1371Z" fill="white"></path>
<path id="Vector_2_Facebook" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
</g>
<defs>
<clipPath id="clip0_1717_1086">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="YouTube" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
<title>YouTube</title>
<g id="YouTube" clip-path="url(#clip0_1717_1101)">
<path id="Vector_Youtube" d="M26.2571 11.4791C25.9025 11.1589 25.5709 10.9576 24.228 10.834C22.5512 10.6785 20.2797 10.6556 18.564 10.6533H16.4365C14.7208 10.6533 12.4493 10.6785 10.7725 10.834C9.43196 10.9576 9.09798 11.1589 8.7434 11.4791C7.81464 12.321 7.6202 14.6268 7.59961 16.8938C7.59961 17.3178 7.59961 17.741 7.59961 18.1635C7.62706 20.4121 7.82837 22.686 8.7434 23.521C9.09798 23.8412 9.42967 24.0425 10.7725 24.1661C12.4493 24.3216 14.7208 24.3445 16.4365 24.3468H18.564C20.2797 24.3468 22.5512 24.3216 24.228 24.1661C25.5686 24.0425 25.9025 23.8412 26.2571 23.521C27.1722 22.6929 27.3735 20.451 27.4009 18.2206C27.4009 17.7402 27.4009 17.2599 27.4009 16.7795C27.3735 14.5491 27.1699 12.3072 26.2571 11.4791ZM15.5604 20.5311V14.652L20.561 17.5001L15.5604 20.5311Z" fill="white"></path>
<path id="Vector_2_Youtube" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
</g>
<defs>
<clipPath id="clip0_1717_1101">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
</ul>
</div>
<div class="col-lg-3 col-12">
<p class="address_footer text-white">National Library of Medicine<br />
<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
Bethesda, MD 20894</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<p><a href="https://www.nlm.nih.gov/web_policies.html" class="text-white">Web Policies</a><br />
<a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white">FOIA</a><br />
<a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white" id="vdp">HHS Vulnerability Disclosure</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
<a href="https://www.nlm.nih.gov/accessibility.html" class="text-white">Accessibility</a><br />
<a href="https://www.nlm.nih.gov/careers/careers.html" class="text-white">Careers</a></p>
</div>
</div>
<div class="row">
<div class="col-lg-12 centered-lg">
<nav class="bottom-links">
<ul class="mt-3">
<li>
<a class="text-white" href="//www.nlm.nih.gov/">NLM</a>
</li>
<li>
<a class="text-white" href="https://www.nih.gov/">NIH</a>
</li>
<li>
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
</li>
<li>
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
</section>
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
</div>
</div>
<!--/.footer-->
<p class="last-updated small">Last updated: 2019-09-20T18:35:04Z</p>
</div>
<!--/.page-->
</div>
<!--/.wrap-->
<span class="PAFAppResources"></span>
</div><!-- /.twelve_col -->
</div>
<!-- /.grid -->
<!-- usually for JS scripts at page bottom -->
<span class="pagefixtures"></span>
<!-- CE8B5AF87C7FFCB1_0191SID /projects/staticsites/SraDocs@1.5 portal107 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4217302/js/3879255/4121861/4082503/4217304/4087685.js" snapshot="sra"></script></body>
</html>