nih-gov/www.ncbi.nlm.nih.gov/genbank/TSA

344 lines
21 KiB
XML

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<!-- AppResources meta begin -->
<meta name="paf-app-resources" content="" />
<!-- AppResources meta end -->
<!-- TemplateResources meta begin -->
<meta name="paf_template" content="StdNCol" />
<!-- TemplateResources meta end -->
<!-- Page meta begin -->
<!-- Page meta end -->
<!-- Logger begin -->
<meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_app" content="genbank" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_pdid" content="custom-page" />
<!-- Logger end -->
<title>Transcriptome Shotgun Assembly Sequence Database</title>
<!-- PageFixtures headcontent begin -->
<meta name="cms-local-nav-url" content="https://cms.ncbi.nlm.nih.gov//genbank/_nav" />
<!-- PageFixtures headcontent end -->
<!-- AppResources external_resources begin -->
<script type="text/javascript" src="/core/jig/1.15.6/js/jig.min.js"></script>
<!-- AppResources external_resources end -->
<!-- Page headcontent begin -->
<meta name="subsite" content="genbank" />
<meta name="path" content="genbank/tsa" />
<meta name="modified" content="2022-08-26T17:10:33Z" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="cms-edit-aux-url" content="http://cms.ncbi.nlm.nih.gov/node//edit" />
<!-- Page headcontent end -->
<!-- PageFixtures resources begin -->
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="http://127.0.0.1/sites/static/header_footer" xpointer="xmlns(x=http://www.w3.org/1999/xhtml) xpointer(//x:link[@rel='stylesheet'])"></xi:include>
<!-- PageFixtures resources end -->
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8D93597C7F2EE10000000000D700A9.m_5" />
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218137/css/4121862/3974050/3917732/251717/4108189/14534/45193/3534283/4128070/3407145/4005757/4062871.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218137/css/3529741/3529739.css" media="print" /></head>
<body class=" col2 custom-page">
<div class="grid">
<div class="col twelve_col nomargin shadow">
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
<div class="sysmessages">
<noscript>
<p class="nojs">
<strong>Warning:</strong>
The NCBI web site requires JavaScript to function.
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">more...</a>
</p>
</noscript>
</div>
<!--/.sysmessage-->
<div class="wrap">
<div class="page">
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="http://127.0.0.1/sites/static/header_footer" xpointer="xmlns(x=http://www.w3.org/1999/xhtml) xpointer(//x:div[@id='universal_header'])"></xi:include>
</div>
<!--/.header-->
<div class="header">
<div class="res_logo"><h1 class="res_name"><a href="/genbank/" title="GenBank home">GenBank</a></h1><h2 class="res_tagline">Public nucleic acid sequence repository</h2></div>
<div class="search"><form method="get" action="/nuccore/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="nuccore" selected="selected">Nucleotide</option><option value="sra">SRA</option><option value="books">Books</option><option value="clinvar" class="last">ClinVar</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label 
for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search Nucleotide" value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="isEnabled:false,disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'yes'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form></div>
</div>
<div class="nav_and_browser">
<div class="localnav"><ul class="jig-ncbilocalnav">
<li><a href="#">GenBank</a><ul>
<li><a href="/genbank/">About GenBank</a></li>
<li><a href="/genbank/submit_types">Submission Types</a></li>
<li><a href="/genbank/submit">Submission Tools</a></li>
<li><a href="/genbank/update">Update GenBank Records</a></li>
<li><a href="/nuccore/">Search</a></li>
<li><a href="/BLAST/Blast.cgi?CMD=Web&amp;PAGETYPE=BLASTHome">BLAST</a></li>
<li><a href="/genbank/statistics">Statistics</a></li>
<li><a href="/genbank/samplerecord/">Sample Record</a></li>
<li><a href="/genbank/sequencerevisionhistory/">Revision History</a></li>
<li><a href="/genbank/sequenceids/">Sequence IDs</a></li>
</ul>
</li>
<li><a href="#">Submit</a><ul>
<li><a href="/genbank/submit">Submission Tools</a></li>
<li><a href="/genbank/submit_types">Submission Types</a></li>
<li><a href="/WebSub/?tool=genbank">BankIt</a></li>
<li><a href="/genbank/table2asn">table2asn</a></li>
<li><a href="https://www.ncbi.nlm.nih.gov/sra/docs/sequence-data-processing">Sequence Data Processing</a></li>
</ul>
</li>
<li><a href="#">Genomes</a><ul>
<li><a href="/genbank/genomesubmit">Complete Genome Submission Guide</a></li>
<li><a href="/genbank/genomesubmit_annotation">Prokaryotic Genome Annotation Guide</a></li>
<li><a href="/genbank/eukaryotic_genome_submission_annotation">Eukaryotic Genome Annotation Guide</a></li>
<li><a href="/genbank/examples.wgs">Annotation Examples</a></li>
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/wgs/">Genome Submission Portal</a></li>
</ul>
</li>
<li><a title="Whole Genome Shotgun sequences and submissions" href="#">WGS</a><ul>
<li><a href="/genbank/wgs">About WGS</a></li>
<li><a href="/Traces/wgs">WGS Project List</a></li>
<li><a href="/genbank/wgs.submit">WGS Submission Guide</a></li>
<li><a href="/genbank/wgsfaq/">FAQ</a></li>
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/wgs/">Genome Submission Portal</a></li>
<li><a href="/genbank/eukaryotic_genome_submission_annotation">Eukaryotic Annotation Guide</a></li>
<li><a href="/genbank/genomesubmit_annotation">Prokaryotic Annotation Guide</a></li>
<li><a href="/genbank/asndisc">Discrepancy Report</a></li>
<li><a href="/assembly/agp/AGP_Specification/">AGP format</a></li>
</ul>
</li>
<li><a href="#">Metagenomes</a><ul>
<li><a href="/genbank/metagenome">About Metagenomes</a></li>
<li><a href="/genbank/structuredcomment">Structured Comment</a></li>
</ul>
</li>
<li><a href="#">TPA</a><ul>
<li><a href="/genbank/TPA">About TPA</a></li>
<li><a href="/genbank/tpafaq">FAQ</a></li>
<li><a href="/genbank/TPA-Exp">TPA-Exp</a></li>
<li><a href="/genbank/TPA-Inf">TPA-Inf</a></li>
</ul>
</li>
<li><a href="#">TSA</a><ul>
<li><a href="/genbank/TSA">About TSA</a></li>
<li><a href="/genbank/TSAguide">TSA Submission Guide</a></li>
<li><a href="/genbank/TSAfaq">FAQ</a></li>
</ul>
</li>
<li><a href="#">INSDC</a><ul>
<li><a href="/genbank/collab">About INSDC</a></li>
<li><a href="/genbank/collab/country">Geographic Location Name List</a></li>
<li><a href="/genbank/collab/db_xref">db_xref List</a></li>
<li><a href="http://www.insdc.org/documents/feature_table.html">Feature Table</a></li>
</ul>
</li>
<li><a href="#">Documentation</a><ul>
<li><a href="https://www.ncbi.nlm.nih.gov/sra/docs/sequence-data-processing/">Sequence Data Processing</a></li>
<li><a href="/genbank/submission_brokers">Submission Brokers</a></li>
<li><a href="/genbank/acc_prefix">Accession Number Prefixes</a></li>
<li><a href="/genbank/organelle_submit/">Organelle Submission Guide</a></li>
<li><a href="/genbank/monkeypox_submission/">Monkeypox Submission Guide</a></li>
<li><a href="/genbank/validation/">Common Submission Errors</a> </li>
<li><a href="/genbank/sequencecheck/">Ribosomal Submission Errors</a></li>
<li><a href="/genbank/sequencecheck/virus">Common Sequence Errors</a></li>
<li><a href="https://support.nlm.nih.gov/knowledgebase/category/?id=CAT-01240">Submission FAQs</a></li>
</ul>
</li>
<li><a href="#">Other</a><ul>
<li><a href="/genbank/htgs">About HTGs</a></li>
<li><a href="/genbank/dbest">About EST</a></li>
<li><a href="/genbank/dbgss">About GSS</a></li>
<li><a href="/genbank/tls">About TLS</a></li>
<li><a href="/genbank/tlsguide">Submit TLS</a></li>
</ul>
</li>
</ul></div>
</div>
<!-- was itemctrl -->
<div class="container">
<div id="maincontent" class="content col twelve_col last">
<div class="col1">
<h1 id="transcriptome-shotgun-assembly-s">Transcriptome Shotgun Assembly Sequence Database</h1>
<h2 id="what-is-the-transcriptome-shotgu">What is the Transcriptome Shotgun Assembly (TSA) Database?</h2>
<p><strong>TSA</strong> is an archive of computationally assembled transcript sequences from primary data such as ESTs and Next Generation Sequencing Technologies. The overlapping sequence reads from a complete transcriptome are assembled into transcripts by computational methods instead of by traditional cloning and sequencing of cloned cDNAs. The primary sequence data used in the assemblies must have been experimentally determined by the same submitter. TSA sequence records differ from GenBank records because there are no physical counterparts to the assemblies.</p>
<h2 id="how-do-tsa-sequence-records-diff">How Do TSA Sequence Records Differ from Other GenBank/EMBL/DDBJ Records?</h2>
<p>The display of a TSA sequence is similar to other International Nucleotide Sequence Database Collaboration (INSDC) records, but includes the following:</p>
<ul>
<li>The label 'TSA:' at the beginning of each Definition Line.</li>
<li>DBLINK<ul>
<li>BioProject</li>
<li>BioSample</li>
<li>Sequence Read Archive</li>
</ul>
</li>
<li>Keywords: TSA; Transcriptome Shotgun Assembly</li>
<li>Assembly data</li>
<li>Comment describing the assembly if from a multi-step process.</li>
</ul>
<p>Each TSA project is assigned a stable 4-letter TSA accession prefix, which does not change as the project is updated. In addition to the TSA accession prefix, the transcript identifiers have a version number corresponding to a specific TSA project update. Finally, each individual assembly is assigned a unique accession number prefixed by the TSA accession prefix and version number. For instance, if a TSA project's assigned accession number is XXXX00000000, then that project's first transcript version would be XXXX01000000, and the first assembly of that version would be XXXX01000001. (The last six digits of this ID identify each individual assembly). When a project is reassembled, the new assemblies are submitted as the 02 version of the TSA project. No linkage or relationship is expected between the old and new assemblies, and the new assemblies are given new accession numbers beginning with XXXX02000001. The 01 transcripts are suppressed when the 02 transcripts are released.</p>
<p>An example of a TSA master record is <a href="https://www.ncbi.nlm.nih.gov/nuccore/GAAA00000000.1">GAAA00000000</a>.</p>
<h2 id="nucleotide-sequences-must-confor">Nucleotide sequences must conform to the following standards</h2>
<ul>
<li>Submitted sequences must be assembled from data experimentally determined by the submitter.</li>
<li>Sequences must be screened for vector contamination, and any vector/linker sequence must be removed. This includes the removal of NextGen sequencing primers.</li>
<li>Sequences should be greater than 200 bp in length.</li>
<li>Ambiguous bases should not make up more than 10% of the total sequence length or occur as more than 14 n's in a row.</li>
<li>Sequence gaps of known length may be present and annotated with the assembly_gap feature if there is sufficient evidence for the linkage between the sequences. See the <a href="/genbank/tsaguide">TSA Submission Guide</a> for more information about adding assembly_gap features.</li>
<li>Gaps cannot be of unknown length.</li>
</ul>
<h2 id="requirements">Requirements</h2>
<ul>
<li>Raw reads should be submitted to <a href="https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi">SRA</a> prior to submitting your transcriptome. The SRA run accession(s) (SRRXXXXXX) and associated BioProject (PRJNAXXXXXX) and BioSample(s) (SAMNXXXXXX) are required for TSA submission. </li>
<li>Assembly Data Structured Comment. This information is input directly in the Submission Portal dialogs.</li>
<li>If a multi-step assembly was performed, a description of the assembly process should be provided in the COMMENT section.</li>
<li>If annotation is provided, the product names should follow the <a href="https://www.ncbi.nlm.nih.gov/genome/doc/internatprot_nomenguide/">International Protein Nomenclature Guidelines</a>.</li>
<li>The keyword 'Targeted' and feature annotation should be included for all targeted subsets of transcriptome data. See <a href="/genbank/tsaguide#target">Targeted vs. Non-targeted TSA Studies</a> for more information.</li>
<li>Annotation must be biologically valid.</li>
</ul>
<h2 id="how-to-submit-to-tsa">How to Submit to TSA</h2>
<p>All TSA submissions must be submitted through the TSA <a href="https://submit.ncbi.nlm.nih.gov/subs/tsa/">Submission Portal</a>. Submission details can be found in the <a href="/genbank/tsaguide">TSA submission guide</a>.</p>
<h2 id="how-to-update-an-existing-tsa-su">How to Update an Existing TSA Submission</h2>
<p>See <a href="/genbank/tsaupdate">Update TSA Records</a>. Contact <a href="mailto:gb-admin@ncbi.nlm.nih.gov">gb-admin@ncbi.nlm.nih.gov</a> with any additional questions.</p>
<h2 id="how-to-search-for-tsa-sequences">How to Search for TSA Sequences</h2>
<ul>
<li>You can search Entrez Nucleotide using the following terms: tsa-master [prop] and 'Genus Species' [orgn]<ul>
<li>For example: tsa-master [prop] AND Nitella mirabilis [orgn]</li>
</ul>
</li>
<li>The public submissions are available through the <a href="https://www.ncbi.nlm.nih.gov/Traces/wgs/?term=tsa">WGS/TSA browser</a>.</li>
<li>The sequences can be downloaded from the <a href="https://ftp.ncbi.nlm.nih.gov/genbank/tsa/">NCBI FTP GenBank site</a>.</li>
</ul>
<h2 id="should-not-be-submitted-to-tsa">Should not be submitted to TSA</h2>
<ul>
<li>Assemblies from sequences not directly sequenced by the submitter.</li>
<li>Clonal based assemblies. These should be submitted to GenBank.</li>
<li>A single assembly from multiple organisms.</li>
<li>Subsets of a transcriptome study unless it is part of a targeted study. See the <a href="/genbank/tsaguide">TSA submission guide</a> for more information about submitting a targeted study.</li>
</ul>
</div>
<!--/.col1-->
<div class="col2">
<div class="rightnav">
<h2 id="tsa-resources">TSA Resources</h2>
<ul>
<li><a href="/genbank/TSA">About TSA</a></li>
<li><a href="/genbank/TSAguide">TSA Submission Guide</a></li>
<li><a href="/genbank/TSAfaq">FAQ</a></li>
<li><a href="/genbank/tsaupdate">Update TSA Records</a></li>
<li><a href="/genbank/table2asn">table2asn</a></li>
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/">Submission Portal</a></li>
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/bioproject/">BioProject</a></li>
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/biosample/">BioSample</a></li>
<li><a href="https://www.ncbi.nlm.nih.gov/Traces/wgs/?view=TSA">TSA Browser</a></li>
<li><a href="https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi">SRA</a></li>
</ul>
</div>
</div>
<!--/.col2-->
<div class="col3">
</div>
<!--/.col3-->
<div class="col4">
</div>
<!--/.col4-->
<div class="col5">
</div>
<div class="col6">
</div>
<div class="col7">
</div>
<div class="col8">
</div>
<div class="col9">
</div>
</div><!--/.content-->
</div><!--/.container-->
<div id="NCBIFooter_dynamic">
<div class="breadcrumbs">You are here:
<span id="breadcrumb_text"><a href="/guide/">NCBI</a></span></div>
<a id="help-desk-link" class="help_desk" href="https://support.ncbi.nlm.nih.gov/ics/support/default.asp?Time=2025-03-05T02:45:46-05:00&amp;Snapshot=%2Fprojects%2Fstaticsites%2Fgenbank%2Fgenbank@2.21&amp;Host=portal106&amp;ncbi_phid=CE8D93597C7F2EE10000000000D700A9&amp;ncbi_session=CE8B5AF87C7FFCB1_0191SID&amp;from=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fgenbank%2Ftsa%2F&amp;Ncbi_App=genbank&amp;Page=custom-page&amp;style=classic&amp;deptID=28049" target="_blank">Support Center</a>
<noscript><img alt="" src="/stat?jsdisabled=true&amp;ncbi_app=genbank&amp;ncbi_db=&amp;ncbi_pdid=custom-page&amp;ncbi_phid=CE8D93597C7F2EE10000000000D700A9" /></noscript>
</div>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<xi:include href="http://127.0.0.1/sites/static/header_footer" xpointer="xmlns(x=http://www.w3.org/1999/xhtml) xpointer(//x:div[@id='footer'])"></xi:include>
</div>
<!--/.footer-->
<p class="last-updated small">Last updated: 2022-08-26T17:10:33Z</p>
</div>
<!--/.page-->
</div>
<!--/.wrap-->
<span class="PAFAppResources"></span>
</div><!-- /.twelve_col -->
</div>
<!-- /.grid -->
<!-- usually for JS scripts at page bottom -->
<span class="pagefixtures"></span>
<!-- CE8B5AF87C7FFCB1_0191SID /projects/staticsites/genbank/genbank@2.21 portal106 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4218137/js/3879255/4121861/1490097/4087685.js" snapshot="genbank"></script></body>
</html>