nih-gov/www.ncbi.nlm.nih.gov/snp/docs/submission/vcf_submission_guidelines

2606 lines
102 KiB
XML
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
<!-- AppResources meta begin -->
<meta name="paf-app-resources" content="" />
<!-- AppResources meta end -->
<!-- TemplateResources meta begin -->
<meta name="paf_template" content="StdNCol" />
<!-- TemplateResources meta end -->
<!-- Page meta begin -->
<!-- Page meta end -->
<!-- Logger begin -->
<meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_app" content="snp" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_pdid" content="static" />
<!-- Logger end -->
<title>dbSNP VCF Submission Format Guidelines</title>
<!-- PageFixtures headcontent begin -->
<!-- PageFixtures headcontent end -->
<!-- AppResources external_resources begin -->
<script type="text/javascript" src="/core/jig/1.15.3/js/jig.min.js"></script>
<!-- AppResources external_resources end -->
<!-- Page headcontent begin -->
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="node-id" content="4791" />
<meta name="revision-id" content="31513" />
<meta name="created" content="2015-09-09T11:57:50-04:00" />
<meta name="modified" content="2015-12-26T12:28:43-05:00" />
<meta name="publication-date" content="2015-09-09T10:37:00-04:00" />
<meta name="subsite" content="snp" />
<meta name="path" content="snp/docs/submission/vcf_submission_guidelines" />
<meta name="node-type" content="page" />
<meta name="jira-ticket" content="" />
<meta name="" content="" />
<!-- Page headcontent end -->
<!-- PageFixtures resources begin -->
<link xmlns="http://www.w3.org/1999/xhtml" type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218191/css/4207974/4206132.css" xml:base="http://127.0.0.1/sites/static/header_footer" />
<!-- PageFixtures resources end -->
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8EEC6D7C9B8A0100000000006B005B.m_5" /><script type="text/javascript"><!--
var ScriptPath = '/portal/';
var objHierarchy = {"name":"PAFAppLayout","type":"Layout","realname":"PAFAppLayout",
"children":[{"name":"PAFAppLayout.AppController","type":"Cluster","realname":"PAFAppLayout.AppController",
"children":[{"name":"PAFAppLayout.AppController.AppResources","type":"Portlet","realname":"PAFAppLayout.AppController.AppResources","shortname":"AppResources"},
{"name":"PAFAppLayout.AppController.RequestProcessor","type":"Portlet","realname":"PAFAppLayout.AppController.RequestProcessor","shortname":"RequestProcessor"},
{"name":"PAFAppLayout.AppController.Controller","type":"Cluster","realname":"PAFAppLayout.AppController.Controller",
"children":[{"name":"PAFAppLayout.AppController.Controller.PAFStaticContentController","type":"Portlet","realname":"PAFAppLayout.AppController.Controller.PAFControllerSelector.PAFStaticContentController","shortname":"PAFStaticContentController"}]},
{"name":"PAFAppLayout.AppController.Page","type":"Cluster","realname":"PAFAppLayout.AppController.Page",
"children":[{"name":"PAFAppLayout.AppController.Page.PAFPageSelectorData","type":"Portlet","realname":"PAFAppLayout.AppController.Page.PAFPageSelector.PAFPageSelectorData","shortname":"PAFPageSelectorData"},
{"name":"PAFAppLayout.AppController.Page.CustomStaticPage","type":"Cluster","realname":"PAFAppLayout.AppController.Page.CustomStaticPage",
"children":[{"name":"PAFAppLayout.AppController.Page.CustomStaticPage.MainPortlet","type":"Portlet","realname":"PAFAppLayout.AppController.Page.CustomStaticPage.MainPortlet","shortname":"MainPortlet"}]}]},
{"name":"PAFAppLayout.AppController.PageFixtures","type":"Cluster","realname":"PAFAppLayout.AppController.PageFixtures",
"children":[{"name":"PAFAppLayout.AppController.PageFixtures.PageFixturesP","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.PageFixturesP","shortname":"PageFixturesP"},
{"name":"PAFAppLayout.AppController.PageFixtures.SearchBar","type":"Cluster","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar",
"children":[{"name":"PAFAppLayout.AppController.PageFixtures.SearchBar.SearchBarChooser","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar.SearchBarChooser","shortname":"SearchBarChooser"},
{"name":"PAFAppLayout.AppController.PageFixtures.SearchBar.PAFSearchBar","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar.PAFSearchBar","shortname":"PAFSearchBar"}]},
{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter","type":"Cluster","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter",
"children":[{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIBreadcrumbs","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIBreadcrumbs","shortname":"NCBIBreadcrumbs"},
{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIHelpDesk","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIHelpDesk","shortname":"NCBIHelpDesk"},
{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIApplog_NoScript_Ping","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIApplog_NoScript_Ping","shortname":"NCBIApplog_NoScript_Ping"}]},
{"name":"PAFAppLayout.AppController.PageFixtures.LocalNavPortlet","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.LocalNavPortlet","shortname":"LocalNavPortlet"}]},
{"name":"PAFAppLayout.AppController.TemplateResources","type":"Cluster","realname":"PAFAppLayout.AppController.TemplateResources",
"children":[{"name":"PAFAppLayout.AppController.TemplateResources.StdNColResources","type":"Portlet","realname":"PAFAppLayout.AppController.TemplateResources.PAFTemplateResources.StdNColResources","shortname":"StdNColResources"}]},
{"name":"PAFAppLayout.AppController.Logger","type":"Portlet","realname":"PAFAppLayout.AppController.Logger","shortname":"Logger"},
{"name":"PAFAppLayout.AppController.DebugConsole","type":"Portlet","realname":"PAFAppLayout.AppController.DebugConsole","shortname":"DebugConsole"}]}]};
--></script>
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4206108/css/4121862/3974050/3917732/251717/4154243/14534/45193/3534283/4128070/4062871/4005757.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4206108/css/3529741/3529739.css" media="print" /><script type="text/javascript">
var ObjectLinks=[{i:0, ename: "p$ExL", esid:"*", sname: "p$ExL", ssid:"*", dname:"p$el", dsid:"0",m:"CopyValue",p:[],f: function(src, dst) {fn_CopyValue(src, dst);}}]
var ActiveNames = {"p$ExL":1};
</script></head>
<body class=" static">
<div class="grid">
<div class="col twelve_col nomargin shadow">
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
<div class="sysmessages">
<noscript>
<p class="nojs">
<strong>Warning:</strong>
The NCBI web site requires JavaScript to function.
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">more...</a>
</p>
</noscript>
</div>
<!--/.sysmessage-->
<div class="wrap">
<div class="page">
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<div xmlns="http://www.w3.org/1999/xhtml" id="universal_header" xml:base="http://127.0.0.1/sites/static/header_footer">
<section class="usa-banner">
<div class="usa-accordion">
<header class="usa-banner-header">
<div class="usa-grid usa-banner-inner">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
<p>An official website of the United States government</p>
<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
<span class="usa-banner-button-text">Here's how you know</span>
</button>
</div>
</header>
<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
<div class="usa-banner-guidance-gov usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
<div class="usa-media_block-body">
<p>
<strong>The .gov means it's official.</strong>
<br />
Federal government websites often end in .gov or .mil. Before
sharing sensitive information, make sure you're on a federal
government site.
</p>
</div>
</div>
<div class="usa-banner-guidance-ssl usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
<div class="usa-media_block-body">
<p>
<strong>The site is secure.</strong>
<br />
The <strong>https://</strong> ensures that you are connecting to the
official website and that any information you provide is encrypted
and transmitted securely.
</p>
</div>
</div>
</div>
</div>
</section>
<div class="usa-overlay"></div>
<header class="ncbi-header" role="banner" data-section="Header">
<div class="usa-grid">
<div class="usa-width-one-whole">
<div class="ncbi-header__logo">
<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
</a>
</div>
<div class="ncbi-header__account">
<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
<span class="fa fa-user" aria-hidden="true">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
<g style="fill: #fff">
<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
</g>
</svg>
</span>
<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
<span class="sr-only">Show account info</span>
</button>
</div>
<div class="ncbi-popup-anchor">
<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
<div class="ncbi-popup-head">
<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
<span class="fa fa-times">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
</svg>
</span>
<span class="usa-sr-only">Close</span></button>
<h4>Account</h4>
</div>
<div class="account-user-info">
Logged in as:<br />
<b><span class="username" id="uname_long">username</span></b>
</div>
<div class="account-links">
<ul class="usa-unstyled-list">
<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
</ul>
</div>
</div>
</div>
</div>
</div>
</header>
<div role="navigation" aria-label="access keys">
<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
</div>
<section data-section="Alerts">
<div class="ncbi-alerts-placeholder"></div>
</section>
</div>
</div>
<!--/.header-->
<div class="header">
<div class="res_logo"><h1 class="res_name"><a href="/snp/" title="dbSNP home">dbSNP</a></h1><h2 class="res_tagline">Small genetic variation</h2></div>
<div class="search"><form method="get" action="/snp/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="snp" selected="selected">SNP</option><option value="clinvar">ClinVar</option><option value="omim">OMIM</option><option value="medgen" class="last">MedGen</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" 
class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search SNP" value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="isEnabled:false,disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'yes'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div><input type="hidden" name="p$a" id="p$a" /><input type="hidden" name="p$l" id="p$l" value="PAFAppLayout" /><input type="hidden" name="p$st" id="p$st" value="snp" /><input name="SessionId" id="SessionId" value="CE8B5AF87C7FFCB1_0191SID" disabled="disabled" type="hidden" /><input name="Snapshot" id="Snapshot" value="/projects/staticsites/snp/snpdoc@1.10" disabled="disabled" type="hidden" /></form><ul class=" inline_list searchlinks"><li>
<a href="/snp/advanced">Advanced</a>
</li></ul></div>
</div>
<div class="nav_and_browser">
</div>
<!-- was itemctrl -->
<div class="container">
<div id="maincontent" class="content col twelve_col last">
<div class="col1">
<h1>
dbSNP VCF Submission Format Guidelines</h1>
<h2><br />
Contact: <a href="mailto:snp-admin@ncbi.nlm.nih.gov">snp-admin@ncbi.nlm.nih.gov</a><br />
Last update: September 19, 2015</h2>
<p>
<a href="#Introduction">Introduction</a>
<br />
<a href="#submission_overview">Submission Overview</a>
<br />
<a href="#required_metadata_files">Required Metadata Files</a>
<br />
<a href="#dbsnp_vcf_submission_format">dbSNP VCF Submission Format</a>
<br />
<a href="#vcf_submission_file_header">Submission File Header</a>
<br />
<a href="#vcf_submission_data_table">Submission Data Table</a>
<br />
<a href="#submission_data_table_special_case_examples">Submission Data Table Special Case Examples: Reporting POS, REF and ALT for insertion/deletion variants</a>
<br />
<a href="#info_tag_descriptions_and_examples">INFO Tag Descriptions and Examples</a>
<br />
<a href="#required_dbsnp_vcf_info_tag">Required INFO Tag</a>
<br />
<a href="#optional_dbsnp_vcf_info_tags">Optional INFO Tags</a>
<br />
<a href="#genotype_format_example">Genotype Format Example</a>
<br />
<a href="#allele_frequency_format_example">Allele Frequency Format Examples</a>
<br />
<a href="#appendix_example_of_vcf_submission">Appendix: Example of a VCF Formatted dbSNP Submission</a>
</p>
<h2 id="Introduction">Introduction</h2>
<h3><br />
dbSNP Submissions</h3>
<p>
dbSNP is a public database of short genetic variations. The data can be from any species, and from any part of a genome. dbSNP has been designed to include a broad collection of simple genetic variations such as single-base nucleotide substitutions, small-scale multi-base deletions or insertions, retrotransposable element insertions, and microsatellite repeats. Submissions can include genotype and allele frequency data if those data are available. dbSNP accepts submissions for all classes of simple molecular variation, including common variations as well as rare variations of germline or somatic origin that are clinically significant. Large-scale insertion/deletion, inversion and translocation data that are over 50bp long should be submitted to <a href="https://www.ncbi.nlm.nih.gov/dbvar">dbVar</a>, the NCBI database of genomic structural variation.</p>
<h3><br />
The Variant Call Format (VCF)</h3>
<p>
The Variant Call Format, or VCF, was developed for the <a href="https://www.1000genomes.org/about">1000 Genomes Project</a> as a standardized format for storing large quantities of sequence variation data (SNPs, indels, larger structural variants, etc.) and any accompanying genotype data and annotation. A VCF file contains a header section and a data table section. Since the metadata lines in the header section can be altered to fit the requirements of the data to be submitted, you can use VCF to submit many different kinds of common variations (as well as their associated genotypes and annotation) that are contained within one reference sequence. VCF files are compressed (using bgzip), and easily accessed. See <a href="http://bioinformatics.oxfordjournals.org/content/early/2011/06/07/bioinformatics.btr330.full.pdf">Danecek et al.</a> for a concise overview of VCF, and the official 1000 Genomes site for a <a href="https://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41">detailed description of the VCF format</a>. Submissions to dbSNP currently use VCF format <a href="https://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41">version 4.1</a>.</p>
<p><strong>NOTE</strong>: Do not use the VCF format if you have human mutations or variations with clinical significance or phenotype. They should be submitted to <a href="https://www.ncbi.nlm.nih.gov/clinvar/">ClinVar</a>.  Contact <a href="mailto:snp-admin@ncbi.nlm.nih.gov">dbSNP</a> if you have any questions.</p>
<h3><br />
When should I use the VCF format for dbSNP Submissions?</h3>
<p>
Use dbSNP's Variant Call Format (VCF) to submit large or small numbers of short genetic variations that have asserted positions on genome or reference sequences<sup>a</sup>. Large scale submitters especially will find dbSNP's VCF submission format a very useful submission tool since it allows for the submission of numerous variations generated by high-throughput sequencing (HTS) projects over multiple populations, as well as a wide variety of associated data. The VCF file for dbSNP submissions, as opposed to the standard VCF format as defined by the 1000 Genomes project, includes additional fields and attributes that describe dbSNP-specific submission and variation properties, and may include tags that are different than those used in standard VCF.</p>
<p>
<sup>a</sup>
<strong>dbSNP prefers that all variant asserted positions submitted using the VCF format are submitted either on a sequence accession that is part of an assembly housed in the <a href="https://www.ncbi.nlm.nih.gov/assembly/">NCBI Assembly Resource</a> or as an asserted location on an <a href="https://www.insdc.org/">INSDC</a> sequence housed in DDBJ, ENA, and GenBank.</strong>
</p>
<h2 id="submission_overview">Submission Overview</h2>
<p>
1. Check to see if your lab already has a handle assignment from NCBI. If it does not, request a handle using the <a href="https://www.ncbi.nlm.nih.gov/projects/SNP/handle.html">dbSNP online handle request form</a>.<br />
<br />
2. Prepare your submission:<br />
    a. The VCF file format required for dbSNP submissions is based on the 1000 Genomes Project VCF format guidelines with the addition of dbSNP specific  <br />
    fields. These additional fields describe dbSNP submission and variation properties.<br />
    b. Create required <a href="#required_metadata_files">metadata (meta) files</a> for the <a href="/snp/docs/submission/hts_submission_formatting_intro_meta_formatting/#publication">publication</a>, <a href="/snp/docs/submission/hts_submission_formatting_intro_meta_formatting/#Method">method</a>, <a href="/snp/docs/submission/hts_submission_formatting_intro_meta_formatting/#population">population</a>, and <a href="/snp/docs/submission/hts_submission_formatting_intro_meta_formatting/#Assay">assay</a> information associated with the submission.<br />
    c. Create a VCF Submission file for your data. Include:</p>
<p>
        --a properly formatted <a href="#vcf_submission_file_header">dbSNP VCF file header</a><br />
        --a <a href="#vcf_submission_data_table">data table</a> that contains the <a href="#required_dbsnp_vcf_info_tag">required INFO tag</a> for the variants you are submitting<br />
        --<a href="#optional_dbsnp_vcf_info_tags">optional INFO tags</a> that will describe your data more fully.</p>
<p>
d. We suggest you compress the VCF file using <a href="https://www.gzip.org/">gzip</a> and send the compressed file by email or FTP:</p>
<ul>
<li>
 File size less than 10MB:<br />
 1.  Email your submission as attachments to <a href="mailto:snp-sub@ncbi.nlm.nih.gov">snp-sub@ncbi.nlm.nih.gov</a>.</li>
<li>
File size more than 10 MB:<br />
1. Request an FTP account from NCBI for uploading by sending your handle confirmation information to <a href="mailto:snp-sub@ncbi.nlm.nih.gov">snp-sub@ncbi.nlm.nih.gov</a>.<br />
2. Upload your submission files to your assigned FTP account and notify <a href="mailto:snp-sub@ncbi.nlm.nih.gov">snp-sub@ncbi.nlm.nih.gov</a> when the upload is complete.</li>
</ul>
<p>
See the <a href="#appendix_example_of_vcf_submission">appendix</a> of this document for an example of a VCF formatted dbSNP submission.</p>
<h2 id="required_metadata_files">Required Metadata Files</h2>
<p>
In addition to VCF formatted variation files, dbSNP also requires VCF submissions to include separate Meta file(s).</p>
<ul>
<li>
The required Meta files are: <a href="/snp/docs/submission/hts_submission_formatting_intro_meta_formatting/#publication">Publication</a>, <a href="/snp/docs/submission/hts_submission_formatting_intro_meta_formatting/#Method">Method</a>, <a href="/snp/docs/submission/hts_submission_formatting_intro_meta_formatting/#population">Population</a>, and <a href="/snp/docs/submission/hts_submission_formatting_intro_meta_formatting/#Assay">Assay</a>. </li>
<li>
You can submit these Meta files separately or combine them into a single text file for submission.</li>
<li><a href="/snp/docs/submission/hts_submission_formatting_intro_meta_formatting">Specifications for each Meta file</a> are available in the “<a href="/snp/docs/submission/hts_launch_and_introductory_material">How to Submit</a>” documentation for dbSNP. Links to the specific sections of the document that provide the required specifications are provided above.</li>
</ul>
<p>
Below is an example of a Meta file that combines all four Meta file types into a single file:<br />
<span>TYPE: CONT<br />
HANDLE: MYSEQ_SNP<br />
NAME: Jim Johnson<br />
FAX: 111 111 1111<br />
TEL: 222 222 2222<br />
EMAIL: jj@nih.gov<br />
LAB: NCBI<br />
INST: NCBI, NIH<br />
ADDR: 9600 Rockville Pike, Bethesda, MD 20892<br />
||<br />
TYPE: PUB<br />
HANDLE: MYSEQ_SNP<br />
PMID: 123456<br />
TITLE: Variation discovery in European and African Populations<br />
AUTHORS: Jim Johnson<br />
YEAR: 2014<br />
STATUS: 1<br />
||<br />
TYPE: METHOD<br />
HANDLE: MYSEQ_SNP<br />
ID: AgilentWholeExome<br />
METHOD_CLASS: Sequence<br />
TEMPLATE_TYPE: DIPLOID<br />
METHOD:<br />
Solution hybridization exome capture was carried out using the Human All Exon System. The captured regions totaled approximately 38 or 50 Mb depending on the kit used. Flow cell preparation and paired end read sequencing were carried out on GAIIx and HiSeq2000 sequencers (Illumina Inc, San Diego CA). Sequence reads were aligned with the diagCM aligner and genotypes were called with bam2mpg (Teer et al, Systematic comparison of three genomic enrichment methods for massively parallel DNA sequencing, Genome Res. 2010 Oct;20(10):1420-31).<br />
||<br />
TYPE: POPULATION<br />
HANDLE: MYSEQ_SNP<br />
ID: EUROPEAN<br />
POPULATION: This population includes 712 participants of European descent.<br />
||<br />
TYPE: POPULATION<br />
HANDLE: MYSEQ_SNP<br />
ID: AFRICAN<br />
POPULATION: This population includes 600 participants of African descent.<br />
||<br />
TYPE: SNPASSAY<br />
HANDLE: MYSEQ_SNP<br />
BATCH: Exome_SNP_Discovery<br />
MOLTYPE: Genomic<br />
METHOD: AgilentWholeExome<br />
ORGANISM: Homo sapiens<br />
||<br />
TYPE: SNPPOPUSE<br />
HANDLE: MYSEQ_SNP<br />
BATCH: Exome_SNP_MAF<br />
METHOD: AgilentWholeExome<br />
||</span></p>
<h2 id="dbsnp_vcf_submission_format">dbSNP VCF Submission Format</h2>
<h3 id="vcf_submission_file_header">VCF Submission File Header</h3>
<h4><br />
Required VCF Header Metadata</h4>
<p><br />
The VCF file header for a dbSNP submission should start with the following metadata:</p>
<table border="1" cellpadding="1" cellspacing="1">
<tbody>
<tr>
<th scope="row">
<span>##fileformat=   </span>
</th>
<td>
<span>The current VCF version ID, e.g. VCFv4.1</span>
</td>
</tr>
<tr>
<th scope="row">
<span>##fileDate=</span>
</th>
<td>
<span>The date that the file was generated or the date when the file was updated. Use YYYYMMDD format, e.g. 20120201</span>
</td>
</tr>
<tr>
<th scope="row">
<span>##handle= </span>
</th>
<td>
<span>Your registered dbSNP submission handle.</span>
</td>
</tr>
<tr>
<th scope="row">
<span>##batch= </span>
</th>
<td>
<span>A unique local batch ID. Use the same value placed in the BATCH field of the Meta file SNPASSAY section; dbSNP uses the local batch ID to associate the VCF submission with the ASSAY, PUBLICATION, and METHOD meta data.</span>
</td>
</tr>
<tr>
<th scope="row">
<span>##bioproject_id= </span>
</th>
<td>
<span>A registered BioProject ID if available.</span>
</td>
</tr>
<tr>
<th scope="row">
<span>##biosample_id= </span>
</th>
<td>
<span>A comma-separated list of registered <a href="https://www.ncbi.nlm.nih.gov/biosample/">BioSample</a> IDs. We encourage submitters to register their samples with BioSample and provide detailed descriptions such as traits and phenotype. In the example below, which lists two BioSample records, the ID numbers are 423 and 1595.</span>
</td>
</tr>
<tr>
<th scope="row">
<span>##reference= </span>
</th>
<td>
<span>The RefSeq Assembly accession.version on which the variation position is based, e.g. GCF_000001405.12. You can find this ID by accessing NCBI's <a href="https://www.ncbi.nlm.nih.gov/assembly/">Genome Assembly Resource</a> and searching for the record of the specific assembly. You can use the organism or assembly name (e.g. GRCh37) as your search term: the assembly record for GRCh37 shows the RefSeq ID is GCF_000001405.12. Only the accession.version for a fully assembled genome can be reported here. For unassembled and unplaced contigs, leave this tag blank and use the reporting method for INSDC sequence coordinates as shown in the example below (VCF Data Table Examples B) for the CHROM column.</span>
</td>
</tr>
</tbody>
</table>
<h4>
Example of dbSNP Metadata in a VCF formatted file:</h4>
<p>
<span>##fileformat=VCFv4.1<br />
##fileDate=20120215<br />
##handle=MYSEQ_SNP<br />
##batch=Exome_SNP_Discovery<br />
##bioproject_id=60153<br />
##biosample_id=423, 1595<br />
##reference=GCF_000001405.12</span>
</p>
<h3>
INFO Tag Descriptions</h3>
<p>
The VCF header continues with tag/value descriptions for required and optional dbSNP INFO tags. These descriptions should be placed in the header following the required metadata. The INFO tag/value descriptions you provide in the VCF header will serve to define the data you place in the INFO column of the data table. These descriptions are an important part of the VCF header as they will allow users viewing your data in VCF format to identify a tag you placed in the INFO column and see definitions for values of that tag. The data you present in the INFO column of the data table will be meaningless to some users without the inclusion of the tag/value descriptions in the VCF header for those data.</p>
<h4>
Descriptions for Required INFO Tag</h4>
<p>
Currently, the only required INFO tag for a dbSNP submission is the <a href="#variation_type_info_tag">Variation Type (VRT) tag</a>. Place the VRT tag description in the VCF file header after the required metadata. The VRT tag is required for each variant submitted in VCF format.<strong> Failure to include this required INFO tag will result in the delay of your submission</strong>.<br />
See the <a href="#required_dbsnp_vcf_info_tag">dbSNP Required INFO Tag Description and Example section</a> of this document for example tag descriptions you can cut and paste into the VCF file header for both the required INFO tags and the optional INFO tags.</p>
<h4>
Descriptions for Optional INFO Tags</h4>
<p>
Place descriptions for the optional INFO tags in the VCF file header after the required metadata. These descriptions identify and define the optional INFO tags you have elected to use in the data table portion of the file. See the <a href="#optional_dbsnp_vcf_info_tags">dbSNP Optional INFO Tag Descriptions and Examples section</a> of this document for example tag descriptions you can cut and paste into the VCF file header for both the required INFO tags and the optional INFO tags.</p>
<h3 id="vcf_submission_data_table">VCF Submission Data Table</h3>
<p>
Create a tab-delimited table to house your variations and variation data for your submission. The table header should include these six fixed, mandatory columns (in order):<br />
<span>#CHROM           POS           ID           REF           ALT           INFO</span><br />
The above columns represent six fixed fields that must be filled out for each submitted variant. If you do not have data for a particular field, use a dot (“.”) to represent the missing value.</p>
<h3>
VCF Data Table Field Values<br />
 </h3>
<h4>
#CHROM</h4>
<p>
This field contains the chromosome identifier from the reference genome where the variant is located or an angle-bracketed ID String ("&lt;ID&gt;") pointing to a contig in the assembly file (cf. the ##assembly line in the header). Entries for a specific CHROM should form a contiguous block within the VCF file. Alternatively, the sequence accession and version can be used for this field if the variation position is based on a non-chromosomal sequence (see example B above). Do not use the colon symbol (:) in a chromosome name.</p>
<h4>
POS</h4>
<p>
This field contains the reference position of the variant, which is the 1st base of the variation event. Positions are sorted numerically within each reference sequence chromosome (CHROM) in increasing order. You are permitted to have multiple records of different variation type (VRT) at the same POS. Telomeres are indicated by using positions 0 or N+1, where N is the length of the corresponding chromosome or contig.<br />
<strong>Note:</strong> For short, simple insertions and deletions in which the REF or one of the ALT alleles would otherwise be null/empty, the POS field must contain the coordinates of the base preceding the indel event. See the <a href="#submission_data_table_special_case_examples">Submission Data Table Special Case Examples</a> section of this document for instruction on reporting insertion/deletion POS values.<br />
<br />
Large indels and structural variants must be submitted to <a href="https://www.ncbi.nlm.nih.gov/dbvar/">dbVAR</a>.</p>
<h4>
ID</h4>
<p>
This field contains the unique local ID (LID) of the variant, and <strong>is a required value</strong> <strong>(cannot be NULL)</strong>. The LID provided here combined with the handle must be unique for a particular submitter. You can use an <a href="https://www.hgvs.org/mutnomen/recs.html">HGVS expression</a> for the variant ID if you do not have a unique identifier of your own.</p>
<h4>
REF</h4>
<p>
This field contains the reference allele of the variant. The bases representing the reference allele can be any of the following: A, C, G, T (case insensitive).<br />
<strong>Note: In order for the variant to be included in dbSNP, the maximum length for the REF allele is 51bp.</strong><br />
<br />
<strong>Note: </strong>For short, simple insertions and deletions in which the REF or one of the ALT alleles would otherwise be null/empty, the REF and ALT Strings must include the base preceding the indel event. See the <a href="#submission_data_table_special_case_examples">Submission Data Table Special Case Examples</a> section of this document for instruction on reporting indel reference (REF) alleles.</p>
<h4>
ALT</h4>
<p>
This field contains a comma separated list of alternate, non-reference alleles that you have called in at least one sample. You can use A, C, G, or T (case insensitive) or you can use an angle-bracketed ID String ("&lt;ID&gt;"). <strong>Note: In order for the variant to be included in dbSNP, the maximum length of each ALT allele is 51bp.</strong></p>
<p><strong>Note: </strong>For short, simple insertions and deletions in which the REF or one of the ALT alleles would otherwise be null/empty, the REF and ALT Strings must include the base preceding the indel event. See the <a href="#submission_data_table_special_case_examples">Submission Data Table Special Case Examples section</a> of this document for instruction on reporting indel alternate (ALT) alleles.</p>
<h4>
QUAL</h4>
<p>
This field contains the quality score for the assertion if available.</p>
<h4>
FILTER</h4>
<p>
This field contains the filter status if available.</p>
<h4>
INFO</h4>
<p>
This field contains additional information for the reported variation. INFO fields are encoded as a semicolon-separated series of short keys with optional values in the format: &lt;key&gt;=&lt;data&gt;[,data]. See the <a href="#info_tag_descriptions_and_examples">INFO Tag Descriptions and Examples section</a> of this document for examples of the required and optional INFO Tags that dbSNP supports.</p>
<h4>
VCF Data Table Examples<br />
 </h4>
<h5>
A.  Reporting positions using chromosome coordinates (provide the reference tag in the header if the assembly and version is known).</h5>
<table border="1" cellpadding="1" cellspacing="1" height="154" width="768">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>23</span>
</td>
<td>
<span>135498962</span>
</td>
<td>
<span>NG_021219.1:g.120841A&gt;G</span>
</td>
<td>
<span>A</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>29</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1</span>
</td>
</tr>
<tr>
<td>
<span>23</span>
</td>
<td>
<span>135499109</span>
</td>
<td>
<span>NG_021219.1:g.120988G&gt;A</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>A</span>
</td>
<td>
<span>40</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1</span>
</td>
</tr>
<tr>
<td>
<span>23</span>
</td>
<td>
<span>135499270</span>
</td>
<td>
<span>NG_021219.1:g.121149C&gt;T</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>T</span>
</td>
<td>
<span>51</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1</span>
</td>
</tr>
<tr>
<td>
<span>23</span>
</td>
<td>
<span>135499419</span>
</td>
<td>
<span>NG_021219.1:g.121298G&gt;C</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>68</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1</span>
</td>
</tr>
</tbody>
</table>
<h5><br />
B.  Reporting positions using <a href="https://www.insdc.org/">INSDC</a> sequence (i.e. GenBank sequence with accession) coordinates if assembly is not known.</h5>
<table border="1" cellpadding="1" cellspacing="1" height="154" width="1019">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>NG_021219.1</span>
</td>
<td>
<span>140860</span>
</td>
<td>
<span>SNV1</span>
</td>
<td>
<span>T</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>29</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1</span>
</td>
</tr>
<tr>
<td>
<span>NG_021219.1</span>
</td>
<td>
<span>140879</span>
</td>
<td>
<span>SNV2</span>
</td>
<td>
<span>A</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>40</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1</span>
</td>
</tr>
<tr>
<td>
<span>NG_021219.1</span>
</td>
<td>
<span>140921</span>
</td>
<td>
<span>SNV3</span>
</td>
<td>
<span>T</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>51</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1</span>
</td>
</tr>
<tr>
<td>
<span>NG_021219.1</span>
</td>
<td>
<span>140939</span>
</td>
<td>
<span>SNV4</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>T</span>
</td>
<td>
<span>68</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1</span>
</td>
</tr>
</tbody>
</table>
<h5><br />
C. Reporting positions using non-<a href="https://www.insdc.org/">INSDC</a> sequence coordinates or sequence yet to be submitted to GenBank<br />
<br />
Users that submit variations with an asserted location on a sequence that is being submitted simultaneously to GenBank or has a pending GenBank submission accession assignment can temporarily report the variant's asserted location based on the local (user-defined) sequence ID with the following additional requirements:</h5>
<ul>
<li>
Provide the 5′ and 3′ flanking sequence surrounding the variation. A minimum of 25bp is required for each 5′ and 3′ flanking sequence provided in the INFO tag FLANK-5 and FLANK-3, respectively (see example below).</li>
<li>
Upon receiving GenBank accession numbers, the submitter can: 1) update the VCF and replace the local sequence ID with the corresponding GenBank accession or 2) provide a tab-delimited file containing the GenBank accession for each local sequence ID, one per row.</li>
</ul>
<table border="1" cellpadding="1" cellspacing="1" height="154" width="1143">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>My_Seq_ID_1</span>
</td>
<td>
<span>140860</span>
</td>
<td>
<span>SNV1</span>
</td>
<td>
<span>T</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>29</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1;FLANK-5=TGCAACAATCTGGGCTATGAGATCA;FLANK-3=TAAAGTCAGAGCCAAAAGAAGCAGC</span>
</td>
</tr>
<tr>
<td>
<span>My_Seq_ID_1</span>
</td>
<td>
<span>140979</span>
</td>
<td>
<span>SNV2</span>
</td>
<td>
<span>A</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>40</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1;FLANK-5=TTAACTAGCTTGGTTGCTGAACACC;FLANK-3=GGTTAGGCTCTCAAATTACCCTCTG</span>
</td>
</tr>
<tr>
<td>
<span>My_Seq_ID_1</span>
</td>
<td>
<span>141921</span>
</td>
<td>
<span>SNV3</span>
</td>
<td>
<span>T</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>51</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1;FLANK-5=TGCAACAATCTGGGCTATGAGATCA;FLANK-3=AGGCTGGTGAGCATTCTGGGCTAAA</span>
</td>
</tr>
<tr>
<td>
<span>My_Seq_ID_1</span>
</td>
<td>
<span>149939</span>
</td>
<td>
<span>SNV4</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>T</span>
</td>
<td>
<span>68</span>
</td>
<td>
<span>PASS</span>
</td>
<td>
<span>VRT=1;FLANK-5=GACACCATGGTGCATCTGACTCCTG;FLANK-3=GGAGAAGTCTGCCGTTACTGCCCTG</span>
</td>
</tr>
</tbody>
</table>
<h3>
 </h3>
<h4 id="submission_data_table_special_case_examples">Submission Data Table Special Case Examples:
 </h4>
<h5>
Reporting POS, REF and ALT for insertion/deletion variants</h5>
<p>
For simple insertions and deletions where either the REF or one of the ALT alleles would otherwise be null/empty, include the base preceding the variation event (a “padding base”) in the REF and ALT allele Strings, and report the coordinates of this “padding base” in POS. The “padding base” is not required for complex substitutions or other events where all alleles have at least one base represented in their Strings.</p>
<h6>
Insertion Example</h6>
<p>
<img src="/core/assets/snp/images/vcf_insertion_example.png" alt="vcf insertion example" />
</p>
<p>
If the coordinates of the first base of the insertion event (“G” at position 43219) in the above sequence were used as the reference position (POS) of this event, the REF field would have no value since the inserted bases are only present in the ALT allele. In such a case, report the coordinates of the base that precedes the insertion event — the “t” at position 43218 — for POS and include this “padding base” in the REF and ALT Strings:</p>
<table border="1" cellpadding="1" cellspacing="1" height="145" width="852">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>10</span>
</td>
<td>
<span>
<strong>
<span>43218</span>
</strong>
</span>
</td>
<td>
<span>NC_000010.10:g.43218_43219insGGAGTTC</span>
</td>
<td>
<span>
<strong>
<span>T</span>
</strong>
</span>
</td>
<td>
<span><span><strong>T</strong></span>GGAGTTC</span>
</td>
<td>
<span>    .</span>
</td>
<td>
<span>     .</span>
</td>
<td>
<span>VRT=2;ANC=T;NIO=12;SSR=0;SAO=0;SCS=0</span>
</td>
</tr>
</tbody>
</table>
<h6><br /><br />
Deletion Example</h6>
<p>
<img src="/core/assets/snp/images/vcf_deletion_example.png" alt="vcf deletion example" />
</p>
<p>
If the coordinates of the first base of this deletion event (“A” at position 701132) in the above sequence were used as the reference position (POS) of this variant, the ALT field would have no value since the deleted bases are only present in the reference (REF) allele. In such a case, report the coordinates of the base that precedes the deletion event — the “a” at position 701131 — for POS and include this “padding base” in the REF and ALT Strings:</p>
<table border="1" cellpadding="1" cellspacing="1" height="145" width="822">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>15</span>
</td>
<td>
<span>
<strong>
<span>701131</span>
</strong>
</span>
</td>
<td>
<span>NC_000015.9:g.701132_701134delATA</span>
</td>
<td>
<span><strong><span>A</span></strong>ATA</span>
</td>
<td>
<span>
<strong>
<span>A</span>
</strong>
</span>
</td>
<td>
<span>    .</span>
</td>
<td>
<span>     .</span>
</td>
<td>
<span>VRT=2;ANC=A;NIO=5;SSR=0;SAO=0;SCS=0</span>
</td>
</tr>
</tbody>
</table>
<h3 id="info_tag_descriptions_and_examples">INFO Tag Descriptions and Examples</h3>
<h4 id="required_dbsnp_vcf_info_tag">Required dbSNP VCF INFO Tag</h4>
<p>
Place the required tag in the INFO column of the <a href="#vcf_submission_data_table">data table</a> and place the corresponding tag description in the file header.</p>
<h4 id="variation_type_info_tag">Variation Type (VRT) INFO Tag</h4>
<h5>
The required “VRT” INFO tag allows you to define the kind of variation you are submitting to dbSNP. We use this information to verify position and that the reported alleles are consistent with reported variation type.<br />
<br />
<strong>Note:</strong> Only one variation type (VRT) can be reported per row. For instance, if you have a deletion variation and a SNV at the same location, they should be reported in two separate rows with the corresponding VRT value.<br />
<br />
<strong>Failure to include this required INFO tag will result in the delay of your submission.</strong></h5>
<h5><br />
VRT Tag/Value Description</h5>
<p>
<span>##INFO=&lt;ID=VRT,Number=1,Type=Integer,Description="Variation type,1 - SNV: single nucleotide variation,2 - DIV: deletion/insertion variation,3 - HETEROZYGOUS: variable, but undefined at nucleotide level,4 - STR: short tandem repeat (microsatellite) variation, 5 - NAMED: insertion/deletion variation of named repetitive element,6 - NO VARIATION: sequence scanned for variation, but none observed,7 - MIXED: cluster contains submissions from 2 or more allelic classes (not used),8 - MNV: multiple nucleotide variation with alleles of common length greater than 1,9 - Exception"&gt;</span>
</p>
<h5>
VRT Data Format Example</h5>
<table border="1" cellpadding="1" cellspacing="1" height="63" width="960">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>1</span>
</td>
<td>
<span>140860</span>
</td>
<td>
<span>NC_000001.10:g.140860T&gt;C</span>
</td>
<td>
<span>T</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>
<strong>
<span>VRT=1</span>
</strong>
</span>
</td>
</tr>
</tbody>
</table>
<h4 id="optional_dbsnp_vcf_info_tags">Optional dbSNP VCF INFO Tags</h4>
<p>
The following INFO tags are optional and need only be used if they describe available data. If you want to include any of the following INFO tags with your submitted data, place the tag in the INFO column of the data table and place the corresponding tag description in the file header. Optional VCF INFO tags for dbSNP submissions include:<br />
<br />
<a href="#alternate_designations">Alternate Designations</a><br />
<a href="#ancestral_allele">Ancestral Allele</a><br />
<a href="#free_text_for_comment">Free Text for Comment</a><br />
<a href="#linkout">LinkOut</a><br />
<a href="#NIO">Number of Independent Observations</a><br />
<a href="#OMIM">OMIM/OMIA Record</a><br />
<a href="#population_ID">Population ID</a><br />
<a href="#pmid">PubMed ID</a><br />
<a href="#sao">Variant Allele Origin</a><br />
<a href="#ssr">Variant Suspect Reason</a></p>
<h5 id="alternate_designations">
Alternate Designations (AD) or Names
</h5>
<p>
The optional “AD” INFO tag allows you to provide dbSNP with a (comma separated) set of alternative names or common names used to describe the same submitted variant.</p>
<h6>
AD Tag/Value Description</h6>
<p>
<span>##INFO=&lt;ID=AD,Number=1,Type=String,Description="Alternate designations; a set of (comma separated) alternative names used to describe the same variant"&gt;</span>
</p>
<h6>
AD Tag/Value Example</h6>
<table border="1" cellpadding="1" cellspacing="1" height="63" width="1150">
<thead>
<tr>
<th scope="col">
<p>
#CHROM</p>
</th>
<th scope="col">
<p>
POS</p>
</th>
<th scope="col">
<p>
ID</p>
</th>
<th scope="col">
<p>
REF</p>
</th>
<th scope="col">
<p>
ALT</p>
</th>
<th scope="col">
<p>
QUAL</p>
</th>
<th scope="col">
<p>
FILTER</p>
</th>
<th scope="col">
<p>
INFO</p>
</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>10</span>
</td>
<td>
<span>134017295</span>
</td>
<td>
<span>NC_000010.10:g.134017295A&gt;G</span>
</td>
<td>
<span>A</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>        .</span>
</td>
<td>
<span>          .</span>
</td>
<td>
<span>VRT=1;ANC=T;NIO=12;<strong><span>AD=SNP-12313,chr10:134017295A&gt;G;</span></strong></span>
</td>
</tr>
</tbody>
</table>
<h5>
 </h5>
<h5 id="ancestral_allele">
Ancestral Allele (ANC)
</h5>
<h6>
The optional “ANC” INFO tag allows you to provide dbSNP with the ancestral allele (if you know it) for a variant.</h6>
<h5>
 </h5>
<h6>
ANC Tag/Value Description</h6>
<h6>
<span>
<span>##INFO=&lt;ID=ANC,Number=1,Type=String,Description="Provide Ancestral Allele if known"&gt;</span>
</span>
</h6>
<h5>
 </h5>
<h6>
ANC Tag/Value Example</h6>
<table border="1" cellpadding="1" cellspacing="1" height="63" width="1150">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>8</span>
</td>
<td>
<span>19863</span>
</td>
<td>
<span>NC_000008.10:g.19863G&gt;C</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>         .</span>
</td>
<td>
<span>          .</span>
</td>
<td>
<span>VRT=1;<strong><span>ANC=T;</span></strong></span>
</td>
</tr>
</tbody>
</table>
<h5 id="free_text_for_comment">
Free Text for Comment (CMT)
</h5>
<p>
The optional “CMT” INFO tag allows you to provide dbSNP with text about any additional important information that cannot be described (e.g. phenotypic information) using the other available INFO tags</p>
<h6>
CMT Tag/Value Description</h6>
<p>
<span>##INFO=&lt;ID=CMT,Number=1,Type=String,Description="Comment"&gt;<br />
</span>
</p>
<h6>
CMT Data Format Example</h6>
<table border="1" cellpadding="1" cellspacing="1" height="63" width="1150">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>8</span>
</td>
<td>
<span>19863</span>
</td>
<td>
<span>NC_000008.10:g.19863G&gt;C</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>   .   </span>
</td>
<td>
<span>     .    </span>
</td>
<td>
<span>VRT=1;<strong><span>CMT="A variant identified in the SLC10A1 gene with possible correlation to disease susceptibilities (PMID: 124)"</span></strong></span>
</td>
</tr>
</tbody>
</table>
<h5 id="linkout">
LinkOut (LKO)
</h5>
<p>
The optional “LKO” INFO tag allows you to point to this variant on your organization's web site or to other relevant online information about your submission.</p>
<h6>
LKO Tag/Value Description</h6>
<p>
<span>##INFO=&lt;ID=LKO,Number=1,Type=String,Description="A link out URL for this variant on the submitting organization's website"&gt;<br />
</span>
</p>
<h6>
LKO Data Format Example:</h6>
<table border="1" cellpadding="1" cellspacing="1" height="63" width="1150">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>8</span>
</td>
<td>
<span>19863</span>
</td>
<td>
<span>NC_000008.10:g.19863G&gt;C</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span> .</span>
</td>
<td>
<span>   .</span>
</td>
<td>
<span>VRT=1;ANC=T;<strong><span>LKO=http://variantgps.nci08</span></strong>;</span>
</td>
</tr>
</tbody>
</table>
<h5 id="NIO">
Number of Independent Observations (NIO)
</h5>
<p>
The optional “NIO” INFO tag allows you to provide dbSNP with the number of times you observed this variant occur independently in your experimental analysis.</p>
<h6>
NIO Tag/Value Description</h6>
<p>
<span>##INFO=&lt;ID=NIO,Number=1,Type=Integer,Description="Number of Independent Observations; the number of times the submitter observed this variant occurring independently"&gt;</span>
</p>
<h6>
NIO Tag/Value Example</h6>
<table border="1" cellpadding="1" cellspacing="1" height="63" width="1150">
<thead>
<tr>
<th scope="col">
<strong>#CHROM</strong>
</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>8</span>
</td>
<td>
<span>19863</span>
</td>
<td>
<span>NC_000008.10:g.19863G&gt;C</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>VRT=1;ANC=T;<strong><span>NIO=12</span></strong>;</span>
</td>
</tr>
</tbody>
</table>
<h5 id="OMIM">
OMIM and OMIA (OMIM/OMIA) Records
</h5>
<h6>
OMIM and OMIA Tag/Value Descriptions</h6>
<p>
The optional "OMIM" and "OMIA" INFO tags allow you to provide dbSNP with any available <a href="https://omim.org">OMIM</a> or <a href="https://www.ncbi.nlm.nih.gov/omia">OMIA</a> record and variant ID (if available) associated with a variant.</p>
<h6>
OMIM:</h6>
<p>
<span>##INFO=&lt;ID=OMIM,Number=1,Type=String,Description="Provide OMIM record and variant ID if available e.g. 300746.0001"&gt;<br />
</span>
</p>
<h6>
OMIA:</h6>
<p>
<span>##INFO=&lt;ID=OMIA,Number=1,Type=String,Description="Provide OMIA record and variant ID if available e.g. 000011-9615"&gt;<br />
</span>
</p>
<h6>
OMIM and OMIA Data Format Example</h6>
<table border="1" cellpadding="1" cellspacing="1" height="63" width="1150">
<thead>
<tr>
<th scope="col">
<strong>#CHROM</strong>
</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>16</span>
</td>
<td>
<span>919982</span>
</td>
<td>
<span>NC_000016.9:g.919982G&gt;C</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>VRT=1;ANC=T;<strong><span>OMIM=300746.0001</span></strong>;</span>
</td>
</tr>
</tbody>
</table>
<h4>
 </h4>
<h4 id="population_ID">Population IDs (for Allele Frequency, Genotype Frequency, or Observed Heterozygosity data submissions)</h4>
<p>
If you intend to report allele frequency, genotype frequency, or observed heterozygosity in your VCF formatted dbSNP submission, place the population ID for each assayed population in the VCF header after the INFO Tag/Value descriptions, and before your data table. The POP IDs you will provide in the VCF header are the same ones you placed in the ID field of the Meta File.</p>
<h5>
Population_ID Tag/Value Description</h5>
<p><span>##population_id=&lt;A unique local population ID e.g. “HapMap”, “Case”, “Control”, “Healthy Blood Donors”&gt;</span> Use the same value placed in the ID field of the Meta file POPULATION section where the population details are described.</p>
<h5>
Population_ID Example</h5>
<p>
<span>##INFO=&lt;ID=GEN_FRQ,Number=1,Type=String,Description="Report population, sample size (number of distinct chromosomes assayed), and frequency for each genotype"&gt;<br />
<span>##population_id=EUROPEAN<br />
##population_id=AFRICAN</span><br />
</span>
</p>
<h4 id="pmid">PubMed ID (PMID) INFO Tag</h4>
<p>
The optional “PMID” INFO tag allows you to provide dbSNP with the PubMed ID (if available) for an original publication associated with a variant. If multiple PubMed IDs (PMID) are available for a single variant, report them using a comma separated list (see example below). Report PMIDs for multiple variants as a batch in the <a href="/snp/docs/submission/hts_submission_formatting_intro_meta_formatting/#Assay">ASSAY</a> and <a href="/snp/docs/submission/hts_submission_formatting_intro_meta_formatting/#publication">PUBLICATION</a> meta files. </p>
<h5>
PMID Tag/Value Description</h5>
<p>
<span>##INFO=&lt;ID=PMID,Number=.,Type=Integer,Description="PubMed ID linked to variation if available"&gt;<br />
</span>
</p>
<h5>
PMID Data Format Example</h5>
<table border="1" cellpadding="1" cellspacing="1" height="64" width="987">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>16</span>
</td>
<td>
<span>919982</span>
</td>
<td>
<span>NC_000016.9:g.919982G&gt;C</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>VRT=1;ANC=T;<strong><span>PMID=21840003</span></strong>;</span>
</td>
</tr>
</tbody>
</table>
<h4 id="sao">Variant Allele Origin (SAO) INFO Tag</h4>
<p>
The optional “SAO” or “Variant Allele Origin” INFO tag allows you to provide dbSNP with the source of the sample from which the variant was derived.</p>
<p><strong>Note:</strong> Although the name we use to refer to Allele Origin has changed from “SNP Allele Origin” (SAO) to “Variant Allele Origin” to emphasize that the dbSNP database contains both rare and polymorphic variants, the database itself still uses the acronym “SAO”.</p>
<h5>
SAO Tag/Value Description</h5>
<p>
<span>##INFO=&lt;ID=SAO,Number=.,Type=Integer,Description="Variant Allele Origin: 0 - unspecified, 1 - Germline, 2 - Somatic, 3 - Both"&gt;<br />
</span>
</p>
<h5>
SAO Data Format Example</h5>
<table border="1" cellpadding="1" cellspacing="1" height="64" width="1006">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>16</span>
</td>
<td>
<span>919982</span>
</td>
<td>
<span>NC_000016.9:g.919982G&gt;C</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>VRT=1;ANC=T;<strong><span>SAO=1</span></strong>;</span>
</td>
</tr>
</tbody>
</table>
<p><br /><strong>Note: </strong>If you are providing more than one allele origin value, place the allele origin values in a comma separated list in the order that they appear in the submission. List the value for the reference allele first, followed by the allele origin value for the 1st alternate allele, 2nd alternate allele, etc.</p>
<h4 id="ssr">Variant Suspect Reason (SSR) INFO Tag</h4>
<p>
The optional “SSR” or “Variant Suspect Reason” INFO tag allows you to provide dbSNP with the reason you suspect that a variant is a false positive. Evidence for false positives can include information indicating the presence of a paralogous sequence in the genome (<a href="https://www.ncbi.nlm.nih.gov/pubmed/19877174">Musumeci et al. 2010</a>) (<a href="https://www.ncbi.nlm.nih.gov/pubmed/21030649">Sudmant et al. 2010</a>), or evidence of sequencing error or computation artifacts.</p>
<p><strong>Note:</strong> Although the name we use to refer to the Suspect Reason code has changed from “SNP Suspect Reason” (SSR) to “Variant Suspect Reason” to emphasize that the dbSNP database contains both rare and polymorphic variants, the database itself still uses the acronym “SSR”.</p>
<h5>
SSR Tag Description</h5>
<p>
##INFO=&lt;ID=SSR,Number=.,Type=Integer,Description="Variant Suspect Reason Code, 0 - unspecified, 1 - Paralog, 2 - byEST, 3 - Para_EST, 4 - oldAlign, 5 - other"&gt;</p>
<h5>
SSR Data Format Example</h5>
<table border="1" cellpadding="1" cellspacing="1" height="64" width="1069">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>16</span>
</td>
<td>
<span>919982</span>
</td>
<td>
<span>NC_000016.9:g.919982G&gt;C</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>VRT=1;ANC=T;<span><strong>SSR=1</strong></span>;</span>
</td>
</tr>
</tbody>
</table>
<h4>
 </h4>
<h4 id="genotype_format_example">Genotype Format Example</h4>
<p>
The format for reporting genotypes is found in the <a href="https://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41">genotype submission example</a> provided by the 1000 Genomes Project in their description of VCF version 4.1.</p>
<h4 id="allele_frequency_format_example">Allele Frequency Format Examples</h4>
<p>
The format for reporting allele frequency follows the convention for reporting genotypes.</p>
<ul>
<li>
Add a reporting FORMAT column to specify the data type and order. Suggested data types are listed below.</li>
</ul>
<p>
           <span>##FORMAT=&lt;ID=NA,Number=1,Type=Integer,Description="Number of alleles for the population."&gt;<br />
       ##FORMAT=&lt;ID=NS,Number=1,Type=Integer,Description="Number of samples for the population."&gt;<br />
       ##FORMAT=&lt;ID=FRQ,Number=.,Type=Float,Description="Frequency of each alternate allele."&gt;<br />
       ##FORMAT=&lt;ID=AC,Number=.,Type=Integer,Description="Allele count for each alternate allele."&gt;</span></p>
<ul>
<li>
Add an additional column for each population.</li>
<li>
Report under the population column the total allele count (NA) or population samples (NS) followed by the allele frequency (FRQ) or allele count (AC), separated by a colon (:).</li>
<li>
Below are examples for reporting allele frequencies for a novel variant (row 1) and for a known variant with a dbSNP RS number (row 2) reported in the ID column.</li>
</ul>
<table border="1" cellpadding="1" cellspacing="1" height="114" width="1052">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
<th scope="col">
FORMAT</th>
<th scope="col">
EUROPEAN</th>
<th scope="col">
AFRICAN</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>X</span>
</td>
<td>
<span>140879</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>A</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>VRT=1;LID=SNV:chrX:140879</span>
</td>
<td>
<span>NA:FRQ</span>
</td>
<td>
<span>1424:0.001</span>
</td>
<td>
<span>1200:0.05</span>
</td>
</tr>
<tr>
<td>
<span>8</span>
</td>
<td>
<span>19962213</span>
</td>
<td>
<span>rs328</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>NA:FRQ</span>
</td>
<td>
<span>178:0.101</span>
</td>
<td>
<span>224:0.045</span>
</td>
</tr>
</tbody>
</table>
<h4 id="appendix_example_of_vcf_submission">Appendix: Example of a VCF Formatted dbSNP Submission</h4>
<p>
<br />
<span>##fileformat=VCFv4.1<br />
##fileDate=20120215<br />
##handle=MYSEQ_SNP<br />
##batch=Exome_SNP_Discovery<br />
##reference=GCF_000001405.12<br />
##INFO=&lt;ID=VRT,Number=1,Type=Integer,Description="Variation type, 1 - SNV: single nucleotide variation, 2 - DIV: deletion/insertion variation, 3 - HETEROZYGOUS: variable, but undefined at nucleotide level, 4 - STR: short tandem repeat (microsatellite) variation, 5 - NAMED: insertion/deletion variation of named repetitive element, 6 - NO VARIATION: sequence scanned for variation, but none observed, 7 - MIXED: cluster contains submissions from 2 or more allelic classes, 8 - MNV: multiple nucleotide variation with alleles of common length greater than 1, 9 - Exception"&gt;<br />
##INFO=&lt;ID=LID,Number=1,Type=String,Description="Unique local variation ID or name for display. The LID provided here combined with the handle must be unique for a particular submitter. An HGVS expression (http://www.hgvs.org/mutnomen/recs.html) can be used here"&gt;<br />
##FORMAT=&lt;ID=NA,Number=1,Type=Integer,Description="Number of alleles for the population."&gt;<br />
##FORMAT=&lt;ID=NS,Number=1,Type=Integer,Description="Number of samples for the population."&gt;<br />
##FORMAT=&lt;ID=FRQ,Number=.,Type=Float,Description="Frequency of each alternate allele."&gt;<br />
##FORMAT=&lt;ID=AC,Number=.,Type=Integer,Description="Allele count for each alternate allele."&gt;<br />
##population_id=EUROPEAN<br />
##population_id=AFRICAN</span>
</p>
<table border="1" cellpadding="1" cellspacing="1" height="155" width="1082">
<thead>
<tr>
<th scope="col">
#CHROM</th>
<th scope="col">
POS</th>
<th scope="col">
ID</th>
<th scope="col">
REF</th>
<th scope="col">
ALT</th>
<th scope="col">
QUAL</th>
<th scope="col">
FILTER</th>
<th scope="col">
INFO</th>
<th scope="col">
FORMAT</th>
<th scope="col">
EUROPEAN</th>
<th scope="col">
AFRICAN</th>
</tr>
</thead>
<tbody>
<tr>
<td>
<span>X</span>
</td>
<td>
<span>140860</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>T</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>VRT=1;LID=SNV:chrX:140860;</span>
</td>
<td>
<span>NA:FRQ</span>
</td>
<td>
<span>1424:0.056</span>
</td>
<td>
<span>.</span>
</td>
</tr>
<tr>
<td>
<span>X</span>
</td>
<td>
<span>140879</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>A</span>
</td>
<td>
<span>G</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>VRT=1;LID=SNV:chrX:140879;</span>
</td>
<td>
<span>NA:FRQ</span>
</td>
<td>
<span>1424:0.001</span>
</td>
<td>
<span>1200:0.05</span>
</td>
</tr>
<tr>
<td>
<span>X</span>
</td>
<td>
<span>140921</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>T</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>VRT=1;LID=SNV:chrX:140921;</span>
</td>
<td>
<span>NA:FRQ</span>
</td>
<td>
<span>1424:0.003</span>
</td>
<td>
<span>1200:0.002</span>
</td>
</tr>
<tr>
<td>
<span>X</span>
</td>
<td>
<span>140939</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>C</span>
</td>
<td>
<span>T</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>.</span>
</td>
<td>
<span>VRT=1;LID=SNV:chrX:140939;</span>
</td>
<td>
<span>NA:FRQ</span>
</td>
<td>
<span>1424:0.01</span>
</td>
<td>
<span>.</span>
</td>
</tr>
</tbody>
</table>
</div>
<!--/.col1-->
<div class="col2">
</div>
<!--/.col2-->
<div class="col3">
</div>
<!--/.col3-->
<div class="col4">
</div>
<!--/.col4-->
<div class="col5">
</div>
<div class="col6">
</div>
<div class="col7">
</div>
<div class="col8">
</div>
<div class="col9">
</div>
</div><!--/.content-->
</div><!--/.container-->
<div id="NCBIFooter_dynamic">
<div class="breadcrumbs">You are here:
<span id="breadcrumb_text"><a href="/guide/">NCBI</a></span></div>
<a id="help-desk-link" class="help_desk" href="https://support.ncbi.nlm.nih.gov/ics/support/default.asp?Time=2025-03-06T10:14:13-05:00&amp;Snapshot=%2Fprojects%2Fstaticsites%2Fsnp%2Fsnpdoc@1.10&amp;Host=portal107&amp;ncbi_phid=CE8EEC6D7C9B8A0100000000006B005B&amp;ncbi_session=CE8B5AF87C7FFCB1_0191SID&amp;from=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fsnp%2Fdocs%2Fsubmission%2Fvcf_submission_guidelines%2F&amp;Ncbi_App=snp&amp;Page=static&amp;style=classic&amp;deptID=28049" target="_blank">Support Center</a>
<noscript><img alt="" src="/stat?jsdisabled=true&amp;ncbi_app=snp&amp;ncbi_db=&amp;ncbi_pdid=static&amp;ncbi_phid=CE8EEC6D7C9B8A0100000000006B005B" /></noscript>
</div>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<div xmlns="http://www.w3.org/1999/xhtml" class="footer" id="footer" xml:base="http://127.0.0.1/sites/static/header_footer">
<section class="icon-section">
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
<div class="grid-container container">
<div class="icon-section_container">
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter">
<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40" fill="none">
<title>Twitter</title>
<g id="twitterx1008">
<path id="path1008" d="M6.06736 7L16.8778 20.8991L6.00001 32.2H10.2L18.6 23.1L25.668 32.2H34L22.8 17.5L31.9 7H28.4L20.7 15.4L14.401 7H6.06898H6.06736ZM9.66753 8.73423H12.9327L29.7327 30.4658H26.5697L9.66753 8.73423Z" fill="#5B616B"></path>
</g>
</svg>
</a>
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>Facebook</title>
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>LinkedIn</title>
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<defs>
<style>
.cls-11,
.cls-12 {
fill: #737373;
}
.cls-11 {
fill-rule: evenodd;
}
</style>
</defs>
<title>GitHub</title>
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
</path>
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
</path>
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
</path>
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
</path>
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
</path>
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
</path>
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
</svg></a>
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
<defs><style>.cls-1{fill:#737373;}</style></defs>
<title>NCBI Insights Blog</title>
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
</svg>
</a>
</div>
</div>
</section>
<section class="container-fluid bg-primary">
<div class="container pt-5">
<div class="row mt-3">
<div class="col-lg-3 col-12">
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
<ul class="list-inline social_media">
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
<title>Twitter</title>
<g id="twitterx1009" clip-path="url(#clip0_65276_3946)">
<path id="Vector_Twitter" d="M17.5006 34.6565C26.9761 34.6565 34.6575 26.9751 34.6575 17.4996C34.6575 8.02416 26.9761 0.342773 17.5006 0.342773C8.02514 0.342773 0.34375 8.02416 0.34375 17.4996C0.34375 26.9751 8.02514 34.6565 17.5006 34.6565Z" fill="#205493" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
<path id="path1009" d="M8.54811 8.5L16.2698 18.4279L8.50001 26.5H11.5L17.5 20L22.5486 26.5H28.5L20.5 16L27 8.5H24.5L19 14.5L14.5007 8.5H8.54927H8.54811ZM11.1197 9.73873H13.4519L25.4519 25.2613H23.1926L11.1197 9.73873Z" fill="white"></path>
</g>
<defs>
<clipPath id="clip0_65276_3946">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
<title>Facebook</title>
<g id="Facebook" clip-path="url(#clip0_1717_1086)">
<path id="Vector_Facebook" d="M15.1147 29.1371C15.1147 29.0822 15.1147 29.0296 15.1147 28.9747V18.9414H11.8183C11.6719 18.9414 11.6719 18.9414 11.6719 18.8018C11.6719 17.5642 11.6719 16.3289 11.6719 15.0937C11.6719 14.9793 11.7062 14.9518 11.816 14.9518C12.8683 14.9518 13.9206 14.9518 14.9751 14.9518H15.1215V14.8329C15.1215 13.8057 15.1215 12.774 15.1215 11.7492C15.1274 10.9262 15.3148 10.1146 15.6706 9.37241C16.1301 8.38271 16.9475 7.60378 17.9582 7.19235C18.6492 6.90525 19.3923 6.76428 20.1405 6.7783C21.0029 6.79202 21.8653 6.83091 22.7278 6.86065C22.8879 6.86065 23.048 6.89496 23.2082 6.90182C23.2974 6.90182 23.3271 6.94071 23.3271 7.02993C23.3271 7.54235 23.3271 8.05477 23.3271 8.5649C23.3271 9.16882 23.3271 9.77274 23.3271 10.3767C23.3271 10.4819 23.2974 10.5139 23.1921 10.5116C22.5379 10.5116 21.8814 10.5116 21.2271 10.5116C20.9287 10.5184 20.6316 10.5528 20.3395 10.6146C20.0822 10.6619 19.8463 10.7891 19.6653 10.9779C19.4842 11.1668 19.3672 11.4078 19.3307 11.6669C19.2857 11.893 19.2612 12.1226 19.2575 12.3531C19.2575 13.1904 19.2575 14.0299 19.2575 14.8695C19.2575 14.8946 19.2575 14.9198 19.2575 14.9564H23.0229C23.1807 14.9564 23.183 14.9564 23.1624 15.1074C23.0778 15.7662 22.9885 16.425 22.9039 17.0816C22.8322 17.6321 22.7636 18.1827 22.698 18.7332C22.6729 18.9437 22.6797 18.9437 22.4693 18.9437H19.2644V28.8992C19.2644 28.9793 19.2644 29.0593 19.2644 29.1394L15.1147 29.1371Z" fill="white"></path>
<path id="Vector_2_Facebook" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
</g>
<defs>
<clipPath id="clip0_1717_1086">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="Youtube" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
<title>Youtube</title>
<g id="YouTube" clip-path="url(#clip0_1717_1101)">
<path id="Vector_Youtube" d="M26.2571 11.4791C25.9025 11.1589 25.5709 10.9576 24.228 10.834C22.5512 10.6785 20.2797 10.6556 18.564 10.6533H16.4365C14.7208 10.6533 12.4493 10.6785 10.7725 10.834C9.43196 10.9576 9.09798 11.1589 8.7434 11.4791C7.81464 12.321 7.6202 14.6268 7.59961 16.8938C7.59961 17.3178 7.59961 17.741 7.59961 18.1635C7.62706 20.4121 7.82837 22.686 8.7434 23.521C9.09798 23.8412 9.42967 24.0425 10.7725 24.1661C12.4493 24.3216 14.7208 24.3445 16.4365 24.3468H18.564C20.2797 24.3468 22.5512 24.3216 24.228 24.1661C25.5686 24.0425 25.9025 23.8412 26.2571 23.521C27.1722 22.6929 27.3735 20.451 27.4009 18.2206C27.4009 17.7402 27.4009 17.2599 27.4009 16.7795C27.3735 14.5491 27.1699 12.3072 26.2571 11.4791ZM15.5604 20.5311V14.652L20.561 17.5001L15.5604 20.5311Z" fill="white"></path>
<path id="Vector_2_Youtube" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
</g>
<defs>
<clipPath id="clip0_1717_1101">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
</ul>
</div>
<div class="col-lg-3 col-12">
<p class="address_footer text-white">National Library of Medicine<br />
<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
Bethesda, MD 20894</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<p><a href="https://www.nlm.nih.gov/web_policies.html" class="text-white">Web Policies</a><br />
<a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white">FOIA</a><br />
<a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white" id="vdp">HHS Vulnerability Disclosure</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
<a href="https://www.nlm.nih.gov/accessibility.html" class="text-white">Accessibility</a><br />
<a href="https://www.nlm.nih.gov/careers/careers.html" class="text-white">Careers</a></p>
</div>
</div>
<div class="row">
<div class="col-lg-12 centered-lg">
<nav class="bottom-links">
<ul class="mt-3">
<li>
<a class="text-white" href="//www.nlm.nih.gov/">NLM</a>
</li>
<li>
<a class="text-white" href="https://www.nih.gov/">NIH</a>
</li>
<li>
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
</li>
<li>
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
</section>
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
</div>
</div>
<!--/.footer-->
<p class="last-updated small">Last updated: 2015-12-26T12:28:43-05:00</p>
</div>
<!--/.page-->
</div>
<!--/.wrap-->
<span class="PAFAppResources"></span>
</div><!-- /.twelve_col -->
</div>
<!-- /.grid -->
<!-- usually for JS scripts at page bottom -->
<span class="pagefixtures"></span>
<!-- CE8B5AF87C7FFCB1_0191SID /projects/staticsites/snp/snpdoc@1.10 portal107 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
<script type='text/javascript' src='/portal/js/portal.js'></script><script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4206108/js/3879255/4121861/4206110/4087685.js" snapshot="snp"></script></body>
</html>