nih-gov/www.ncbi.nlm.nih.gov/dbvar/content/expected_data

608 lines
44 KiB
XML
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<!-- AppResources meta begin -->
<meta name="paf-app-resources" content="" />
<!-- AppResources meta end -->
<!-- TemplateResources meta begin -->
<meta name="paf_template" content="StdNCol" />
<!-- TemplateResources meta end -->
<!-- Page meta begin -->
<!-- Page meta end -->
<!-- Logger begin -->
<meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_app" content="dbvar" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_pdid" content="static" />
<!-- Logger end -->
<title>Data We Expect</title>
<!-- PageFixtures headcontent begin -->
<meta name="ncbi_app" content="dbvar" />
<!-- PageFixtures headcontent end -->
<!-- AppResources external_resources begin -->
<script type="text/javascript" src="/core/jig/1.15.3/js/jig.min.js"></script>
<!-- AppResources external_resources end -->
<!-- Page headcontent begin -->
<meta name="subsite" content="dbvar" />
<meta name="path" content="dbvar/content/expected_data" />
<meta name="modified" content="2018-07-12T20:39:21Z" />
<!-- Page headcontent end -->
<!-- PageFixtures resources begin -->
<link xmlns="http://www.w3.org/1999/xhtml" type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218191/css/4207974/4206132.css" xml:base="http://127.0.0.1/sites/static/header_footer" />
<!-- PageFixtures resources end -->
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8CA2A57D1FE631000000000090007C.m_5" /><script type="text/javascript"><!--
var ScriptPath = '/portal/';
var objHierarchy = {"name":"PAFAppLayout","type":"Layout","realname":"PAFAppLayout",
"children":[{"name":"PAFAppLayout.AppController","type":"Cluster","realname":"PAFAppLayout.AppController",
"children":[{"name":"PAFAppLayout.AppController.AppResources","type":"Portlet","realname":"PAFAppLayout.AppController.AppResources","shortname":"AppResources"},
{"name":"PAFAppLayout.AppController.RequestProcessor","type":"Portlet","realname":"PAFAppLayout.AppController.RequestProcessor","shortname":"RequestProcessor"},
{"name":"PAFAppLayout.AppController.Controller","type":"Cluster","realname":"PAFAppLayout.AppController.Controller",
"children":[{"name":"PAFAppLayout.AppController.Controller.PAFStaticContentController","type":"Portlet","realname":"PAFAppLayout.AppController.Controller.PAFControllerSelector.PAFStaticContentController","shortname":"PAFStaticContentController"}]},
{"name":"PAFAppLayout.AppController.Page","type":"Cluster","realname":"PAFAppLayout.AppController.Page",
"children":[{"name":"PAFAppLayout.AppController.Page.PAFPageSelectorData","type":"Portlet","realname":"PAFAppLayout.AppController.Page.PAFPageSelector.PAFPageSelectorData","shortname":"PAFPageSelectorData"},
{"name":"PAFAppLayout.AppController.Page.PAFStaticPage","type":"Cluster","realname":"PAFAppLayout.AppController.Page.PAFPageSelector.PAFStaticPage",
"children":[{"name":"PAFAppLayout.AppController.Page.PAFStaticPage.MainPortlet","type":"Portlet","realname":"PAFAppLayout.AppController.Page.PAFPageSelector.PAFStaticPage.MainPortlet","shortname":"MainPortlet"}]}]},
{"name":"PAFAppLayout.AppController.PageFixtures","type":"Cluster","realname":"PAFAppLayout.AppController.PageFixtures",
"children":[{"name":"PAFAppLayout.AppController.PageFixtures.PageFixturesP","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.PageFixturesP","shortname":"PageFixturesP"},
{"name":"PAFAppLayout.AppController.PageFixtures.SearchBar","type":"Cluster","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar",
"children":[{"name":"PAFAppLayout.AppController.PageFixtures.SearchBar.SearchBarChooser","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar.SearchBarChooser","shortname":"SearchBarChooser"},
{"name":"PAFAppLayout.AppController.PageFixtures.SearchBar.PAFSearchBar","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar.PAFSearchBar","shortname":"PAFSearchBar"}]},
{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter","type":"Cluster","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter",
"children":[{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIBreadcrumbs","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIBreadcrumbs","shortname":"NCBIBreadcrumbs"},
{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIHelpDesk","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIHelpDesk","shortname":"NCBIHelpDesk"},
{"name":"PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIApplog_NoScript_Ping","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIApplog_NoScript_Ping","shortname":"NCBIApplog_NoScript_Ping"}]},
{"name":"PAFAppLayout.AppController.PageFixtures.LocalNavPortlet","type":"Portlet","realname":"PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.LocalNavPortlet","shortname":"LocalNavPortlet"}]},
{"name":"PAFAppLayout.AppController.TemplateResources","type":"Cluster","realname":"PAFAppLayout.AppController.TemplateResources",
"children":[{"name":"PAFAppLayout.AppController.TemplateResources.StdNColResources","type":"Portlet","realname":"PAFAppLayout.AppController.TemplateResources.PAFTemplateResources.StdNColResources","shortname":"StdNColResources"}]},
{"name":"PAFAppLayout.AppController.Logger","type":"Portlet","realname":"PAFAppLayout.AppController.Logger","shortname":"Logger"},
{"name":"PAFAppLayout.AppController.DebugConsole","type":"Portlet","realname":"PAFAppLayout.AppController.DebugConsole","shortname":"DebugConsole"}]}]};
--></script>
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4206115/css/4121862/3974050/3917732/251717/4139921/3428953/14534/45193/3534283/4128070/4062871/4005757.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4206115/css/3529741/3529739.css" media="print" /><script type="text/javascript">
var ObjectLinks=[{i:0, ename: "p$ExL", esid:"*", sname: "p$ExL", ssid:"*", dname:"p$el", dsid:"0",m:"CopyValue",p:[],f: function(src, dst) {fn_CopyValue(src, dst);}}]
var ActiveNames = {"p$ExL":1};
</script></head>
<body class=" static">
<div class="grid">
<div class="col twelve_col nomargin shadow">
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
<div class="sysmessages">
<noscript>
<p class="nojs">
<strong>Warning:</strong>
The NCBI web site requires JavaScript to function.
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">more...</a>
</p>
</noscript>
</div>
<!--/.sysmessage-->
<div class="wrap">
<div class="page">
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<div xmlns="http://www.w3.org/1999/xhtml" id="universal_header" xml:base="http://127.0.0.1/sites/static/header_footer">
<section class="usa-banner">
<div class="usa-accordion">
<header class="usa-banner-header">
<div class="usa-grid usa-banner-inner">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
<p>An official website of the United States government</p>
<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
<span class="usa-banner-button-text">Here's how you know</span>
</button>
</div>
</header>
<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
<div class="usa-banner-guidance-gov usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
<div class="usa-media_block-body">
<p>
<strong>The .gov means it's official.</strong>
<br />
Federal government websites often end in .gov or .mil. Before
sharing sensitive information, make sure you're on a federal
government site.
</p>
</div>
</div>
<div class="usa-banner-guidance-ssl usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
<div class="usa-media_block-body">
<p>
<strong>The site is secure.</strong>
<br />
The <strong>https://</strong> ensures that you are connecting to the
official website and that any information you provide is encrypted
and transmitted securely.
</p>
</div>
</div>
</div>
</div>
</section>
<div class="usa-overlay"></div>
<header class="ncbi-header" role="banner" data-section="Header">
<div class="usa-grid">
<div class="usa-width-one-whole">
<div class="ncbi-header__logo">
<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
</a>
</div>
<div class="ncbi-header__account">
<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
<span class="fa fa-user" aria-hidden="true">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
<g style="fill: #fff">
<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
</g>
</svg>
</span>
<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
<span class="sr-only">Show account info</span>
</button>
</div>
<div class="ncbi-popup-anchor">
<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
<div class="ncbi-popup-head">
<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
<span class="fa fa-times">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
</svg>
</span>
<span class="usa-sr-only">Close</span></button>
<h4>Account</h4>
</div>
<div class="account-user-info">
Logged in as:<br />
<b><span class="username" id="uname_long">username</span></b>
</div>
<div class="account-links">
<ul class="usa-unstyled-list">
<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
</ul>
</div>
</div>
</div>
</div>
</div>
</header>
<div role="navigation" aria-label="access keys">
<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
</div>
<section data-section="Alerts">
<div class="ncbi-alerts-placeholder"></div>
</section>
</div>
</div>
<!--/.header-->
<div class="header">
<div class="res_logo"><h1 class="res_name"><a href="/dbvar/" title="dbVar home">dbVar</a></h1><h2 class="res_tagline">Database of genomic structural variation</h2></div>
<div class="search"><form method="get" action="/dbvar/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="dbvar" selected="selected">dbVar</option><option value="gap">dbGaP</option><option value="clinvar">ClinVar</option><option value="medgen" class="last">MedGen</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" 
class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search dbVar" value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="isEnabled:false,disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'yes'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div><input type="hidden" name="p$a" id="p$a" /><input type="hidden" name="p$l" id="p$l" value="PAFAppLayout" /><input type="hidden" name="p$st" id="p$st" value="dbvar" /><input name="SessionId" id="SessionId" value="CE8B5AF87C7FFCB1_0191SID" disabled="disabled" type="hidden" /><input name="Snapshot" id="Snapshot" value="/projects/dbVar/dbVar@9.7" disabled="disabled" type="hidden" /></form><ul class="inline_list searchlinks"><li>
<a href="/dbvar/advanced">Advanced</a>
</li><li>
<a class="help" href="/dbvar/content/help">Help</a>
</li></ul></div>
</div>
<div class="nav_and_browser">
</div>
<!-- was itemctrl -->
<div class="container">
<div id="maincontent" class="content col twelve_col last">
<div class="col1">
<h2 id="data-we-expect">Data We Expect</h2>
<h3 id="table-of-contents">Table of Contents</h3>
<ol>
<li><a href="#intro">Introduction</a></li>
<li><a href="#examples">Examples</a><ol>
<li><a href="#probe">Probe-based methods</a></li>
<li><a href="#map">Mapping-based methods</a></li>
<li><a href="#sequence">Sequencing-based methods</a></li>
</ol>
</li>
</ol>
<h3 id="introduction"><span id="intro"></span><a href="">Introduction</a></h3>
<p>Structural Variation (SV) can be complex to represent. Current technologies rarely provide base pair resolution for variant breakpoints. The experimental methods used determine the extent of this uncertainty. This in turn affects the way you report variants. Data representation can largely be broken down into the following categories:</p>
<ol>
<li>Variants for which we can define a minimal region that is definitely affected, but are unable to define precise breakpoints - only a range of coordinates within which the breakpoints likely occur (e.g., array CGH, SNP array)</li>
<li>Variants for which a defined region of the genome is known to contain the variant, but the exact location of the variant and the breakpoints within this region are unknown (e.g., paired-end mapping, optical mapping)</li>
<li>Variants for which we have basepair resolution for the breakpoints (e.g., sequencing)</li>
</ol>
<p>When reporting variants there is a core set of data that will capture all the available information on your variant, including the degree of uncertainty present in the location of breakpoints. This data set includes:</p>
<blockquote>
<p><strong>start-stop coordinates:</strong> used to define events where breakpoints are known to basepair resolution.
<strong>inner start-stop coordinates:</strong> used to define regions that are <em>known to be affected</em> by a variant, but do not define the actual breakpoints. The breakpoints lie <em>outside</em> of the defined region.
<strong>outer start-stop coordinates:</strong> used to define the absolute outer boundary of a variation event but do not define the actual breakpoints. The breakpoints lie <em>inside</em> of the defined region.
<strong>allele length:</strong> the length of the affected variant. For example, paired-end mapping may identify a 5-kb deletion, but breakpoints are not known. Allele length does not have to be exact - approximations are acceptable, depending on the method.</p>
</blockquote>
<p>The two structural variation archives (dbVar and DGVa) store data in a hierarchical fashion in an attempt to better capture the experimental process used to support variation features. There are basically two levels in the hierarchy, denoted by '<strong>sv</strong>' and '<strong>ssv</strong>'. Asserted variant regions (<strong>sv</strong>) represent an author's assertion about a variant region. Supporting variants (<strong>ssv</strong>) are meant to capture the experimental support for the asserted variant regions. <strong>ssv</strong> variant calls are children of <strong>sv</strong> calls. Additionally, <strong>sv</strong> calls can be merged with one another to add an additional layer to the hierarchy. There is no one standard for representing data of this complexity.</p>
<p>Your dbVar submission should report asserted structural variant regions (<strong>sv</strong>) and/or supporting variants (<strong>ssv</strong>).</p>
<p>In addition, dbVar works in close cooperation with the Database of Genomic Variants Archive (DGVa) at the EBI in Cambridge, UK. Unique identifiers ("accessions") are assigned to variants based upon which institution received the submission. Data are exchanged between the two resources on a regular basis. Identifiers,  <strong>sv</strong> and <strong>ssv</strong>, are prefixed with <strong>n</strong> if the study was submitted to NCBI, or <strong>e</strong> if it was submitted to EBI. Every dbVar variant will therefore have one of four (4) prefixes:</p>
<ul>
<li><strong>nsv</strong> - asserted variant region submitted to NCBI</li>
<li><strong>nssv</strong> - supporting variant submitted to NCBI</li>
<li><strong>esv</strong> - asserted variant region submitted to EBI</li>
<li><strong>essv</strong> - supporting variant submitted to EBI</li>
</ul>
<p>Likewise, study accessions are prefixed by '<strong>nstd</strong>' if the study was submitted to dbVar, or '<strong>estd</strong>' if it was submitted to DGVa.</p>
<h3 id="examples"><a href="">Examples</a></h3>
<p>Representing this information robustly is important for understanding the data within a particular study, for doing meta-analysis across studies, and for graphical rendering. Accurate reporting and use of structural variation (SV) data requires a familiarity with the methods used to generate the data and the lack of precision introduced by a given method. With the exception of some forms of sequencing data, precise variant breakpoints are rarely determined. The type and extent of uncertainty in defining breakpoints are an essential part of the variant data, and are required if the data is to be accurate and useful.</p>
<p>The following examples describe popular experimental methods currently in use to identify structural variants. Please read them carefully, as they explain exactly what data we expect to receive in your submission, depending on the experimental method(s) you used.</p>
<p>Current SV detection methods can be grouped into three types:</p>
<ol>
<li><strong>Probe-based Methods</strong> (e.g., BAC array CGH, Oligo array aCGH, SNP arrays, etc.)</li>
<li><strong>Mapping-based Methods</strong> (e.g., Paired-end mapping, Optical mapping, etc.)</li>
<li><strong>Sequencing-based Methods</strong> (e.g., Sanger sequencing, Next-gen sequencing, Sequence alignment, Read depth analysis, etc.)</li>
</ol>
<p>If your method does not fall into one of the above categories, or if you used a combination of methods from more than one category, please use the information in the sections below as a guide. The goal is for you to provide complete variant descriptions, including the nature and extent of uncertainty in the location of variant boundaries.</p>
<h4 id="probebased-methods-bac-array-cgh"><span id="probe"></span><a href="">Probe-based methods   (BAC array CGH, oligo array aCGH, SNP arrays, read depth)</a></h4>
<p>In this type of experiment, probes are arrayed across the genome (or genomic region) and signal intensity is used to identify regions that vary in copy number from what is expected. The density of probes at a given locus will determine resolution and therefore the extent of uncertainty. Because DNA between probes is not assayed by probe-based experiments, there is no way to capture the precise location of variant boundaries. Typically, the best one can do is identify the two probes that flank the breakpoint, one of which is affected by the variant, and the other of which is not. This necessarily results in “fuzzy” endpoints:</p>
<p><img src="/projects/dbvar/images/Expect_fig1.png" title="Figure 1: Uncertainty in Probe Data" alt="Figure 1: Uncertainty in Probe Data" width="638" /></p>
<p>Figure 1: Uncertainty in Probe Data</p>
<p>In the example above, a group of probes are deleted (indicated by red Xs). The region between the <strong>inner_start</strong> and <strong>inner_stop</strong> coordinates is known to be deleted. Similarly, the region outside the <strong>outer_start</strong> and <strong>outer_stop</strong> coordinates is known to be present (i.e., not deleted, or unaffected by the variant). To capture all of the information it is necessary to report:</p>
<ul>
<li>inner_start = the 5'-most nucleotide of the first affected probe</li>
<li>inner_stop = the 3'-most nucleotide of the last affected probe</li>
<li>outer_start = the 3'-most nucleotide of the last unaffected probe preceding the variant</li>
<li>outer_stop = the 5'-most nucleotide of the first unaffected probe following the variant</li>
</ul>
<p>The same rules apply when there is an <em>increase</em> in probe intensity across a region (gain). However, there are important conceptual differences in the interpretation of gains and losses (see “An important note about Gains and Insertions” below).</p>
<p>If you are submitting SV data from a probe-based experiment (e.g., aCGH, SNP array) to dbVar, please include the following coordinates for each <strong>ssv</strong>, or supporting variant:</p>
<p>Included file 'expected_data-table1.inc' not found</p>
<p>Submissions based on probe-type experiments that lack either inner or outer start and stop coordinates are incomplete, but can be processed. The accurate identification and reporting of the extent of (un)certainty of regions that are known to be affected or unaffected are important aspects of the data. We ask that you submit outer and inner coordinates wherever possible for probe-based experiments.</p>
<p><strong>An important note about Gains and Insertions</strong></p>
<ol>
<li>For probe-based gains, one does not know where in the genome the gained sequence is represented; it may be at the location represented in the array (e.g., a tandem duplication), or it may be elsewhere in the genome. However, like the deletion event, we do know which probes are part of the GAIN and can still provide precise information for the inner start/stop.</li>
<li>One must distinguish between gains and insertions. Insertions can only be represented by mapping-based or sequencing-based data; they cannot be represented by probe data. A probe-based <strong><em>gain</em></strong> can be thought of as a duplicated (or “extra”) copy of known sequence content but unknown location.  A mapping-based <strong><em>insertion</em></strong> can be thought of as extra sequence of unknown content but known, unknown or imprecise location. Gains are represented in probe data; insertions are represented in mapping data.</li>
</ol>
<h4 id="mappingbased-methods"><span id="map"></span><a href="">Mapping-based methods</a></h4>
<p>In mapping based technologies (e.g., paired-end mapping, optical mapping), one knows with precision the outer boundaries between which the variant breakpoints must fall. In the case of paired-end mapping these are provided by paired-end sequences, while in optical mapping they correspond to restriction sites. The distance between outer_start and outer_stop coordinates can vary between 300bp and 40kb, depending on the method used. The size of the fragment also heavily influences the type and size of variant that can be detected.</p>
<p>In addition to outer start and outer stop, mapping data should provide the approximate size of the variant (<strong>allele_length</strong>). This is typically calculated by comparing the size of the experimental fragment to an expected size provided by a reference.</p>
<p><img src="/projects/dbvar/images/Expect_fig2.png" title="Figure 2: Uncertainty in Mapping Data" alt="Figure 2: Uncertainty in Mapping Data" width="638" /></p>
<p>Figure 2: Uncertainty in Mapping Data</p>
<p>Note that the size of the variant (<strong>allele_length</strong>) often cannot be determined precisely. This lack of precision of the <strong>length</strong> of the variant is a separate issue from the fuzziness of the variant's placement between the known endpoints (<strong>outer_start</strong> and <strong>outer_stop</strong>). It is not necessary to provide the degree of uncertainty in estimating allele length for every variant; rather, please include this resolution, and its rationale, in the method description.</p>
<p>The data we expect to receive with a <strong>mapping-based</strong> submission includes:</p>
<p>Included file 'expected_data-table2.inc' not found</p>
<h4 id="sequencing-based-methods"><span id="sequence"></span><a href="">Sequencing-based methods</a></h4>
<p>In many cases, using either long reads or 2<sup>nd</sup> generation sequencing reads, breakpoint resolution of variants can be achieved. In such cases, we would expect the following data:</p>
<p>Included file 'expected_data-table3.inc' not found</p>
<p>However, in many cases, 2<sup>nd</sup> generation sequencing may not give precise breakpoints and the user should give inner/outer starts as necessary.</p>
<p>Included file 'expected_data-table4.inc' not found</p>
</div>
<!--/.col1-->
<div class="col2">
</div>
<!--/.col2-->
<div class="col3">
</div>
<!--/.col3-->
<div class="col4">
</div>
<!--/.col4-->
<div class="col5">
</div>
<div class="col6">
</div>
<div class="col7">
</div>
<div class="col8">
</div>
<div class="col9">
</div>
</div><!--/.content-->
</div><!--/.container-->
<div id="NCBIFooter_dynamic">
<div class="breadcrumbs">You are here:
<span id="breadcrumb_text"><a href="/guide/">NCBI</a></span></div>
<a id="help-desk-link" class="help_desk" href="https://support.ncbi.nlm.nih.gov/ics/support/default.asp?Time=2025-03-12T18:14:18-04:00&amp;Snapshot=%2Fprojects%2FdbVar%2FdbVar@9.7&amp;Host=portal105&amp;ncbi_phid=CE8CA2A57D1FE631000000000090007C&amp;ncbi_session=CE8B5AF87C7FFCB1_0191SID&amp;from=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fdbvar%2Fcontent%2Fexpected_data%2F&amp;Ncbi_App=dbvar&amp;Page=static&amp;style=classic&amp;deptID=28049" target="_blank">Support Center</a>
<noscript><img alt="" src="/stat?jsdisabled=true&amp;ncbi_app=dbvar&amp;ncbi_db=&amp;ncbi_pdid=static&amp;ncbi_phid=CE8CA2A57D1FE631000000000090007C" /></noscript>
</div>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<div xmlns="http://www.w3.org/1999/xhtml" class="footer" id="footer" xml:base="http://127.0.0.1/sites/static/header_footer">
<section class="icon-section">
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
<div class="grid-container container">
<div class="icon-section_container">
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter">
<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40" fill="none">
<title>Twitter</title>
<g id="twitterx1008">
<path id="path1008" d="M6.06736 7L16.8778 20.8991L6.00001 32.2H10.2L18.6 23.1L25.668 32.2H34L22.8 17.5L31.9 7H28.4L20.7 15.4L14.401 7H6.06898H6.06736ZM9.66753 8.73423H12.9327L29.7327 30.4658H26.5697L9.66753 8.73423Z" fill="#5B616B"></path>
</g>
</svg>
</a>
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>Facebook</title>
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>LinkedIn</title>
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
</path>
</svg></a>
<!-- "Follow NCBI" icon link to the NCBI GitHub organisation. The link is named
     by aria-label; the SVG is decorative and hidden from assistive technology.
     The style element in defs declares classes cls-11 and cls-12. cls-11 is
     also referenced by the footer_facebook and footer_linkedin icon paths, so
     keep this style block in place while those icons use the class. -->
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300" aria-hidden="true">
<defs>
<style>
.cls-11,
.cls-12 {
fill: #737373;
}
.cls-11 {
fill-rule: evenodd;
}
</style>
</defs>
<title>GitHub</title>
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
</path>
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
</path>
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
</path>
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
</path>
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
</path>
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
</path>
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
</svg></a>
<!-- "Follow NCBI" icon link to the NCBI Insights blog. The aria-label now
     matches the SVG title ("NCBI Insights Blog") so the link purpose is clear
     out of context; the SVG itself is decorative and hidden from assistive
     technology. -->
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="NCBI Insights Blog">
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40" aria-hidden="true">
<defs><style>.cls-1{fill:#737373;}</style></defs>
<title>NCBI Insights Blog</title>
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
</svg>
</a>
</div>
</div>
</section>
<section class="container-fluid bg-primary">
<div class="container pt-5">
<div class="row mt-3">
<div class="col-lg-3 col-12">
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
<ul class="list-inline social_media">
<!-- "Connect with NLM" icon link to the NLM Twitter account (opens in a new
     tab). The link is named by aria-label; the SVG is decorative and hidden
     from assistive technology so the name is not announced twice. -->
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none" aria-hidden="true">
<title>Twitter</title>
<g id="twitterx1009" clip-path="url(#clip0_65276_3946)">
<path id="Vector_Twitter" d="M17.5006 34.6565C26.9761 34.6565 34.6575 26.9751 34.6575 17.4996C34.6575 8.02416 26.9761 0.342773 17.5006 0.342773C8.02514 0.342773 0.34375 8.02416 0.34375 17.4996C0.34375 26.9751 8.02514 34.6565 17.5006 34.6565Z" fill="#205493" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
<path id="path1009" d="M8.54811 8.5L16.2698 18.4279L8.50001 26.5H11.5L17.5 20L22.5486 26.5H28.5L20.5 16L27 8.5H24.5L19 14.5L14.5007 8.5H8.54927H8.54811ZM11.1197 9.73873H13.4519L25.4519 25.2613H23.1926L11.1197 9.73873Z" fill="white"></path>
</g>
<defs>
<clipPath id="clip0_65276_3946">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
<!-- "Connect with NLM" icon link to the NLM Facebook page (opens in a new
     tab). The link is named by aria-label; the SVG is decorative and hidden
     from assistive technology so the name is not announced twice. -->
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none" aria-hidden="true">
<title>Facebook</title>
<g id="Facebook" clip-path="url(#clip0_1717_1086)">
<path id="Vector_Facebook" d="M15.1147 29.1371C15.1147 29.0822 15.1147 29.0296 15.1147 28.9747V18.9414H11.8183C11.6719 18.9414 11.6719 18.9414 11.6719 18.8018C11.6719 17.5642 11.6719 16.3289 11.6719 15.0937C11.6719 14.9793 11.7062 14.9518 11.816 14.9518C12.8683 14.9518 13.9206 14.9518 14.9751 14.9518H15.1215V14.8329C15.1215 13.8057 15.1215 12.774 15.1215 11.7492C15.1274 10.9262 15.3148 10.1146 15.6706 9.37241C16.1301 8.38271 16.9475 7.60378 17.9582 7.19235C18.6492 6.90525 19.3923 6.76428 20.1405 6.7783C21.0029 6.79202 21.8653 6.83091 22.7278 6.86065C22.8879 6.86065 23.048 6.89496 23.2082 6.90182C23.2974 6.90182 23.3271 6.94071 23.3271 7.02993C23.3271 7.54235 23.3271 8.05477 23.3271 8.5649C23.3271 9.16882 23.3271 9.77274 23.3271 10.3767C23.3271 10.4819 23.2974 10.5139 23.1921 10.5116C22.5379 10.5116 21.8814 10.5116 21.2271 10.5116C20.9287 10.5184 20.6316 10.5528 20.3395 10.6146C20.0822 10.6619 19.8463 10.7891 19.6653 10.9779C19.4842 11.1668 19.3672 11.4078 19.3307 11.6669C19.2857 11.893 19.2612 12.1226 19.2575 12.3531C19.2575 13.1904 19.2575 14.0299 19.2575 14.8695C19.2575 14.8946 19.2575 14.9198 19.2575 14.9564H23.0229C23.1807 14.9564 23.183 14.9564 23.1624 15.1074C23.0778 15.7662 22.9885 16.425 22.9039 17.0816C22.8322 17.6321 22.7636 18.1827 22.698 18.7332C22.6729 18.9437 22.6797 18.9437 22.4693 18.9437H19.2644V28.8992C19.2644 28.9793 19.2644 29.0593 19.2644 29.1394L15.1147 29.1371Z" fill="white"></path>
<path id="Vector_2_Facebook" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
</g>
<defs>
<clipPath id="clip0_1717_1086">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
<!-- "Connect with NLM" icon link to the NLM YouTube channel (opens in a new
     tab). The brand name is spelled "YouTube" in both the accessible name and
     the SVG title. The SVG is decorative and hidden from assistive technology
     because the link is already named by aria-label. -->
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="YouTube" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none" aria-hidden="true">
<title>YouTube</title>
<g id="YouTube" clip-path="url(#clip0_1717_1101)">
<path id="Vector_Youtube" d="M26.2571 11.4791C25.9025 11.1589 25.5709 10.9576 24.228 10.834C22.5512 10.6785 20.2797 10.6556 18.564 10.6533H16.4365C14.7208 10.6533 12.4493 10.6785 10.7725 10.834C9.43196 10.9576 9.09798 11.1589 8.7434 11.4791C7.81464 12.321 7.6202 14.6268 7.59961 16.8938C7.59961 17.3178 7.59961 17.741 7.59961 18.1635C7.62706 20.4121 7.82837 22.686 8.7434 23.521C9.09798 23.8412 9.42967 24.0425 10.7725 24.1661C12.4493 24.3216 14.7208 24.3445 16.4365 24.3468H18.564C20.2797 24.3468 22.5512 24.3216 24.228 24.1661C25.5686 24.0425 25.9025 23.8412 26.2571 23.521C27.1722 22.6929 27.3735 20.451 27.4009 18.2206C27.4009 17.7402 27.4009 17.2599 27.4009 16.7795C27.3735 14.5491 27.1699 12.3072 26.2571 11.4791ZM15.5604 20.5311V14.652L20.561 17.5001L15.5604 20.5311Z" fill="white"></path>
<path id="Vector_2_Youtube" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
</g>
<defs>
<clipPath id="clip0_1717_1101">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
</ul>
</div>
<!-- Footer column: NLM postal address. The street address is a link to a
     Google Maps place URL and opens in a new tab. -->
<div class="col-lg-3 col-12">
  <p class="address_footer text-white">
    National Library of Medicine<br />
    <a class="text-white" href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
    Bethesda, MD 20894</a>
  </p>
</div>
<!-- Footer column: policy links (web policies, FOIA, HHS vulnerability
     disclosure), one per line. -->
<div class="col-lg-3 col-12 centered-lg">
  <p>
    <a class="text-white" href="https://www.nlm.nih.gov/web_policies.html">Web Policies</a><br />
    <a class="text-white" href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office">FOIA</a><br />
    <a class="text-white" id="vdp" href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html">HHS Vulnerability Disclosure</a>
  </p>
</div>
<!-- Footer column: help, accessibility and careers links, one per line. -->
<div class="col-lg-3 col-12 centered-lg">
  <p>
    <a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
    <a class="text-white" href="https://www.nlm.nih.gov/accessibility.html">Accessibility</a><br />
    <a class="text-white" href="https://www.nlm.nih.gov/careers/careers.html">Careers</a>
  </p>
</div>
</div>
<div class="row">
<div class="col-lg-12 centered-lg">
<!-- Bottom link bar: parent organisations (NLM, NIH, HHS, USA.gov). Every
     link uses an explicit https URL, so the NLM link no longer inherits the
     scheme of whatever page embeds this footer. -->
<nav class="bottom-links">
<ul class="mt-3">
<li>
<a class="text-white" href="https://www.nlm.nih.gov/">NLM</a>
</li>
<li>
<a class="text-white" href="https://www.nih.gov/">NIH</a>
</li>
<li>
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
</li>
<li>
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
</section>
<!-- External scripts loaded at the end of the footer markup, both served from
     the site-relative /portal/portal3rc.fcgi path.
     NOTE(review): judging by the path only, the first looks like a combined
     bundle of Instrument* modules (presumably usage instrumentation) and the
     second (hfjs2.js) presumably drives header/footer behaviour; confirm
     before reordering or deferring either script. -->
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
</div>
</div>
<!--/.footer-->
<!-- Page modification timestamp (ISO 8601, UTC); wrapped in a time element so
     the value is machine-readable. The visible text is unchanged. -->
<p class="last-updated small">Last updated: <time datetime="2018-07-12T20:39:21Z">2018-07-12T20:39:21Z</time></p>
</div>
<!--/.page-->
</div>
<!--/.wrap-->
<span class="PAFAppResources"></span>
</div><!-- /.twelve_col -->
</div>
<!-- /.grid -->
<!-- usually for JS scripts at page bottom -->
<span class="pagefixtures"></span>
<!-- CE8B5AF87C7FFCB1_0191SID /projects/dbVar/dbVar@9.7 portal105 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
<!-- Page-bottom scripts: the site-relative portal.js, then a numbered script
     from static.pubmed.gov. Attribute values use double quotes throughout and
     the third-party URL is explicit https rather than protocol-relative. -->
<script type="text/javascript" src="/portal/js/portal.js"></script>
<script type="text/javascript" src="https://static.pubmed.gov/portal/portal3rc.fcgi/4206115/js/3879255/4121861/4206117/4087685.js" snapshot="dbvar"></script>
</body>
</html>