nih-gov/www.ncbi.nlm.nih.gov/books/n/handbook/ch5/index.html

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">

    <head>
        <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
        <!-- AppResources meta begin -->
        <meta name="paf-app-resources" content="" />
        <!-- Stores a Date in the global ncbi_startTime before the resources below are requested; presumably read by a page-timing script that is not part of this file (confirm). -->
        <script type="text/javascript">var ncbi_startTime = new Date();</script>

        <!-- AppResources meta end -->

        <!-- TemplateResources meta begin -->
        <meta name="paf_template" content="" />

        <!-- TemplateResources meta end -->

        <!-- Logger begin -->
        <meta name="ncbi_db" content="books" />
        <meta name="ncbi_pdid" content="book-part" />
        <meta name="ncbi_acc" content="NBK21088" />
        <meta name="ncbi_domain" content="handbook" />
        <meta name="ncbi_report" content="record" />
        <meta name="ncbi_type" content="fulltext" />
        <meta name="ncbi_objectid" content="" />
        <meta name="ncbi_pcid" content="/NBK21088/" />
        <meta name="ncbi_pagename" content="The Single Nucleotide Polymorphism Database (dbSNP) of Nucleotide Sequence Variation - The NCBI Handbook - NCBI Bookshelf" />
        <meta name="ncbi_bookparttype" content="chapter" />
        <meta name="ncbi_app" content="bookshelf" />
        <!-- Logger end -->

        <title>The Single Nucleotide Polymorphism Database (dbSNP) of Nucleotide Sequence Variation - The NCBI Handbook - NCBI Bookshelf</title>

        <!-- AppResources external_resources begin -->
        <link rel="stylesheet" href="/core/jig/1.15.2/css/jig.min.css" />
        <script type="text/javascript" src="/core/jig/1.15.2/js/jig.min.js"></script>

        <!-- AppResources external_resources end -->

        <!-- Page meta begin -->
        <meta name="robots" content="NOINDEX,NOFOLLOW,NOARCHIVE,NOIMAGEINDEX" />

        <!-- Bibliographic metadata (citation_* names) -->
        <meta name="citation_inbook_title" content="The NCBI Handbook [Internet]" />
        <meta name="citation_title" content="The Single Nucleotide Polymorphism Database (dbSNP) of Nucleotide Sequence Variation" />
        <meta name="citation_publisher" content="National Center for Biotechnology Information (US)" />
        <meta name="citation_date" content="2011/02/02" />
        <meta name="citation_author" content="Adrienne Kitts" />
        <meta name="citation_author" content="Stephen Sherry" />
        <meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK21088/" />

        <!-- Dublin Core metadata (DC.* names, schema declared by the link below) -->
        <link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" />
        <meta name="DC.Title" content="The Single Nucleotide Polymorphism Database (dbSNP) of Nucleotide Sequence Variation" />
        <meta name="DC.Type" content="Text" />
        <meta name="DC.Publisher" content="National Center for Biotechnology Information (US)" />
        <meta name="DC.Contributor" content="Adrienne Kitts" />
        <meta name="DC.Contributor" content="Stephen Sherry" />
        <meta name="DC.Date" content="2011/02/02" />
        <meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK21088/" />

        <meta name="description" content="Sequence variations exist at defined positions within genomes and are responsible for individual phenotypic characteristics, including a person's propensity toward complex disorders such as heart disease and cancer. As tools for understanding human variation and molecular genetics, sequence variations can be used for gene mapping, definition of population structure, and performance of functional studies." />

        <!-- Open Graph tags carry both attributes: the Open Graph protocol specifies "property", and "name" is kept for any consumer that already reads it. -->
        <meta name="og:title" property="og:title" content="The Single Nucleotide Polymorphism Database (dbSNP) of Nucleotide Sequence Variation" />
        <meta name="og:type" property="og:type" content="book" />
        <meta name="og:description" property="og:description" content="Sequence variations exist at defined positions within genomes and are responsible for individual phenotypic characteristics, including a person's propensity toward complex disorders such as heart disease and cancer. As tools for understanding human variation and molecular genetics, sequence variations can be used for gene mapping, definition of population structure, and performance of functional studies." />
        <meta name="og:url" property="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK21088/" />
        <meta name="og:site_name" property="og:site_name" content="NCBI Bookshelf" />
        <meta name="og:image" property="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png" />
        <meta name="twitter:card" content="summary" />
        <meta name="twitter:site" content="@ncbibooks" />

        <meta name="warning" content="This publication is provided for historical reference only and the information may be out of date." />
        <meta name="bk-non-canon-loc" content="/books/n/handbook/ch5/" />
        <link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK21088/" />

        <!-- Bookshelf stylesheets, page-level style overrides and scripts; the order is kept exactly as delivered because later files may depend on earlier ones. -->
        <link rel="stylesheet" href="/corehtml/pmc/css/figpopup.css" type="text/css" media="screen" />
        <link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css" type="text/css" />
        <link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_print.min.css" type="text/css" media="print" />
        <style type="text/css">.main-content {background:transparent repeat-y top left;background-image:url(/corehtml/pmc/css/bookshelf/2.26/img/archive.png);background-size: auto, contain; padding:0 0 0 3em }</style>
        <style type="text/css">p a.figpopup{display:inline !important} .bk_tt {font-family: monospace}  .first-line-outdent .bk_ref {display: inline}  .body-content h2, .body-content .h2  {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a {text-decoration:none;border:0 !important} .temp-labeled-list  .graphic {display:inline-block !important} .temp-labeled-list  img{width:100%}</style>
        <script type="text/javascript" src="/corehtml/pmc/js/jquery.hoverIntent.min.js"> </script>
        <script type="text/javascript" src="/corehtml/pmc/js/common.min.js?_=3.18"> </script>
        <script type="text/javascript" src="/corehtml/pmc/js/large-obj-scrollbars.min.js"> </script>
        <script type="text/javascript">window.name="mainwindow";</script>
        <script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/book-toc.min.js"> </script>
        <script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/books.min.js"> </script>
        <meta name="book-collection" content="NONE" />

        <!-- Page meta end -->
        <link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" />
        <meta name="ncbi_phid" content="CE8BDFC27C82417100000000001F0018.m_13" />
        <meta name="referrer" content="origin-when-cross-origin" />
        <link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3985586/3808861/4121862/3974050/3917732/251717/4216701/14534/45193/4113719/3849091/3984811/3751656/4033350/3840896/3577051/3852958/4008682/4207974/4206132/4062871/12930/3964959/3854974/36029/4128070/9685/3549676/3609192/3609193/3609213/3395586.css" />
        <link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3411343/3882866.css" media="print" />
    </head>
    <body class="book-part">
        <div class="grid">
            <div class="col twelve_col nomargin shadow">
                <!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
                <div class="sysmessages">
                    <!-- Shown only when scripting is disabled. The help link opens a new window, so rel="noopener" keeps that window from getting a handle on this page. -->
                    <noscript>
                        <p class="nojs">
                            <strong>Warning:</strong>
                            The NCBI web site requires JavaScript to function.
                            <a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank" rel="noopener">more...</a>
                        </p>
                    </noscript>
                </div>
                <!--/.sysmessage-->
                <div class="wrap">
                    <div class="page">
                        <div class="top">
                            <div id="universal_header">
	<!-- Government-site banner. The button names the panel it controls (aria-controls="gov-banner-top"); the panel ships collapsed (aria-expanded="false" / aria-hidden="true") and is presumably toggled by a script not visible in this file. The section has no heading, so aria-label gives the landmark an accessible name. -->
	<section class="usa-banner" aria-label="Official website of the United States government">
		<div class="usa-accordion">
			<header class="usa-banner-header">
				<div class="usa-grid usa-banner-inner">
					<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
					<p>An official website of the United States government</p>
					<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
						<span class="usa-banner-button-text">Here's how you know</span>
					</button>
				</div>
			</header>
			<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
				<div class="usa-banner-guidance-gov usa-width-one-half">
					<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
					<div class="usa-media_block-body">
						<p>
							<strong>The .gov means it's official.</strong>
							<br />
							Federal government websites often end in .gov or .mil. Before
							sharing sensitive information, make sure you're on a federal
							government site.
						</p>
					</div>
				</div>
				<div class="usa-banner-guidance-ssl usa-width-one-half">
					<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
					<div class="usa-media_block-body">
						<p>
							<strong>The site is secure.</strong>
							<br />
							The <strong>https://</strong> ensures that you are connecting to the
							official website and that any information you provide is encrypted
							and transmitted securely.
						</p>
					</div>
				</div>
			</div>
		</div>
	</section>
	<div class="usa-overlay"></div>
	<!-- Site header: agency logo link, account controls and the account popup. The log-in link and the account button both ship with display:none; which one is revealed is presumably decided by a script not visible in this file. -->
	<header class="ncbi-header" role="banner" data-section="Header">

		<div class="usa-grid">
			<div class="usa-width-one-whole">

				<div class="ncbi-header__logo">
					<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
						<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
					</a>
				</div>

				<div class="ncbi-header__account">
					<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
					<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
						<span class="fa fa-user" aria-hidden="true">
							<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
								<g style="fill: #fff">
									<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
									<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9    C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
								</g>
							</svg>
						</span>
						<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
						<!-- Carries both hidden-text class names: the rest of this header uses usa-sr-only, and sr-only is kept in case a stylesheet targets it. -->
						<span class="sr-only usa-sr-only">Show account info</span>
					</button>
				</div>

				<div class="ncbi-popup-anchor">
					<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
						<div class="ncbi-popup-head">
							<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
								<!-- Decorative icon: the button's accessible name comes from the "Close" text that follows, so the icon is hidden from assistive technology like the user icon above. -->
								<span class="fa fa-times" aria-hidden="true">
									<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
										<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
									</svg>
								</span>
								<span class="usa-sr-only">Close</span></button>
							<h4>Account</h4>
						</div>
						<div class="account-user-info">
							Logged in as:<br />
							<b><span class="username" id="uname_long">username</span></b>
						</div>
						<div class="account-links">
							<ul class="usa-unstyled-list">
								<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
								<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
								<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
								<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
							</ul>
						</div>
					</div>
				</div>

			</div>
		</div>
	</header>
	<!-- Access-key shortcuts (keys 0 to 4). Every link has tabindex="-1", which keeps it out of the tab order, and the usa-sr-only class (presumably visually hidden; the stylesheet is not visible here). -->
	<div aria-label="access keys" role="navigation">
		<a class="usa-sr-only"
		   id="nws_header_accesskey_0"
		   href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys"
		   accesskey="0"
		   tabindex="-1">Access keys</a>
		<a class="usa-sr-only"
		   id="nws_header_accesskey_1"
		   href="https://www.ncbi.nlm.nih.gov"
		   accesskey="1"
		   tabindex="-1">NCBI Homepage</a>
		<a class="set-base-url usa-sr-only"
		   id="nws_header_accesskey_2"
		   href="/myncbi/"
		   accesskey="2"
		   tabindex="-1">MyNCBI Homepage</a>
		<a class="usa-sr-only"
		   id="nws_header_accesskey_3"
		   href="#maincontent"
		   accesskey="3"
		   tabindex="-1">Main Content</a>
		<a class="usa-sr-only"
		   id="nws_header_accesskey_4"
		   href="#"
		   accesskey="4"
		   tabindex="-1">Main Navigation</a>
	</div>
	<!-- Alerts region: ships with an empty placeholder element; presumably filled in by a script that is not visible in this file (confirm). -->
	<section data-section="Alerts">
		<div class="ncbi-alerts-placeholder"></div>
	</section>
</div>
                            <!-- Bookshelf resource header: logo/title plus the database search form and its helper links. The form markup is kept on its original lines because whitespace between the inline controls could change the layout. The Disclaimer link opens a new window, so it carries rel="noopener". -->
                            <div class="header">
    <div class="res_logo"><h1 class="res_name"><a href="/books/" title="Bookshelf home">Bookshelf</a></h1><h2 class="res_tagline"></h2></div>
    <div class="search"><form method="get" action="/books/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="books" selected="selected" data-ac_dict="bookshelf-search">Books</option><option value="nuccore">Nucleotide</option><option value="gquery">All Databases</option><option value="sra" class="last">SRA</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books" data-ac_dict="bookshelf-search">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option 
value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search Books. Use up and down arrows to choose an item from the autocomplete." value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="dictionary:'bookshelf-search',disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'no'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form><ul class="searchlinks inline_list"><li>
                        <a href="/books/browse/">Browse Titles</a>
                    </li><li>
                        <a href="/books/advanced/">Advanced</a>
                    </li><li class="help">
                        <a href="/books/NBK3833/">Help</a>
                    </li><li class="disclaimer">
                        <a target="_blank" rel="noopener" data-ga-category="literature_resources" data-ga-action="link_click" data-ga-label="disclaimer_link" href="https://www.ncbi.nlm.nih.gov/books/about/disclaimer/">Disclaimer</a>
                    </li></ul></div>
</div>


                        <!--<component id="Page" label="headcontent"/>-->

                        </div>
                        <div class="content">
                            <!-- site messages -->
                            <!-- Custom content 1 -->
<div class="col1">

</div>

<div class="container">
    <div id="maincontent" class="content eight_col col">
        <!-- Custom content in the left column above book nav -->
        <div class="col2">

        </div>

        <!-- Book content -->


        <!-- Custom content between navigation and content -->
        <div class="col3">

        </div>

        <div class="document">
            <div class="pre-content"><div><div class="bk_prnt"><p class="small">NCBI Bookshelf. A service of the National Library of Medicine, National Institutes of Health.</p><p>McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. </p></div><div class="bk_msg_box bk_bttm_mrgn clearfix bk_noprnt"><div class="iconblock clearfix"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/handbook2e/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook2e-lrg.png" alt="Cover" height="100px" width="80px" /></a><div class="icnblk_cntnt"><ul class="messages"><li class="info icon"><span class="icon"><a href="/books/n/handbook2e/">See "The NCBI Handbook, 2nd Edition"</a></span></li></ul></div></div></div><div class="messagearea bk_noprnt" style="margin-bottom:1.3846em "><ul class="messages"><li class="warn icon"><span class="icon">This publication is provided for historical reference only and the information may be out of date.</span></li></ul></div><div class="bk_prnt"><p style="color:red;"><strong>This publication is provided for historical reference only and the information may be out of date.</strong></p></div><div class="iconblock clearfix whole_rhythm no_top_margin bk_noprnt"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/handbook/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png" alt="Cover of The NCBI Handbook" height="100px" width="80px" /></a><div class="icnblk_cntnt eight_col"><h2>The NCBI Handbook [Internet].</h2><a data-jig="ncbitoggler" href="#__NBK21088_dtls__">Show details</a><div style="display:none" class="ui-widget" id="__NBK21088_dtls__"><div>McEntyre J, Ostell J, editors.</div><div>Bethesda (MD): <a href="https://www.ncbi.nlm.nih.gov/" ref="pagearea=page-banner&amp;targetsite=external&amp;targetcat=link&amp;targettype=publisher">National Center for 
Biotechnology Information (US)</a>; 2002-.</div></div><div class="half_rhythm"><ul class="inline_list"><li style="margin-right:1em"><a class="bk_cntns" href="/books/n/handbook/">Contents</a></li></ul></div></div><div class="icnblk_cntnt two_col"><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook/ch4/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook/ch6/" title="Next page in this title">Next &gt;</a></div></div></div></div></div>
            <div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><h1 id="_NBK21088_"><span class="label">Chapter 5</span><span class="title" itemprop="name">The Single Nucleotide Polymorphism Database (dbSNP) of Nucleotide Sequence Variation</span></h1><p class="contrib-group"><span itemprop="author">Adrienne Kitts</span> and <span itemprop="author">Stephen Sherry</span>.</p><p class="small">Created: <span itemprop="datePublished">October 9, 2002</span>; Last Update: <span itemprop="dateModified">February 2, 2011</span>.</p><p><em>Estimated reading time: 39 minutes</em></p></div><div class="jig-ncbiinpagenav body-content whole_rhythm" data-jigconfig="allHeadingLevels: ['h2'],smoothScroll: false" itemprop="text"><div id="_abs_rndgid_" itemprop="description"><h2 id="__abs_rndgid__">Summary</h2><p>Sequence variations exist at defined positions within genomes and are responsible for individual phenotypic characteristics, including a person's propensity toward complex disorders such as heart disease and cancer. As tools for understanding human variation and molecular genetics, sequence variations can be used for gene mapping, definition of population structure, and performance of functional studies.</p><p>The Single Nucleotide Polymorphism database (dbSNP) is a public-<a class="def" href="/books/n/handbook/A1237/def-item/app38/">domain</a> archive for a broad collection of simple genetic polymorphisms. This collection of polymorphisms includes single-base nucleotide substitutions (also known as single nucleotide polymorphisms or SNPs), small-scale multi-base deletions or insertions (also called deletion insertion polymorphisms or DIPs), and retroposable element insertions and <a class="def" href="/books/n/handbook/A1237/def-item/app110/">microsatellite</a> repeat variations (also called short tandem repeats or STRs). 
Please note that in this chapter, you can substitute any class of variation for the term <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a>. Each dbSNP entry includes the sequence context of the <a class="def" href="/books/n/handbook/A1237/def-item/app142/">polymorphism</a> (i.e., the surrounding sequence), the occurrence frequency of the polymorphism (by population or individual), and the experimental method(s), protocols, and conditions used to assay the variation.</p><p>dbSNP accepts submissions for variations in any species and from any part of a genome. This document will provide you with options for finding SNPs in dbSNP, discuss dbSNP content and organization, and furnish instructions to help you create your own (local) copy of dbSNP.</p></div><div id="ch5.ch5_s1"><h2 id="_ch5_ch5_s1_">Introduction</h2><p>The dbSNP has been designed to support submissions and research into a broad range of biological problems. These include physical mapping, functional analysis, pharmacogenomics, association studies, and evolutionary studies. 
Because <a href="/SNP/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">dbSNP</a> was developed to complement <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a>, it may contain nucleotide sequences (<a class="figpopup" href="/books/NBK21088/figure/ch5.ch5_f1/?report=objectonly" target="object" rid-figpopup="figch5ch5f1" rid-ob="figobch5ch5f1">Figure 1</a>) from any organism.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch5ch5f1" co-legend-rid="figlgndch5ch5f1"><a href="/books/NBK21088/figure/ch5.ch5_f1/?report=objectonly" target="object" title="Figure" class="img_link icnblk_img figpopup" rid-figpopup="figch5ch5f1" rid-ob="figobch5ch5f1"><img class="small-thumb" src="/books/NBK21088/bin/ch5f1.gif" src-large="/books/NBK21088/bin/ch5f1.jpg" alt="Figure 1" /></a><div class="icnblk_cntnt" id="figlgndch5ch5f1"><h4 id="ch5.ch5_f1"><a href="/books/NBK21088/figure/ch5.ch5_f1/?report=objectonly" target="object" rid-ob="figobch5ch5f1">Figure</a></h4><p class="float-caption no_bottom_margin">Figure 1. The structure of the flanking sequence in dbSNP is a composite of bases either assayed for variation or included from published sequence. We make the distinction to distinguish regions of sequence that have been experimentally surveyed for variation <a href="/books/NBK21088/figure/ch5.ch5_f1/?report=objectonly" target="object" rid-ob="figobch5ch5f1">(more...)</a></p></div></div><div id="ch5.ch5_1_1"><h3>Physical Mapping</h3><p>In the physical mapping of nucleotide sequences, variations are used as positional markers. When mapped to a unique location in a genome, variation markers work with the same logic as Sequence Tagged Sites (STSs) or framework <a class="def" href="/books/n/handbook/A1237/def-item/app110/">microsatellite</a> markers. 
As is the case for STSs, the position of a variation is defined by its unique flanking sequence, and hence, variations can serve as stable landmarks in the genome, even if the variation is fixed for one <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> in a sample. When multiple alleles are observed in a sample pedigree, pedigree members can be tested for variation genotypes as in traditional physical mapping studies.</p></div><div id="ch5.ch5_1_2"><h3>Functional Analysis</h3><p>Variations that occur in functional regions of genes or in conserved non-coding regions might cause significant changes in the complement of transcribed sequences. This can lead to changes in protein expression that can affect aspects of the <a class="def" href="/books/n/handbook/A1237/def-item/app132/">phenotype</a> such as metabolism or cell signaling. We note possible functional implications of <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> sequence variations in dbSNP in terms of how the variation alters <a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a> transcripts.</p></div><div id="ch5.ch5_1_3"><h3>Association Studies</h3><p>The associations between variations and complex genetic traits are more ambiguous than simple, single-gene mutations that lead to a phenotypic change. When multiple genes are involved in a trait, then the identification of the genetic causes of the trait requires the identification of the chromosomal segment combinations, or haplotypes, that carry the putative gene variants.</p></div><div id="ch5.ch5_1_4"><h3>Evolutionary Studies</h3><p>The variations in dbSNP currently represent an uneven but large sampling of genome diversity. 
The human data in dbSNP include submissions from the <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> Consortium, variations mined from genome sequence as part of the human genome project, and individual lab contributions of variations in specific genes, mRNAs, ESTs, or genomic regions.</p></div><div id="ch5.ch5_1_5"><h3>Null Results Are Important</h3><p>Systematic surveys of sequence variation will undoubtedly reveal sequences that are invariant in the sample. These observations can be submitted to dbSNP as NoVariation records that record the sequence, the population, and the sample size that were used in the survey.</p></div></div><div id="ch5.ch5_s2"><h2 id="_ch5_ch5_s2_">Searching dbSNP</h2><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> database can be queried from the <a href="/SNP/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">dbSNP homepage</a> (Figures <a class="figpopup" href="/books/NBK21088/figure/ch5.ch5figure2a/?report=objectonly" target="object" rid-figpopup="figch5ch5figure2a" rid-ob="figobch5ch5figure2a">2a</a> and <a class="figpopup" href="/books/NBK21088/figure/ch5.ch5figure2b/?report=objectonly" target="object" rid-figpopup="figch5ch5figure2b" rid-ob="figobch5ch5figure2b">2b</a>), by using <a href="/entrez/query.fcgi?db=Snp" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez SNP</a>, or by using the links to the six basic dbSNP search options located just below the text box at the top of the dbSNP homepage. 
Each of these six search options is described below.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch5ch5figure2a" co-legend-rid="figlgndch5ch5figure2a"><a href="/books/NBK21088/figure/ch5.ch5figure2a/?report=objectonly" target="object" title="Figure" class="img_link icnblk_img figpopup" rid-figpopup="figch5ch5figure2a" rid-ob="figobch5ch5figure2a"><img class="small-thumb" src="/books/NBK21088/bin/ch5f2a.gif" src-large="/books/NBK21088/bin/ch5f2a.jpg" alt="Figure 2a" /></a><div class="icnblk_cntnt" id="figlgndch5ch5figure2a"><h4 id="ch5.ch5figure2a"><a href="/books/NBK21088/figure/ch5.ch5figure2a/?report=objectonly" target="object" rid-ob="figobch5ch5figure2a">Figure</a></h4><p class="float-caption no_bottom_margin">Figure 2a. We organized the dbSNP homepage with links to documentation, FTP, and sub-query pages on the <i>left sidebar</i> and a selection of query modules on the <i>right sidebar</i>. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch5ch5figure2b" co-legend-rid="figlgndch5ch5figure2b"><a href="/books/NBK21088/figure/ch5.ch5figure2b/?report=objectonly" target="object" title="Figure" class="img_link icnblk_img figpopup" rid-figpopup="figch5ch5figure2b" rid-ob="figobch5ch5figure2b"><img class="small-thumb" src="/books/NBK21088/bin/ch5f2b.gif" src-large="/books/NBK21088/bin/ch5f2b.jpg" alt="Figure 2b" /></a><div class="icnblk_cntnt" id="figlgndch5ch5figure2b"><h4 id="ch5.ch5figure2b"><a href="/books/NBK21088/figure/ch5.ch5figure2b/?report=objectonly" target="object" rid-ob="figobch5ch5figure2b">Figure</a></h4><p class="float-caption no_bottom_margin">Figure 2b. We organized the dbSNP homepage with links to documentation, FTP, and sub-query pages on the left sidebar and a selection of query modules on the right sidebar. 
</p></div></div><div id="ch5.ch5_2_1"><h3>Entrez SNP</h3><p>dbSNP is a part of the <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> integrated information retrieval system (<a href="/books/n/handbook/ch15/">Chapter 15</a>) and may be searched using either qualifiers (aliases) or a combination of 25 different search fields. A complete list of the qualifiers and search fields can be found on the <a href="/entrez/query.fcgi?db=snp" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez SNP site</a>.</p></div><div id="ch5.ch5_2_2"><h3>Single Record (Search by ID Number) Query in dbSNP</h3><p>Use this query module to select SNPs based on dbSNP record identifiers. These include reference <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> (refSNP) <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> ID numbers (rs#), submitted SNP Accession numbers (ss#), local (or submitter) IDs, Celera IDs, GenBank accession numbers, and <a class="def" href="/books/n/handbook/A1237/def-item/app173/">STS</a> accession numbers.</p></div><div id="ch5.ch5_2_3"><h3>SNP Submission Information Queries</h3><p>Use this module to construct a query that will select SNPs based on submission records by laboratory (submitter), new data (called &#x0201c;new batches&#x0201d; &#x02014; this query limitation is more recent than a user-specified date), the methods used to assay for variation (<a class="figpopup" href="/books/NBK21088/table/ch5.ch5_t1/?report=objectonly" target="object" rid-figpopup="figch5ch5t1" rid-ob="figobch5ch5t1">Table 1</a>), populations of interest (<a class="figpopup" href="/books/NBK21088/table/ch5.ch5_t2/?report=objectonly" target="object" rid-figpopup="figch5ch5t2" rid-ob="figobch5ch5t2">Table 2</a>), and publication information.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch5ch5t1"><a href="/books/NBK21088/table/ch5.ch5_t1/?report=objectonly" 
target="object" title="Table" class="img_link icnblk_img figpopup" rid-figpopup="figch5ch5t1" rid-ob="figobch5ch5t1"><img class="small-thumb" src="/books/NBK21088/table/ch5.ch5_t1/?report=thumb" src-large="/books/NBK21088/table/ch5.ch5_t1/?report=previmg" alt="Table 1" /></a><div class="icnblk_cntnt"><h4 id="ch5.ch5_t1"><a href="/books/NBK21088/table/ch5.ch5_t1/?report=objectonly" target="object" rid-ob="figobch5ch5t1">Table</a></h4><p class="float-caption no_bottom_margin">Table 1. Method classes organize submissions by a general methodological or experimental approach to assaying for variation in the DNA sequence. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch5ch5t2"><a href="/books/NBK21088/table/ch5.ch5_t2/?report=objectonly" target="object" title="Table" class="img_link icnblk_img figpopup" rid-figpopup="figch5ch5t2" rid-ob="figobch5ch5t2"><img class="small-thumb" src="/books/NBK21088/table/ch5.ch5_t2/?report=thumb" src-large="/books/NBK21088/table/ch5.ch5_t2/?report=previmg" alt="Table 2" /></a><div class="icnblk_cntnt"><h4 id="ch5.ch5_t2"><a href="/books/NBK21088/table/ch5.ch5_t2/?report=objectonly" target="object" rid-ob="figobch5ch5t2">Table</a></h4><p class="float-caption no_bottom_margin">Table 2. Population classes organize population samples by geographic region. 
</p></div></div></div><div id="ch5.ch5_2_4"><h3>dbSNP Batch Query</h3><p>Use sets of variation IDs (including RefSNP (rs) IDs, Submitted <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> (ss) IDs, and Local SNP IDs) collected from other queries to generate a variety of SNP reports.</p></div><div id="ch5.ch5_2_5"><h3>Locus Information Query</h3><p>This search was originally accomplished by LocusLink, which has now been replaced by <a href="/entrez/query.fcgi?CMD=search&#x00026;DB=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez Gene</a>.&#x000a0;&#x000a0;Entrez Gene is the successor to LocusLink and differs from LocusLink in two major ways: Entrez Gene is greater in scope (more of the genomes represented by <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> Reference Sequences or RefSeqs) and Entrez Gene has been integrated for indexing and query in NCBI's Entrez system. </p></div><div id="ch5.ch5_2_7"><h3>Between-Markers Positional Query</h3><p>Use this query approach if you are interested in retrieving variations that have been mapped to a specific region of the genome bounded by two <a class="def" href="/books/n/handbook/A1237/def-item/app173/">STS</a> markers. Other map-based queries are available through the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> <a class="def" href="/books/n/handbook/A1237/def-item/app99/">Map Viewer</a> tool.</p></div><div id="ch5.ch5_2_8"><h3>ADA Section 508-compliant Link</h3><p>All links located on the left sidebar of the dbSNP homepage are also provided in text format at the bottom of the page to support browsing by text-based Web browsers. 
Suggestions for improving database access by disabled persons should be sent to the dbSNP development group at snp-admin@ncbi.nlm.nih.gov.</p></div></div><div id="ch5.ch5_s3"><h2 id="_ch5_ch5_s3_">Submitted Content</h2><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> database has two major classes of content: the first class is submitted data, i.e., original observations of sequence variation; and the second class is computed content, i.e., content generated during the dbSNP &#x0201c;<a class="def" href="/books/n/handbook/A1237/def-item/app209/">build</a>&#x0201d; cycle by computation on original submitted data. Computed content consists of refSNPs, other computed data, and links that increase the utility of dbSNP.</p><p>A complete copy of the <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> database is publicly available and can be downloaded from the SNP <a href="ftp://ftp.ncbi.nih.gov/snp/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">FTP</a> site (see the section <i>How to Create a Local Copy of dbSNP</i>). dbSNP accepts submissions from public laboratories and private organizations. (There are online <a href="/SNP/get_html.cgi?whichHtml=how_to_submit" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">instructions</a> for preparing a submission to dbSNP.) A short tag or abbreviation called Submitter HANDLE uniquely defines each submitting laboratory and groups the submissions within the database. The 10 major data elements of a submission follow.</p><div id="ch5.ch5_3_1"><h3>Flanking Sequence Context DNA or cDNA</h3><p>The essential component of a submission to dbSNP is the nucleotide sequence itself. 
dbSNP accepts submissions as either genomic <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> or <a class="def" href="/books/n/handbook/A1237/def-item/app21/">cDNA</a> (i.e., sequenced <a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a> transcript) sequence. Sequence submissions have a minimum length requirement to maximize the specificity of the sequence in larger contexts, such as a reference genome sequence. We also structure submissions so that the user can distinguish regions of sequence actually surveyed for variation from regions of sequence that are cut and pasted from a published reference sequence to satisfy the minimum-length requirements. <a class="figpopup" href="/books/NBK21088/figure/ch5.ch5_f1/?report=objectonly" target="object" rid-figpopup="figch5ch5f1" rid-ob="figobch5ch5f1">Figure 1</a> shows the details of flanking sequence structure.</p></div><div id="ch5.ch5_3_2"><h3>Alleles</h3><p>Alleles define variation class (<a class="figpopup" href="/books/NBK21088/table/ch5.ch5_t3/?report=objectonly" target="object" rid-figpopup="figch5ch5t3" rid-ob="figobch5ch5t3">Table 3</a>). In the dbSNP submission scheme, we define single-nucleotide variants as G, A, T, or C. We do not permit ambiguous IUPAC codes, such as N, in the <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> definition of a variation. In cases where variants occur in close proximity to one another, we permit IUPAC codes such as N, and in the flanking sequence of a variation, we actually encourage them. 
See <a class="figpopup" href="/books/NBK21088/table/ch5.ch5_t3/?report=objectonly" target="object" rid-figpopup="figch5ch5t3" rid-ob="figobch5ch5t3">Table 3</a> for the rules that guide dbSNP post-submission processing in assigning allele classes to each variation.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch5ch5t3"><a href="/books/NBK21088/table/ch5.ch5_t3/?report=objectonly" target="object" title="Table" class="img_link icnblk_img figpopup" rid-figpopup="figch5ch5t3" rid-ob="figobch5ch5t3"><img class="small-thumb" src="/books/NBK21088/table/ch5.ch5_t3/?report=thumb" src-large="/books/NBK21088/table/ch5.ch5_t3/?report=previmg" alt="Table 3" /></a><div class="icnblk_cntnt"><h4 id="ch5.ch5_t3"><a href="/books/NBK21088/table/ch5.ch5_t3/?report=objectonly" target="object" rid-ob="figobch5ch5t3">Table</a></h4><p class="float-caption no_bottom_margin">Table 3. Allele definitions define the class of the variation in dbSNP. </p></div></div></div><div id="ch5.ch5_3_3"><h3>Method</h3><p>Each submitter defines the methods in their submission as either the techniques used to assay variation or the techniques used to estimate <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> frequencies. We group methods by method class (<a class="figpopup" href="/books/NBK21088/table/ch5.ch5_t1/?report=objectonly" target="object" rid-figpopup="figch5ch5t1" rid-ob="figobch5ch5t1">Table 1</a>) to facilitate queries using general experimental technique as a query field. The submitter provides all other details of the techniques in a free-text description of the method. Submitters can also use the METHOD_EXCEPTION_ field to describe changes to a general protocol for particular sets of data (batch-specific details). 
Submitters generally define methods only once in the database.</p></div><div id="ch5.ch5_3_4"><h3>Population</h3><p>Each submitter defines population samples either as the group used to initially identify variations or as the group used to identify population-specific measures of <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> frequencies. These populations may be one and the same in some experimental designs. We assign populations a population class (<a class="figpopup" href="/books/NBK21088/table/ch5.ch5_t2/?report=objectonly" target="object" rid-figpopup="figch5ch5t2" rid-ob="figobch5ch5t2">Table 2</a>) based on the geographic origin of the sample. These broad categories provide a general framework for organizing the approximately 700 (as of this writing) sample descriptions in dbSNP. Similar to method descriptions, population descriptions minimally require the submitter to provide a Population ID and a free-text description of the sample.</p></div><div id="ch5.ch5_3_5"><h3>Sample Size</h3><p>There are two sample-size fields in dbSNP. One field is called SNPASSAY SAMPLE SIZE, and it reports the number of chromosomes in the sample used to initially ascertain or discover the variation. The other sample size field is called SNPPOPUSE SAMPLE SIZE, and it reports the number of chromosomes used as the denominator in computing estimates of <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> frequencies. These two measures need not be the same.</p></div><div id="ch5.ch5_3_6"><h3>Population-specific Allele Frequencies</h3><p>Alleles typically exist at different frequencies in different populations; a very common <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> in one population may be quite rare in another population. 
Also, allelic variants can emerge as private polymorphisms when particular populations have been reproductively isolated from neighboring groups, as is the case with religious isolates or island populations. Frequency data are submitted to dbSNP as allele counts or binned frequency intervals, depending on the precision of the experimental method used to make the measurement. dbSNP contains records of allele frequencies for specific population samples defined by each submitter (<a class="figpopup" href="/books/NBK21088/table/ch5.ch5_t4/?report=objectonly" target="object" rid-figpopup="figch5ch5t4" rid-ob="figobch5ch5t4">Table 4</a>).</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch5ch5t4"><a href="/books/NBK21088/table/ch5.ch5_t4/?report=objectonly" target="object" title="Table" class="img_link icnblk_img figpopup" rid-figpopup="figch5ch5t4" rid-ob="figobch5ch5t4"><img class="small-thumb" src="/books/NBK21088/table/ch5.ch5_t4/?report=thumb" src-large="/books/NBK21088/table/ch5.ch5_t4/?report=previmg" alt="Table 4" /></a><div class="icnblk_cntnt"><h4 id="ch5.ch5_t4"><a href="/books/NBK21088/table/ch5.ch5_t4/?report=objectonly" target="object" rid-ob="figobch5ch5t4">Table</a></h4><p class="float-caption no_bottom_margin">Table 4. Validation status codes summarize the available validation data in assay reports and refSNP clusters. </p></div></div></div><div id="ch5.ch5_3_7"><h3>Population-specific Genotype Frequencies</h3><p>Similar to alleles, genotypes have frequencies in populations that can be submitted to dbSNP.</p></div><div id="ch5.ch5_3_8"><h3>Population-specific Heterozygosity Estimates</h3><p>Some methods for detection of variation (e.g., denaturing high-pressure liquid chromatography or DHPLC) can recognize when <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> fragments contain a variation without resolving the precise nature of the sequence change. 
These data define an empirical measure of <a class="def" href="/books/n/handbook/A1237/def-item/app69/">heterozygosity</a> when submitted to dbSNP.</p></div><div id="ch5.ch5_3_9"><h3>Individual Genotypes</h3><p>dbSNP accepts individual genotypes for samples from publicly available repositories such as <a class="def" href="/books/n/handbook/A1237/def-item/app216/">CEPH</a> or <a class="def" href="/books/n/handbook/A1237/def-item/app217/">Coriell</a>. Genotypes reported in dbSNP contain links to population and method descriptions. General <a class="def" href="/books/n/handbook/A1237/def-item/app65/">genotype</a> data provide the foundation for individual haplotype definitions and are useful for selecting positive and negative control reagents in new experiments.</p></div><div id="ch5.ch5_3_10"><h3>Validation Information</h3><p>dbSNP accepts individual assay records (ss numbers) without validation evidence. When possible, however, we try to distinguish high-quality validated data from unconfirmed (usually computational) variation reports. Assays validated directly by the submitter through the VALIDATION section show the type of evidence used to confirm the variation. Additionally, dbSNP will flag an assay as validated (<a class="figpopup" href="/books/NBK21088/table/ch5.ch5_t4/?report=objectonly" target="object" rid-figpopup="figch5ch5t4" rid-ob="figobch5ch5t4">Table 4</a>) when we observe frequency or <a class="def" href="/books/n/handbook/A1237/def-item/app65/">genotype</a> data for the record.</p></div></div><div id="ch5.ch5_s4"><h2 id="_ch5_ch5_s4_">Computed Content (The dbSNP Build Cycle)</h2><p>We release the content of dbSNP to the public in periodic &#x0201c;builds&#x0201d; that we synchronize with the release of new genome assemblies for each organism (<a href="/books/n/handbook/ch14/">Chapter 14</a>). 
During each <a class="def" href="/books/n/handbook/A1237/def-item/app209/">build</a>, we map both the data submitted since the last build and the current refSNP set to the genome. The following 7 tasks define the sequence of steps in the dbSNP build cycle (<a class="figpopup" href="/books/NBK21088/figure/ch5.ch5ch5f3/?report=objectonly" target="object" rid-figpopup="figch5ch5ch5f3" rid-ob="figobch5ch5ch5f3">Figure 3</a>).</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch5ch5ch5f3" co-legend-rid="figlgndch5ch5ch5f3"><a href="/books/NBK21088/figure/ch5.ch5ch5f3/?report=objectonly" target="object" title="Figure" class="img_link icnblk_img figpopup" rid-figpopup="figch5ch5ch5f3" rid-ob="figobch5ch5ch5f3"><img class="small-thumb" src="/books/NBK21088/bin/ch5f3.gif" src-large="/books/NBK21088/bin/ch5f3.jpg" alt="Figure 3" /></a><div class="icnblk_cntnt" id="figlgndch5ch5ch5f3"><h4 id="ch5.ch5ch5f3"><a href="/books/NBK21088/figure/ch5.ch5ch5f3/?report=objectonly" target="object" rid-ob="figobch5ch5ch5f3">Figure</a></h4><p class="float-caption no_bottom_margin">Figure 3. The dbSNP build cycle starts with close of data for new submissions. We map all data, including existing refSNP clusters and new submissions, to reference genome sequence if available for the organism. Otherwise, we map them to non-redundant <a href="/books/NBK21088/figure/ch5.ch5ch5f3/?report=objectonly" target="object" rid-ob="figobch5ch5ch5f3">(more...)</a></p></div></div><div id="ch5.Submitted_SNPs_and_Reference_SNP_Clu"><h3>Submitted SNPs and Reference SNP Clusters</h3><p>Once a new <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> is submitted to dbSNP, it is assigned a unique <u>s</u>ubmitted <u>S</u>NP ID number (ss#). Once the ss number is assigned, we align the flanking sequence of each submitted SNP to its appropriate genomic <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a>. 
If several ss numbers map to the same position on the contig, we <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> them together, call the cluster a &#x0201c;reference SNP cluster&#x0201d;, or &#x0201c;refSNP&#x0201d;, and provide the cluster with a unique RefSNP ID number (<u>rs#</u>). If only one ss number maps to a specific position, then that ss is assigned an rs number and is the only member of its RefSNP cluster until another submitted SNP is found that maps to the same position.</p><p>A refSNP has a number of summary properties that are computed over all <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> members (<a class="figpopup" href="/books/NBK21088/figure/ch5.ch5ch5f4/?report=objectonly" target="object" rid-figpopup="figch5ch5ch5f4" rid-ob="figobch5ch5ch5f4">Figure 4</a>), and are used to annotate the variations contained in other <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> resources. We export the entire dbSNP refSNP set in many report formats on the <a href="ftp://ftp.ncbi.nih.gov/snp/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">FTP</a> site, and deliver them as sets of results when a user conducts a dbSNP batch query. 
We also maintain both refSNPs and submitted SNPs in <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a> databases for use in <a href="/SNP/snpblastByChr.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLAST searches</a> of dbSNP.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figch5ch5ch5f4" co-legend-rid="figlgndch5ch5ch5f4"><a href="/books/NBK21088/figure/ch5.ch5ch5f4/?report=objectonly" target="object" title="Figure" class="img_link icnblk_img figpopup" rid-figpopup="figch5ch5ch5f4" rid-ob="figobch5ch5ch5f4"><img class="small-thumb" src="/books/NBK21088/bin/ch5f4.gif" src-large="/books/NBK21088/bin/ch5f4.jpg" alt="Figure 4" /></a><div class="icnblk_cntnt" id="figlgndch5ch5ch5f4"><h4 id="ch5.ch5ch5f4"><a href="/books/NBK21088/figure/ch5.ch5ch5f4/?report=objectonly" target="object" rid-ob="figobch5ch5ch5f4">Figure</a></h4><p class="float-caption no_bottom_margin">Figure 4. rs7412 has an average heterozygosity of 18.3% based on the frequency data provided by seven submissions, and the cluster as a whole is validated because at least one of the underlying submissions has been experimentally validated. rs7412 is <a href="/books/NBK21088/figure/ch5.ch5ch5f4/?report=objectonly" target="object" rid-ob="figobch5ch5ch5f4">(more...)</a></p></div></div></div><div id="ch5.ch5_4_1"><h3>New Submissions and the Start of a New Build </h3><p>Each <a class="def" href="/books/n/handbook/A1237/def-item/app209/">build</a> starts with a &#x0201c;close of data&#x0201d; that defines the set of new submissions that will be mapped to genome sequence by multiple cycles of <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> and <a class="def" href="/books/n/handbook/A1237/def-item/app103/">MegaBLAST</a> for subsequent annotation and grouping of the SNPs into refSNPs. 
The set of new data entering each build typically includes all submissions received since the close of data in the previous build.</p></div><div id="ch5.ch5_4_6"><h3>Mapping to a Genome Sequence</h3><p>When a new genome <a class="def" href="/books/n/handbook/A1237/def-item/app209/">build</a> is ready, dbSNP obtains the <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a> files for submitted SNPs that were submitted prior to the &#x0201c;close of data&#x0201d;, as well as the FASTA files for the refSNPs in the current build, and then maps the submitted SNPs and refSNPs to the genome sequence using the procedure described in <a href="#ch5.ch5_s8">Appendix 2</a>. The refSNP set is also mapped to the previous genome assembly to support users who require older mapping data (e.g. during the production cycle for dbSNP human build 126, the refSNP set was mapped to both human build 36.1 and human build 35.1).</p><p>It should also be mentioned that during a <a class="def" href="/books/n/handbook/A1237/def-item/app209/">build</a> cycle, some organisms have refSNPs mapped to multiple assemblies (e.g. human has two major assemblies: the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> Reference Genome assembly and the Celera assembly). </p><div id="ch5.ch5_4_5_1"><h4>refSNP Clustering and refSNP Orientation</h4><p>Since submitters to dbSNP can arbitrarily define variations on either strand of <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> sequence, submissions in a refSNP <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> can be reported on the forward or reverse strand. The orientation of the refSNP, and hence its sequence and <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> string, is set by a cluster exemplar. By convention, the cluster exemplar is the member of a cluster that has the longest sequence. 
In subsequent builds, this sequence may be in reverse orientation to the current orientation of the refSNP. When this occurs, we preserve the orientation of the refSNP by using the reverse complement of the cluster exemplar to set the orientation of the refSNP sequence.</p><p>Once the clustering process determines the orientation of all member sequences in a <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a>, it will gather a comprehensive set of alleles for a refSNP cluster.</p><div id="ch5.T.nc_hintwhen_the_alleles_of_a_submi" class="table"><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21088/table/ch5.T.nc_hintwhen_the_alleles_of_a_submi/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch5.T.nc_hintwhen_the_alleles_of_a_submi_lrgtbl__"><table><tbody><tr><th id="hd_b_ch5.T.nc_hintwhen_the_alleles_of_a_submi_1_1_1_1" rowspan="1" colspan="1" style="text-align:center;vertical-align:top;">Hint:</th></tr><tr><td headers="hd_b_ch5.T.nc_hintwhen_the_alleles_of_a_submi_1_1_1_1" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">When the alleles of a submission appear to be different from the alleles of its parent refSNP, check the orientation of the submission for reverse orientation.</td></tr><tr><td headers="hd_b_ch5.T.nc_hintwhen_the_alleles_of_a_submi_1_1_1_1" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;"></td></tr></tbody></table></div></div></div><div id="ch5.ch5_4_8"><h4>Re-Mapping and refSNP Merging</h4><p>RefSNPs are operationally defined as a variation at a location on an ideal reference chromosome. Such reference chromosomes are the goal of genome assemblies. However, since work is still in progress on many of the genome projects, and since even the &#x02018;finished&#x02019; genomes are updated to correct past annotation errors, we currently define a refSNP as a variation in the interim reference sequence. 
Every time there is a genomic assembly update, the interim reference sequence may change, so the refSNPs must be updated or re-clustered.</p><p>The re-clustering process begins when <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> updates the genomic assembly. All existing refSNPs (rs) and newly submitted SNPs (ss) are mapped to the genome assembly using multiple <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> and <a class="def" href="/books/n/handbook/A1237/def-item/app103/">MegaBLAST</a> cycles as delineated in Appendix 2. We then <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> SNPs that co-locate at the same place on the genome into a single refSNP. Newly submitted SNPs can either co-locate to form a new refSNP cluster, or may instead cluster with an already existing refSNP. When newly submitted SNPs cluster among themselves, they are assigned a new refSNP number, and when they cluster with an already existing refSNP, they are assigned to that refSNP cluster.</p><p>Sometimes an existing refSNP will co-locate with another refSNP when dbSNP begins using an improved clustering algorithm, or when genome assemblies change between builds. A refSNP co-locates with another refSNP only if the mapped chromosome positions of the two refSNPs are identical. So when dbSNP uses an improved clustering algorithm that enhances our ability to more precisely place refSNPs, if the new placement of a refSNP is identical in location to another refSNP, the two refSNPs co-locate. Similarly, if a change in a genome assembly alters the position of a refSNP so that it is identical with the position of another refSNP, the two refSNPs co-locate. 
When two existing refSNPs co-locate, the refSNP with the higher refSNP number is retired (which means we never reuse it), and all the submitted SNPs in that higher refSNP <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> number are re-assigned to the refSNP with the lower refSNP number. The re-assignment of the submitted SNPs from a higher refSNP number to a refSNP cluster with a lower refSNP number is called a &#x0201c;merge&#x0201d;, and occurs during the &#x0201c;rs merge&#x0201d; step of the dbSNP mapping process. Merging is only used to reduce redundancy in the catalog of rs numbers so that each position has a unique identifier. All "rs merge" actions that occur are recorded and tracked.</p><p>There is an important exception to the merge process described above; this exception occurs when a co-locating <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> meets certain <a href="/projects/SNP/specs/criteria_for_precious_snps.txt" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">clinical and publication criteria</a>. A refSNP meeting these criteria is termed &#x0201c;precious&#x0201d; and will keep its original refSNP number (the refSNP number will NOT retire as discussed above) if it co-locates with a SNP that has a lower refSNP number. The purpose of having &#x0201c;precious&#x0201d; SNPs is to maintain refSNP number continuity for those SNPs that have been cited in the literature and are clinically important.</p><p>Once the clusters are formed, the variation of a refSNP is the union of all possible alleles defined in the set of submitted SNPs that compose the <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a>. </p><p><b>Please Note</b>: dbSNP only merges rs numbers that have an identical set of mappings to a genome and the same <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> type (e.g. 
both must be the same variation type and share one allele in common). We therefore would not merge a <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> and an indel (insertion/deletion) into a single rs number (different variation classes) since they represent two different types of mutational "events".</p></div><div id="ch5.RefSNP_Number_Stability"><h4>RefSNP Number Stability</h4><p>The stability of a refSNP number depends on what is meant by &#x0201c;stable&#x0201d;. If a refSNP number has been merged into another refSNP number, it is very easy to use a retired refSNP number to find the current refSNP number (see hint below) &#x02014; so in this case, one could consider a refSNP number to be stable since merged refSNP numbers are always associated with, and can always retrieve the current refSNP number. </p><div id="ch5.T.nc_hintthere_are_three_ways_the_yo" class="table"><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21088/table/ch5.T.nc_hintthere_are_three_ways_the_yo/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch5.T.nc_hintthere_are_three_ways_the_yo_lrgtbl__"><table><tbody><tr><th id="hd_b_ch5.T.nc_hintthere_are_three_ways_the_yo_1_1_1_1" rowspan="1" colspan="1" style="text-align:center;vertical-align:top;">Hint:</th></tr><tr><td headers="hd_b_ch5.T.nc_hintthere_are_three_ways_the_yo_1_1_1_1" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">There are three ways that you can locate the partner numbers of a merged refSNP:<br /><ul><li class="half_rhythm"><div>If you enter a retired rs number into the &#x0201c;Search for IDs&#x0201d; search text box on the <a href="/SNP/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">dbSNP home page</a>, the response page will state that the <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> has been merged, and will provide the new rs number 
and a link to the refSNP page for that new rs number.</div></li></ul><ul><li class="half_rhythm"><div>You can retrieve a list of merged rs numbers from <a href="/sites/entrez?db=snp" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez SNP</a>. Just type &#x0201c;mergedrs&#x0201d; (without the quotation marks) in the text box at the top of the page and click the &#x0201c;go&#x0201d; button. You can limit the output to merged rs numbers within a certain species by clicking on the &#x0201c;Limits&#x0201d; tab and then selecting the organism you wish from the organism selection box. Each entry in the returned list will include the old rs number that has merged, and the new rs number it has merged into (with a link to the refSNP page for the new rs number). </div></li></ul><ul><li class="half_rhythm"><div>You can also review the <a href="ftp://ftp.ncbi.nih.gov/snp/organisms/human_9606/database/organism_data/RsMergeArch.bcp.gz" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">RsMergeArch table</a> for the merge partners of a particular species of interest, as it tracks all merge events that occur in dbSNP. 
This table is available on the dbSNP <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> site, a full description of it can be found in the <a href="/SNP/snp_db_table_description.cgi?t=RsMergeArch" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">dbSNP Data Dictionary</a>, and the column definitions are located in the dbSNP_main_table.sql.gz, which can be found in the <a href="ftp://ftp.ncbi.nih.gov/snp/database/shared_schema" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">shared<wbr style="display:inline-block"></wbr>_schema</a> directory of the dbSNP FTP site.</div></li></ul></td></tr><tr><td headers="hd_b_ch5.T.nc_hintthere_are_three_ways_the_yo_1_1_1_1" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;"></td></tr></tbody></table></div></div><p>If, however, what is meant by &#x0201c;stable&#x0201d; is that the refSNP number of a particular variation always remains the same, then one should not consider a refSNP entirely stable, as a refSNP number may change if two refSNP numbers merge as described above. Merging is more likely to happen, however, if the submitted flanking sequence of the refSNP exemplar is short, is of low quality, or if the genome assembly is immature. 
A refSNP number may also change if:</p><ul><li class="half_rhythm"><div>All of the submitted <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> (ss) numbers in a refSNP <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> are withdrawn by the submitter (a less than 1 in 100 occurrence)</div></li><li class="half_rhythm"><div>dbSNP breaks up an existing <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> and re-instantiates a retired rs number based on a reported conflict from a dbSNP user (a less than 1 in 1,000,000 occurrence) </div></li></ul></div></div><div id="ch5.ch5_4_11"><h3>Functional Analysis</h3><div id="ch5.ch5_4_11_1"><h4>Variation Functional Class</h4><p>We compute a functional context for sequence variations by inspecting the flanking sequence for gene features during the <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a> annotation process, and do the same for <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a>/<a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> mRNAs.</p><p><a class="figpopup" href="/books/NBK21088/table/ch5.ch5_t5/?report=objectonly" target="object" rid-figpopup="figch5ch5t5" rid-ob="figobch5ch5t5">Table 5</a> defines variation functional classes. We base class on the relationship between a variation and any local gene features. If a variation is near a transcript or in a transcript interval, but not in the coding region, then we define the functional class by the position of the variation relative to the structure of the aligned transcript. 
In other words, a variation may be near a gene (<a class="def" href="/books/n/handbook/A1237/def-item/app95/">locus</a> region), in a <a class="def" href="/books/n/handbook/A1237/def-item/app192/">UTR</a> (<a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a>-utr), in an <a class="def" href="/books/n/handbook/A1237/def-item/app86/">intron</a> (intron), or in a splice site (splice site). If the variation is in a coding region, then the functional class of the variation depends on how each <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> may affect the translated peptide sequence.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figch5ch5t5"><a href="/books/NBK21088/table/ch5.ch5_t5/?report=objectonly" target="object" title="Table" class="img_link icnblk_img figpopup" rid-figpopup="figch5ch5t5" rid-ob="figobch5ch5t5"><img class="small-thumb" src="/books/NBK21088/table/ch5.ch5_t5/?report=thumb" src-large="/books/NBK21088/table/ch5.ch5_t5/?report=previmg" alt="Table 5" /></a><div class="icnblk_cntnt"><h4 id="ch5.ch5_t5"><a href="/books/NBK21088/table/ch5.ch5_t5/?report=objectonly" target="object" rid-ob="figobch5ch5t5">Table</a></h4><p class="float-caption no_bottom_margin">Table 5. Function codes for refSNPs in gene features. <sup><i>a</i></sup> </p></div></div><p>Typically, one <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> of a variation will be the same as the <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a> (contig reference), and the other allele will be either a synonymous change or a non-synonymous change. In some cases, one allele will be a synonymous change, and the other allele will be a non-synonymous change. If either allele in the variation is a non-synonymous change, then the variation is classified as non-synonymous; otherwise, the variation is classified as a synonymous variation. 
The primary functional classifications are as follows:</p><ul><li class="half_rhythm"><div> The functional class is noted as Contig Reference when the <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> is identical to the <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a> (contig reference), and hence causes no change to the translated sequence.</div></li></ul><ul><li class="half_rhythm"><div> The functional class is noted as synonymous substitution when an <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> that is substituted for the reference sequence yields a new <a class="def" href="/books/n/handbook/A1237/def-item/app206/">codon</a> that encodes the same amino acid. </div></li></ul><ul><li class="half_rhythm"><div> The functional class is noted as non-synonymous substitution when an <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> that is substituted for the reference sequence yields a new <a class="def" href="/books/n/handbook/A1237/def-item/app206/">codon</a> that encodes a different amino acid. </div></li></ul><ul><li class="half_rhythm"><div> The functional class is noted as coding when a problem with the annotated coding region feature prohibits conceptual translation. 
The coding notation is based solely on position in this case.</div></li></ul><p>Because functional classification is defined by positional and sequence parameters, two facts emerge: (<i>a</i>) if a gene has multiple transcripts because of alternative splicing, then a variation may have several different functional relationships to the gene; and (<i>b</i>) if multiple genes are densely packed in a <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a> region, then a variation at a single location in the genome may have multiple, potentially different, relationships to its local gene neighbors.</p></div><div id="ch5.ch5_4_11_2"><h4>SNP Position in 3D Structure</h4><p>When a <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> results in amino acid sequence change, knowing where that amino acid lies in the protein structure is valuable. We provide this information using the following procedure: To find the location of a SNP within a particular protein, we attempt to identify similar proteins whose structure is known by comparing the protein sequence against proteins from the <a class="def" href="/books/n/handbook/A1237/def-item/app130/">PDB</a> database of known protein structures using <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a>. Then, if we find matches, we use the BLAST alignment to identify the amino acid in the protein of known structure that corresponds to the amino acid containing the SNP. We store the position of the amino acid on the 3D structure that corresponds to the amino acid containing the SNP in the dbSNP table SNP3D.</p></div></div><div id="ch5.Population_Diversity_Data"><h3>Population Diversity Data</h3><p>The best single measure of a variation's diversity in different populations is its average <a class="def" href="/books/n/handbook/A1237/def-item/app69/">heterozygosity</a>. 
This measure serves as the general probability that both alleles are in a diploid individual or in a sample of two chromosomes. Estimates of average heterozygosity have an accompanying standard error based on the sample sizes of the underlying data, which reflects the overall uncertainty of the estimate. dbSNP&#x02019;s computation of average heterozygosity and standard error for RefSNP clusters is available <a href="/SNP/Hetfreq.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">online</a>. Please note that dbSNP computes heterozygosity based on the submitted <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> frequency for each <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a>. If the frequency data for a SNP is not submitted, we cannot compute the heterozygosity value, and therefore the refSNP report will show no heterozygosity estimate.</p><p>Additional population diversity data include population counts, individuals sampled for a variation, <a class="def" href="/books/n/handbook/A1237/def-item/app65/">genotype</a> frequencies, and Hardy Weinberg probabilities.</p></div><div id="ch5.ch5_4_2"><h3>Build Resource Integration</h3><p>We annotate the non-redundant set of variations (refSNP <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> set) on reference genome sequence contigs, chromosomes, mRNAs, and proteins as part of the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> project (<a href="/books/n/handbook/ch18/">Chapter 18</a>). 
We compute summary properties for each refSNP cluster, which we then use to <a class="def" href="/books/n/handbook/A1237/def-item/app209/">build</a> fresh indexes for dbSNP in the <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> databases, and to update the variation map in the NCBI <a class="def" href="/books/n/handbook/A1237/def-item/app99/">Map Viewer</a>. Finally, we update links between dbSNP and dbMHC, <a class="def" href="/books/n/handbook/A1237/def-item/app124/">OMIM</a>, Homologene, the NCBI Probe database, <a class="def" href="/books/n/handbook/A1237/def-item/app150/">PubMed</a>, UniGene, and <a class="def" href="/books/n/handbook/A1237/def-item/app188/">UniSTS</a>.</p><div id="ch5.ch5_4_9"><h4>Annotating GenBank and Other RefSeq Records</h4><p><a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> records can be annotated only by their original authors. Therefore, when we find high-quality hits of refSNP records to the <a class="def" href="/books/n/handbook/A1237/def-item/app74/">HTGS</a> and non-redundant divisions of GenBank, we connect them using <a class="def" href="/books/n/handbook/A1237/def-item/app94/">LinkOut</a> (<a href="/books/n/handbook/ch17/">Chapter 17</a>).</p><p>We annotate <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> mRNAs with variation features when the refSNP has a high-quality hit to the <a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a> sequence. If the variation is in the coding region of the transcript and has a non-synonymous <a class="def" href="/books/n/handbook/A1237/def-item/app3/">allele</a> that changes the protein sequence, we also annotate the variation on the protein translation of the mRNA. 
The alleles in protein annotations are the amino acid translations of the affected codons.</p></div><div id="ch5.ch5_4_10"><h4>NCBI Map Viewer Variation and Linkage Maps</h4><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app99/">Map Viewer</a> (<a href="/books/n/handbook/ch20/">Chapter 20</a>) can show multiple maps of sequence features in common chromosome coordinates. The variation map shows all variation features that we annotate on the current genome assembly. There are two ways to see the variation data. The default graphic mode shows the data as tick marks on the vertical coordinate scale. When variation is selected as the master map from the Maps and Options drop-down menu, and the user zooms in on the map to the individual RefSNP level, a summary of map quality, variation quality warning, functional relationships to genes, average <a class="def" href="/books/n/handbook/A1237/def-item/app69/">heterozygosity</a> with standard error, and validation information are provided. If <a class="def" href="/books/n/handbook/A1237/def-item/app65/">genotype</a>, haplotype, or <a class="def" href="/books/n/handbook/A1237/def-item/app94/">LinkOut</a> data are available, the master map will also contain links to this information.</p></div></div><div id="ch5.ch5_4_3"><h3>Public Release</h3><p>Public release of a new <a class="def" href="/books/n/handbook/A1237/def-item/app209/">build</a> involves an update to the public database and the production of a new set of files on the dbSNP <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> site. 
We make an announcement to the <a href="/mailman/listinfo/dbsnp-announce" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">dbsnp-announce</a> mailing list when the new build for an organism is publicly available.</p></div></div><div id="ch5.ch5_s5"><h2 id="_ch5_ch5_s5_">dbSNP Resource Integration</h2><div id="ch5.ch5_5_1"><h3>Links from SNP Records to Submitter Websites</h3><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> database supports and encourages connections between assay records (submitted SNP ID numbers, or ss numbers) and supplementary data on the submitter's Web site. This connection is made using the LINKOUT field in the SNPassay batch header. <a class="def" href="/books/n/handbook/A1237/def-item/app94/">LinkOut</a> URLs are base URLs to which dbSNP can append the submitter's ID for the variation to construct a complete <a class="def" href="/books/n/handbook/A1237/def-item/app190/">URL</a> to the specific data for the record. 
We provide LinkOut pointers in the batch header section of SNP detail reports and in the refSNP report <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> membership section.</p></div><div id="ch5.ch5_5_2"><h3>Links within NCBI</h3><p>We make the following connections between refSNP clusters and other <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> resources during the <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a> annotation process:</p><div id="ch5.Entrez_Gene"><h4>Entrez Gene</h4><p>There are two methods by which we localize variations to known genes: (<i>a</i>) if a variation is mapped to the genome, we note the variation/gene relationship (<a class="figpopup" href="/books/NBK21088/table/ch5.ch5_t5/?report=objectonly" target="object" rid-figpopup="figch5ch5t5" rid-ob="figobch5ch5t5">Table 5</a>) during functional classification and store the locus_id of the gene in the dbSNP table SNPContigLocusId; and (<i>b</i>) if the variation does not map to the genome, we look for high-quality blast hits for the variation against <a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a> sequence. We note these hits with the protein_ID (PID) of the protein (the conceptual translation of the mRNA transcript). <a class="def" href="/books/n/handbook/A1237/def-item/app97/">Entrez Gene</a> scans this table nightly and updates the table MapLinkPID with the locus_id for the gene when the protein is a known product of a gene.</p></div><div id="ch5.ch5_5_2_2"><h4>UniSTS</h4><p>When an original submitted <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> record shows a relationship between a SNP and a <a class="def" href="/books/n/handbook/A1237/def-item/app173/">STS</a>, we share the data with dbSTS and establish a link between the SNP and the STS record. 
We also examine refSNPs for proximity to STS features during <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a> annotation. When we determine that a variation needs to be placed within an STS feature, we note the relationship in the dbSNP table SnpInSts.</p></div><div id="ch5.ch5_5_2_3"><h4>UniGene</h4><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a> annotation pipeline relates refSNPs to UniGene <a class="def" href="/books/n/handbook/A1237/def-item/app46/">EST</a> clusters based on shared chromosomal location. We store Variation/<a class="def" href="/books/n/handbook/A1237/def-item/app187/">UniGene cluster</a> relationships in the dbSNP table UnigeneSnp.</p></div><div id="ch5.ch5_5_2_4"><h4>PubMed</h4><p>We connect individual submissions to <a class="def" href="/books/n/handbook/A1237/def-item/app150/">PubMed</a> record(s) of publications cited at the time of submission. To view links from PubMed to dbSNP, select &#x0201c;linkouts&#x0201d; as a PubMed query result.</p></div><div id="ch5.ch5_5_2_5"><h4>dbMHC</h4><p>dbSNP stores the underlying variation data that define HLA alleles at the nucleotide level. The combinations of alleles that define specific HLA alleles are stored in dbMHC. dbSNP points to dbMHC at the haplotype level, and dbMHC points to dbSNP at both the haplotype and variation level.</p></div></div></div><div id="ch5.ch5_s6"><h2 id="_ch5_ch5_s6_">How to Create a Local Copy of dbSNP</h2><p>dbSNP is a relational database that contains hundreds of tables. Since the inception of <a class="def" href="/books/n/handbook/A1237/def-item/app209/">build</a> 125, the design of dbSNP has been altered to a &#x0201c;hub and spoke&#x0201d; model, where the dbSNP_Main_Table acts as the hub of a wheel, storing all of the central tables of the database, while each spoke of the wheel is an organism-specific database that contains the latest data for a specific organism. 
dbSNP exports the full contents of the database for the public to download from the dbSNP <a href="ftp://ftp.ncbi.nih.gov/snp/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">FTP</a> site. </p><p>Due to security concerns and vendor endorsement issues, we cannot provide users with direct dumps of dbSNP. The task of creating a local copy of dbSNP can be complicated, and should be left to an experienced programmer. The following sections will guide you in the process of creating a local copy of dbSNP, but these instructions assume knowledge of relational databases, and were not written with the novice in mind.</p><p>If you have problems establishing a local copy of dbSNP, please contact dbSNP at snp-admin@ncbi.nlm.nih.gov.</p><div id="ch5.ch5_6_1"><h3>Schema: The dbSNP Physical Model</h3><p>A schema is a necessary part of constructing your own copy of dbSNP because it is a visual representation of dbSNP that shows the logical relationship between data in dbSNP. It is available as a printable PDF <a href="ftp://ftp.ncbi.nih.gov/snp/database/b124/mssql/schema/erd_dbSNP.pdf" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">file</a> from the dbSNP <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> site.</p><p>Data in dbSNP are organized into &#x0201c;subject areas&#x0201d; depending on the nature of the data. The <a href="/SNP/snp_db_list_table.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">data dictionary</a> currently includes a description of all the tables in dbSNP as well as tables of columns and their properties. Foreign keys are not enforced in the physical model because they make it harder to load table data asynchronously. In the future, we will add descriptions of individual columns. 
The <a href="/SNP/snp_db_list_table.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">data dictionary</a> is also available online from the dbSNP Web site. </p><div id="ch5.ch5_6_1_1"><h4>Resources Required for Creating a Local Copy of dbSNP</h4><div id="ch5.ch5_6_1_2"><h5>Software:</h5><ul><li class="half_rhythm"><div><b>Relational database software</b>. If you are planning to create a local copy of dbSNP, you must first have a relational database server, such as <a class="def" href="/books/n/handbook/A1237/def-item/app176/">Sybase</a>, Microsoft SQL server, or Oracle. dbSNP at <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> runs on an MSSQL server version 2000, but we know of users who have successfully created their local copy of dbSNP on Oracle.</div></li><li class="half_rhythm"><div><b>Data loading tool</b>. Loading data from the dbSNP <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> site into a database requires a bulk data-loading tool, which usually comes with a database installation. An example of such a tool is the bcp (bulk-copy) utility that comes with <a class="def" href="/books/n/handbook/A1237/def-item/app176/">Sybase</a>, or the &#x0201c;bulkinsert&#x0201d; command in the MSSQL server.</div></li><li class="half_rhythm"><div><b>winzip/gzip to decompress <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> files</b>. Complete instructions on how to uncompress *.gz and *.Z files can be found on the dbSNP <a href="/Ftp/uncompress.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">FTP</a> site.</div></li></ul></div><div id="ch5.ch5_6_1_3"><h5>Hardware:</h5><ul><li class="half_rhythm"><div><b>Computer platforms/OS</b>. Databases can be maintained on any PC, Mac, or <a class="def" href="/books/n/handbook/A1237/def-item/app189/">UNIX</a> with an Internet connection.</div></li><li class="half_rhythm"><div><b>Disk space</b>. 
Currently, a complete copy of dbSNP that will include all organisms contained in dbSNP requires 500 <a class="def" href="/books/n/handbook/A1237/def-item/app60/">GB</a> of space. Depending on the organism you are interested in, you can simply create a local database that only includes data for the organism of your interest. Please allow room for growth.</div></li><li class="half_rhythm"><div><b>Memory</b>. The current sql server for dbSNP has 4GB of memory.</div></li><li class="half_rhythm"><div>
<b>Internet connection</b>. We recommend a high-speed connection to download such large database files.</div></li></ul></div><div id="ch5.ch5_6_1_4"><h5>dbSNP Data Location</h5><p>The <a href="ftp://ftp.ncbi.nih.gov/snp/database/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">FTP database directory</a> in the dbSNP FTP site contains the schema, data, and SQL statements to create the tables and indices for dbSNP:</p><ul><li class="half_rhythm"><div>The <a href="ftp://ftp.ncbi.nih.gov/snp/database/shared_schema/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">shared_schema</a> subdirectory contains the schema DDL (SQL Data Definition Language) for the dbSNP_main_table.</div></li><li class="half_rhythm"><div>The <a href="ftp://ftp.ncbi.nih.gov/snp/database/shared_data/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">shared_data</a> subdirectory contains data housed in the dbSNP_main_table that is shared by all organisms.</div></li><li class="half_rhythm"><div>The <a href="ftp://ftp.ncbi.nih.gov/snp/database/organism_schema/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">organism_schema</a> sub-directory contains links to the schema DDL for each organism specific database.</div></li><li class="half_rhythm"><div>The <a href="ftp://ftp.ncbi.nih.gov/snp/database/organism_data/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">organism_data</a> sub-directory contains links to the data housed in each organism specific database. The data are organized in tables, where there is one file per table. The file name convention is: &#x0003c;tablename&#x0003e;.bcp.gz. 
The file name convention for the mapping table also includes the dbSNP <a class="def" href="/books/n/handbook/A1237/def-item/app209/">build</a> ID number and the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> genome build ID number. For example, B125_SNPContigLoc_35_1 means that during dbSNP build 125, this SNPContigLoc table has SNPs mapped to NCBI <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a> build 35 version 1. The data files have one line per table row. Fields of data within each file are tab delimited.</div></li></ul><p>dbSNP uses standard SQL DDL (Data Definition Language) to create tables, views for those tables, and indexes. There are many utilities available to generate table/index creation statements from a database. </p><div id="ch5.T.nc_hintif_your_firewall_blocks_pas" class="table"><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21088/table/ch5.T.nc_hintif_your_firewall_blocks_pas/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch5.T.nc_hintif_your_firewall_blocks_pas_lrgtbl__"><table><tbody><tr><th id="hd_b_ch5.T.nc_hintif_your_firewall_blocks_pas_1_1_1_1" rowspan="1" colspan="1" style="text-align:center;vertical-align:top;">Hint</th></tr><tr><td headers="hd_b_ch5.T.nc_hintif_your_firewall_blocks_pas_1_1_1_1" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">If your firewall blocks passive <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a>, you might get an error message that reads: &#x0201c;Passive mode refused. Turning off passive mode. No control connection for command: No such file or directory&#x0201d;. If this happens, try using a "smart" <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> client like NCFTP (available on most <a class="def" href="/books/n/handbook/A1237/def-item/app189/">UNIX</a> machines). 
Smart <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> clients are better at auto-negotiating active/passive <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> connections than are older <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> clients (e.g. Sun Solaris <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a>).</td></tr></tbody></table></div></div></div></div></div><div id="ch5.ch5_6_2"><h3>Stepwise Procedure for Creating a Local Copy of dbSNP</h3><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">
<b>Prepare the local area.</b>
</p></dd></dl><p>(check available space, etc.)</p><dl class="temp-labeled-list"><dt>2.</dt><dd><p class="no_top_margin">
<b>Download the schema files. </b>
</p></dd></dl><dl class="temp-labeled-list"><dt>a.</dt><dd><p class="no_top_margin">Download the following files from the dbSNP <a href="ftp://ftp.ncbi.nih.gov/snp/database/shared_schema/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">shared<wbr style="display:inline-block"></wbr>_schema</a> sub-directory: dbSNP_main_table, dbSNP_main_index_constraint, and all the files in the <a href="ftp://ftp.ncbi.nih.gov/snp/database/shared_data/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">shared<wbr style="display:inline-block"></wbr>_data</a> sub-directory. Together, the files from both of these sub-directories will allow you to create tables and indices for the dbSNP_main_table. </p></dd></dl><dl class="temp-labeled-list"><dt>b.</dt><dd><p class="no_top_margin">Go to the <a href="ftp://ftp.ncbi.nih.gov/snp/database/organism_schema/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">organism<wbr style="display:inline-block"></wbr>_schema</a> subdirectory, and select the organism for which you wish to create a database. For the purpose of this example, human_9606 has been selected. Once human_9606 is selected, you will be directed to the <a href="ftp://ftp.ncbi.nih.gov/snp/database/organism_schema/human_9606" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">human organism_schema</a> sub-directory. Download all of the files contained in this subdirectory.</p></dd></dl><dl class="temp-labeled-list"><dt>c.</dt><dd><p class="no_top_margin">Go to the <a href="ftp://ftp.ncbi.nih.gov/snp/database/organism_data/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">organism<wbr style="display:inline-block"></wbr>_data</a> subdirectory, and select the organism for which you wish to create a database. For the purpose of this example, human_9606 has been selected. 
Once you select human_9606, you will be directed to the <a href="ftp://ftp.ncbi.nih.gov/snp/database/organism_data/human_9606" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">human organism_data</a> sub-directory. Download all of the files contained in this subdirectory.</p></dd></dl><p>A user must always download the files located in the most recent versions of the shared_schema and shared_data sub-directories in addition to any organism specific content.</p><p>Save all the files in your local directory and decompress them. </p><div id="ch5.T.nc_hinton_a_unix_operating_system_" class="table"><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21088/table/ch5.T.nc_hinton_a_unix_operating_system_/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch5.T.nc_hinton_a_unix_operating_system__lrgtbl__"><table><tbody><tr><th id="hd_b_ch5.T.nc_hinton_a_unix_operating_system__1_1_1_1" rowspan="1" colspan="1" style="text-align:center;vertical-align:top;">Hint:</th></tr><tr><td headers="hd_b_ch5.T.nc_hinton_a_unix_operating_system__1_1_1_1" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">On a <a class="def" href="/books/n/handbook/A1237/def-item/app189/">UNIX</a> operating system, use gunzip to decompress the files: dbSNP_main_table and dbSNP_main_index_constraint.<br />The files on the <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> site are <a class="def" href="/books/n/handbook/A1237/def-item/app189/">UNIX</a> files. <a class="def" href="/books/n/handbook/A1237/def-item/app189/">UNIX</a>, MS-DOS and Macintosh text files use different characters to indicate a new line. 
Load the appropriate new line conversion program for your system before using bcp.</td></tr></tbody></table></div></div><dl class="temp-labeled-list"><dt>3.</dt><dd><p class="no_top_margin">
<b>Create the dbSNP_main_table </b>
</p></dd></dl><dl class="temp-labeled-list"><dt>a.</dt><dd><p class="no_top_margin">From the <a href="ftp://ftp.ncbi.nih.gov/snp/database/shared_schema/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">shared<wbr style="display:inline-block"></wbr>_schema</a> sub-directory, use the dbSNP_main_table file to create tables, and use the dbSNP_main_index_constraint files to create indices for the dbSNP main database.</p></dd></dl><dl class="temp-labeled-list"><dt>b.</dt><dd><p class="no_top_margin">Load all of the bcp files located in the <a href="ftp://ftp.ncbi.nih.gov/snp/database/shared_data/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">shared<wbr style="display:inline-block"></wbr>_data</a> sub-directory into the dbSNP_main_table you just created using the data-loading tool of your database server (e.g., bcp for <a class="def" href="/books/n/handbook/A1237/def-item/app176/">Sybase</a>). See the sample <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> protocol and sample Unix C Shell script (below) for directions. </p></dd></dl><dl class="temp-labeled-list"><dt>c.</dt><dd><p class="no_top_margin">Create indices by opening the dbSNP_main_index_constraint.sql file. 
If you are using a database server that provides the isql utility, then use the following command: </p></dd></dl><p><br />isql -S &#x0003c;servername&#x0003e; -U username -P password -i dbSNP_main_index_constraint.sql</p><div id="ch5.T.nc_hintthe_bcp_files_in_the_shared" class="table"><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21088/table/ch5.T.nc_hintthe_bcp_files_in_the_shared/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch5.T.nc_hintthe_bcp_files_in_the_shared_lrgtbl__"><table><tbody><tr><th id="hd_b_ch5.T.nc_hintthe_bcp_files_in_the_shared_1_1_1_1" rowspan="1" colspan="1" style="text-align:center;vertical-align:top;">Hint:</th></tr><tr><td headers="hd_b_ch5.T.nc_hintthe_bcp_files_in_the_shared_1_1_1_1" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">The &#x0201c;.bcp&#x0201d; files in the shared_data and organism_data sub-directories may be loaded into most spreadsheet programs by setting the field delimiter character to &#x0201c;tab&#x0201d;.</td></tr></tbody></table></div></div><dl class="temp-labeled-list"><dt>4.</dt><dd><p class="no_top_margin">
<b>Create the organism specific database</b>
</p></dd></dl><p>Once the dbSNP_main_table has been created, create the organism specific database using the files in your specific organism&#x02019;s organism_schema and organism_data subdirectories. Human_9606 will be used for the purpose of this example: </p><dl class="temp-labeled-list"><dt>a.</dt><dd><p class="no_top_margin">Create the human_9606 database using the following files found in the human_9606 <a href="ftp://ftp.ncbi.nih.gov/snp/database/organism_schema/human_9606" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">organism<wbr style="display:inline-block"></wbr>_schema</a>: human_9606_table.sql.gz, human_9606_view.sql.gz, human_9606_index_constraint.sql.gz,</p></dd></dl><p>and human_9606_foreign_key.sql.gz</p><dl class="temp-labeled-list"><dt>b.</dt><dd><p class="no_top_margin">Load all of the bcp files located in the human_9606 <a href="ftp://ftp.ncbi.nih.gov/snp/database/organism_data/human_9606" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">organism<wbr style="display:inline-block"></wbr>_data</a> sub-directory into the human_9606 database you just created using the data-loading tool of your database server (e.g., bcp for <a class="def" href="/books/n/handbook/A1237/def-item/app176/">Sybase</a>). See the sample <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> protocol and sample Unix C shell script (below) for directions. 
</p></dd></dl><div id="ch5.T.nc_hintuse_ftp_i_to_turn_off_inter" class="table"><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21088/table/ch5.T.nc_hintuse_ftp_i_to_turn_off_inter/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch5.T.nc_hintuse_ftp_i_to_turn_off_inter_lrgtbl__"><table><tbody><tr><th id="hd_b_ch5.T.nc_hintuse_ftp_i_to_turn_off_inter_1_1_1_1" rowspan="1" colspan="1" style="text-align:center;vertical-align:top;">Hint:</th></tr><tr><td headers="hd_b_ch5.T.nc_hintuse_ftp_i_to_turn_off_inter_1_1_1_1" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">Use &#x0201c;<b>ftp -i</b>&#x0201d; to turn off interactive prompting during multiple file transfers to avoid having to hit &#x0201c;yes&#x0201d; to confirm transfer hundreds of times.</td></tr></tbody></table></div></div><div id="ch5.T.nc_hintto_avoid_an_overflow_of_you" class="table"><p class="large-table-link" style="display:none"><span class="right"><a href="/books/NBK21088/table/ch5.T.nc_hintto_avoid_an_overflow_of_you/?report=objectonly" target="object">View in own window</a></span></p><div class="large_tbl" id="__ch5.T.nc_hintto_avoid_an_overflow_of_you_lrgtbl__"><table><tbody><tr><th id="hd_b_ch5.T.nc_hintto_avoid_an_overflow_of_you_1_1_1_1" rowspan="1" colspan="1" style="text-align:center;vertical-align:top;">Hint:</th></tr><tr><td headers="hd_b_ch5.T.nc_hintto_avoid_an_overflow_of_you_1_1_1_1" rowspan="1" colspan="1" style="text-align:left;vertical-align:top;">To avoid an overflow of your transaction log while using the bcp command option <br />(available in <a class="def" href="/books/n/handbook/A1237/def-item/app176/">Sybase</a> and SQL servers), select the "batch mode" by using the <br />command option: -b number of rows.&#x000a0; For example, the command option -b 10000 will<br />cause a commit to the table every 10,000 rows.</td></tr></tbody></table></div></div><dl 
class="temp-labeled-list"><dt>5.</dt><dd><p class="no_top_margin"><b>Sample <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> Loading protocol.</b>
</p></dd></dl><dl class="temp-labeled-list"><dt>a.</dt><dd><p class="no_top_margin">Type ftp -i ftp.ncbi.nih.gov (Use "anonymous" as user name and your email as your password).</p></dd><dt>b.</dt><dd><p class="no_top_margin"> Type: cd snp/database</p></dd><dt>c.</dt><dd><p class="no_top_margin">To get dbSNP_main for shared tables and shared data: Type ls to see if you are in the directory with the right files. Then type &#x0201c;cd shared_schema&#x0201d; to get schema file for dbSNP_main, and finally, type &#x0201c;cd shared_data&#x0201d; to get the data for dbSNP_main. </p></dd><dt>d.</dt><dd><p class="no_top_margin">Type binary (to set binary transfer mode).</p></dd><dt>e.</dt><dd><p class="no_top_margin">Type mget *.gz (to initiate transfer). Depending on the speed of the connection, this may take hours since the total transfer size is gigabytes in size and growing.</p></dd><dt>f.</dt><dd><p class="no_top_margin">To decompress the *.gz files, type gunzip *.gz. (Currently, the total size of the uncompressed bcp files is over 10 <a class="def" href="/books/n/handbook/A1237/def-item/app60/">GB</a>).</p></dd></dl><dl class="temp-labeled-list"><dt>6.</dt><dd><p class="no_top_margin">
<b>Use scripts to automate data loading.</b>
</p></dd></dl><dl class="temp-labeled-list"><dt>a.</dt><dd><p class="no_top_margin">Located in the <a href="ftp://ftp.ncbi.nih.gov/snp/database/loadscript/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">loadscript</a> subdirectory of the dbSNP <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> site, there is a file called cmd.create_local_dbSNP.txt that provides a sample <a class="def" href="/books/n/handbook/A1237/def-item/app189/">UNIX</a> C shell script for creating a local copy of dbSNP_main and a local copy of a specific organism database using files in the shared_schema and organism_schema sub-directories.</p></dd></dl><dl class="temp-labeled-list"><dt>b.</dt><dd><p class="no_top_margin">Also in the <a href="ftp://ftp.ncbi.nih.gov/snp/database/loadscript/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">loadscript</a> subdirectory of the dbSNP <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> site, there is a file called cmd.bulkinsert.txt that provides a sample <a class="def" href="/books/n/handbook/A1237/def-item/app189/">UNIX</a> C shell script for loading tables with files located in shared_data and organism_data sub-directories.</p></dd></dl><dl class="temp-labeled-list"><dt>7.</dt><dd><p class="no_top_margin"><b>Data integrity (creating a partial local copy of dbSNP).</b>
</p></dd></dl><p>dbSNP is a relational database. Each table has either a unique index or a primary key. Foreign keys are not enforced. There are advantages and a disadvantage to this approach. The advantages are that this approach makes it easy to drop and re-create the table using the dbSNP_main_table, which then makes it possible to create a partial local copy of dbSNP. For example, if you are interested only in the original submitted <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a>s and their population frequencies, and not in their map locations on <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> genome contigs or <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> Accession numbers (both are huge tables), then these tables can be skipped (i.e., SNPContigLoc and MapLink). Please remember that mapping tables such as SNPContigLoc will have a <a class="def" href="/books/n/handbook/A1237/def-item/app209/">build</a> ID prefix and suffix included in their file names (e.g., SNPContigLoc will be b125_SNPContigLoc_35_1 for SNP build 125 and NCBI <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a> build 35 version 1). Of course, to select tables for a particular query, the contents of each table and the dbSNP entity relationship (ER) diagram need to be understood. The disadvantage of unenforced references is that either stored procedures or external code must be written to ensure referential integrity.</p></div></div><div id="ch5.ch5_s7"><h2 id="_ch5_ch5_s7_">Appendix 1. 
dbSNP report formats.</h2><div id="ch5.ch5_7_1"><h3>ASN.1</h3><p>The docsum_2005.asn file is the ASN structure definition file for <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> and is located in the /<a href="ftp://ftp.ncbi.nih.gov/snp/specs/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">specs</a> subdirectory of the dbSNP <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> site. The <a href="ftp://ftp.ncbi.nih.gov/snp/00readme.txt" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">00readme file</a>, located in the main dbSNP FTP directory, provides information about ASN.1 data structure and data exchange. ASN.1 text or binary output can be converted into one or more of the following formats: flatfile, <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a>, docsum, chromosome report, RS/SS, and <a class="def" href="/books/n/handbook/A1237/def-item/app198/">XML</a>. To convert from ASN.1 to another format, request ASN.1 output from either the dbSNP FTP site or the dbSNP batch query pages, and use dstool (located in the &#x0201c;<a href="ftp://ftp.ncbi.nih.gov/snp/bin/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">bin</a>&#x0201d; directory of the dbSNP FTP site) to locally convert the output into as many alternative formats as needed. </p></div><div id="ch5.ch5_7_2"><h3>XML </h3><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app198/">XML</a> format provides query-specific information about refSNP clusters, as well as <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> members in the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> Exchange (NSE) format. 
The <a class="def" href="/books/n/handbook/A1237/def-item/app198/">XML</a> schema is located in the docsum_2005.xsd file, which is housed in the /<a href="ftp://ftp.ncbi.nih.gov/snp/specs/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">specs</a> sub-directory of the dbSNP <a class="def" href="/books/n/handbook/A1237/def-item/app58/">FTP</a> site. A human-readable text form of the NSE definitions can be found in docsum_2005.asn, also located in the /<a href="ftp://ftp.ncbi.nih.gov/snp/specs/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">specs</a> sub-directory of the dbSNP FTP site.</p></div><div id="ch5.ch5_7_3"><h3>FASTA: ss and rs</h3><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a> report format provides the flanking sequence for each report of variation in dbSNP, as well as all the submitted sequences that have no variation. ss FASTA contains all submitted <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> sequences in FASTA format, whereas rs FASTA contains all the reference SNP sequences in FASTA format. The FASTA data format is typically used for sequence comparisons using <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a>. BLAST SNP is useful for conducting a few sequence comparisons in the FASTA format, whereas multiple FASTA sequence comparisons will require the construction of a local BLAST database of FASTA-formatted data and the installation of a local stand-alone version of BLAST. </p></div><div id="ch5.ch5_7_4"><h3>rs docsum Flatfile</h3><p>The rs docsum flatfile report is generated from the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> datafiles and is provided in the files "/ASN1_flat/ds_flat_chXX.flat". Files are generated per chromosome (chXX in file name), as with all of the large report dumps. 
Because flatfile reports are compact, they will not provide you with as much information as the ASN.1 binary report, but they are useful for scanning human <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> data manually because they provide detailed information at a glance. A full description of the information provided in the rs docsum flatfile format is available in the 00readme file, located in the SNP directory of the <a href="ftp://ftp.ncbi.nih.gov/snp/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">SNP FTP</a> site.</p></div><div id="ch5.ch5_7_5"><h3>Chromosome Reports</h3><p>The chromosome reports format provides an ordered list of RefSNPs in approximate chromosome coordinates. Chromosome reports is a small file to download but contains a great deal of information that might be helpful in identifying SNPs useful as markers on maps or contigs because the coordinate system used in this format is the same as that used for the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> genome <a class="def" href="/books/n/handbook/A1237/def-item/app99/">Map Viewer</a>. It should also be mentioned that the chromosome reports directory might contain the multi/ file and/or the noton/ files. These files are lists (in chromosome report format) of SNPs that hit multiple chromosomes in the genome and those that did not hit any chromosomes in the genome, respectively. 
A full description of the information provided in the chromosome reports format is available in the 00readme file, located in the <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> directory of the <a href="ftp://ftp.ncbi.nih.gov/snp/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">SNP FTP</a> site.</p></div><div id="ch5.Genotype_Report"><h3>Genotype Report</h3><p>The dbSNP Genotype report shows strain-specific <a class="def" href="/books/n/handbook/A1237/def-item/app65/">genotype</a> information for model organisms, and contains a genotype detail link as well as a genotype <a class="def" href="/books/n/handbook/A1237/def-item/app198/">XML</a> link. The genotype detail link will provide the user with submitter and genotype information for each of the submitted SNPs in a refSNP <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> of interest, and the genotype XML link will allow the user to download the reported data in the Genotype Exchange XML format, which can be read by either Internet Explorer or Netscape browsers. XML dumps via the dbSNP ftp server provide the same content for all genotype data in dbSNP by organism and chromosome. </p></div></div><div id="ch5.ch5_s8"><h2 id="_ch5_ch5_s8_">Appendix 2. Rules and methodology for mapping</h2><p>A cycle of <a class="def" href="/books/n/handbook/A1237/def-item/app103/">MegaBLAST</a> and Blast alignment to the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> genome assembly of an organism is initiated either by the appearance of <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a>-formatted genome sequence for a new <a class="def" href="/books/n/handbook/A1237/def-item/app209/">build</a> of the assembly or by the significant accrual of newly submitted <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> data for that organism. 
</p><div id="ch5.ch5_8_2"><h3>Organism-specific Genome Mapping</h3><p>The refSNP (rs) and submitted <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> (ss) mapping process is a multi-step, computer-based procedure that begins when refSNP and submitted SNP <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a> sets are aligned to the most recent genome assembly using <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> or <a class="def" href="/books/n/handbook/A1237/def-item/app103/">MegaBLAST</a>. Repeat masking during this process is accomplished automatically using the BLAST/MegaBLAST &#x0201c;Dust&#x0201d; option. To increase alignment stringency, multiple cycles of BLAST/MegaBLAST are employed, where the word size limit is reduced from 64 in the first cycle to 16 by the final cycle. MegaBLAST parameters are set to a default position with the exception of a seeding hit suppression parameter (e.g., Parameter="-U F -F 'mL' -J T -X 10 -r 1 -q -3 -W 64 -m 11 -e 0.01") that suppresses seeding hits but allows extension through regions of lowercase sequence. The quality of each alignment is determined using an <a href="#ch5.Appendix_3_Alignment_profiling_funct">Alignment Profiling Function</a> developed specifically for this purpose. Alignments are selected based on a variable stringency threshold that ranges from 70% alignment to 50% alignment. 
If the alignment profiling function indicates the quality of the alignment is below a 70% alignment threshold, the alignment is discarded, although an alignment threshold of 50% is sometimes used in case there are gaps in the sequence.</p><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a>/<a class="def" href="/books/n/handbook/A1237/def-item/app103/">MegaBLAST</a> output of <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> binary files of local alignments is then analyzed by an algorithm (&#x0201c;Globalizer&#x0201d;) that sorts those local alignments that do not fit the dbSNP alignment profile criteria (defined by position and proximity to one another) to create a &#x0201c;Global Alignment&#x0201d; &#x02014; a group of local alignments that lay close to one another on a sequence. If the global alignment is greater than or equal to a pre-determined percentage of the flanking sequence, it is accepted as a true alignment between the refSNP or submitted <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> and the genome assembly. The Globalizer is especially helpful when refSNPs or submitted SNPs based on <a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a> sequence are being aligned to the genome assembly. In such a case, the MegaBLAST/BLAST ASN.1 binary output contains many small alignments that will not map to the genome assembly unless they undergo the &#x0201c;globalization&#x0201d; process.</p><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> binary output of &#x0201c;Globalizer&#x0201d; is then processed by a program called &#x0201c;Hit Analyzer&#x0201d;. This program defines the alleles and LOC types for each hit, and also determines the map position by using the closest map positions on either side of the <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> to establish the hit location. 
The text output of &#x0201c;Hit Analyzer&#x0201d; is then processed by the &#x0201c;Hit Filter&#x0201d;, which filters out paralogous hits and uses multiple strategies to select only those SNPs that have the greatest degree of alignment to a particular <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a>. The output from the &#x0201c;Hit Filter&#x0201d; is then placed into a map.bcp file and is processed by the &#x0201c;SnpMapInfo&#x0201d; program, which creates an MD5 signature for each SNP that is representative of all the positional information available for that SNP. The MD5 signature is then placed in the SNP MAP INFO file, which is then loaded into dbSNP. </p><p>RefSNPs and submitted SNPs are analyzed against <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> <a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app155/">RefSeq</a> mRNA, and GenBank clone accessions using a similar procedure to that described in the above paragraphs.</p><p>Once all the results from previous steps are loaded into dbSNP, we perform <a class="def" href="/books/n/handbook/A1237/def-item/app26/">cluster</a> analysis using a program called &#x0201c;SNPHitCluster&#x0201d;, which analyzes SNPs having the same signatures to find candidates for clustering. If an MD5 signature for a particular <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> is different from the MD5 signature of another SNP, then the hits for those two SNPs are different, and therefore, the SNPs are unique and need not be clustered. If an MD5 signature of a particular SNP is the same as that of another SNP, the two SNPs may have the same hit pattern, and if, after further analysis, the hit patterns are shown to be the same, the two SNPs will be clustered. 
</p></div></div><div id="ch5.Appendix_3_Alignment_profiling_funct"><h2 id="_ch5_Appendix_3_Alignment_profiling_funct_">Appendix 3 Alignment profiling function</h2><p>Mismatch weights are not equal along the flanking sequence, and should therefore be assigned according to a profiling function. Because of the nature of the sequencing process, it is common to have errors concentrated along the flanking sequence tails; we must, therefore, be mindful of this consideration and not disregard alignments in the tails of the query sequence just because of the high concentration of errors found there. Let us assume, therefore, that the distribution of errors follows the rule of natural distribution starting on some point within the flank. This can be approximated with the function F(x):</p><p>
<span class="graphic"><img src="/books/NBK21088/bin/ch5eq1.jpg" alt="Image ch5eq1.jpg" /></span>
</p><p>Alignment Quality, &#x0201c;Q&#x0201d;, can be calculated using the following equation: </p><p>
<span class="graphic"><img src="/books/NBK21088/bin/ch5eq2.jpg" alt="Image ch5eq2.jpg" /></span>
</p><p>Having:</p><p>
<span class="graphic"><img src="/books/NBK21088/bin/ch5eq3.jpg" alt="Image ch5eq3.jpg" /></span>
</p><p>The optimistic identity rate (so named since it doesn&#x02019;t include mismatches) can be calculated by the following function: </p><p>
<span class="graphic"><img src="/books/NBK21088/bin/ch5eq4.jpg" alt="Image ch5eq4.jpg" /></span>
</p><p>Mismatches will affect the numerator of the above function. A function to describe mismatches will contain parts of unmovable discontinuations. Strictly speaking, we must take the integral of this function in order to determine the mismatch effect, but due to the corpuscular nature of the alignment, we can easily replace it with the sum of the elementary function:</p><p>
<span class="graphic"><img src="/books/NBK21088/bin/ch5eq5.jpg" alt="Image ch5eq5.jpg" /></span>
</p><p>where m is the mismatch position vector.</p><p>Thus, the final function:</p><p>
<span class="graphic"><img src="/books/NBK21088/bin/ch5eq6.jpg" alt="Image ch5eq6.jpg" /></span>
</p></div><div id="ch5.ch5_s9"><h2 id="_ch5_ch5_s9_">Appendix 4. 3D structure neighbor analysis.</h2><p>When a protein is known to have a structure neighbor, dbSNP projects the RefSNPs located in that protein sequence onto sequence structures. </p><p>First, <a class="def" href="/books/n/handbook/A1237/def-item/app30/">contig</a> annotation results provide the <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> ID (snp_id), protein accession (protein_acc), contig and SNP amino acid residue (residue), as well as the amino acid position (aa_position) for a particular RefSNP. These data can be found in the dbSNP table, SNPContigLocusId. <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a> sequence is then obtained for each protein accession using the program idfetch, with the command line parameters set to: </p><pre>-t 5 -dp -c 1 -q</pre><p>We <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> these sequences against the <a class="def" href="/books/n/handbook/A1237/def-item/app130/">PDB</a> database using &#x0201c;blastall&#x0201d; with the command line parameters set to: </p><pre>-p <a class="def" href="/books/n/handbook/A1237/def-item/app11/">blastp</a> -d pdb -i protein.fasta -o result.blast -e 0.0001 -m 3 -I T -v 1 -b 1</pre><p>Each <a class="def" href="/books/n/handbook/A1237/def-item/app168/">SNP</a> position in the protein sequence is used to determine its corresponding amino acid and amino acid position in the 3D structure from the <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> result. These data are stored in the SNP3D table.</p></div><div id="bk_toc_contnr"></div></div></div>
            <div class="post-content"><div><div class="half_rhythm"><a href="/books/about/copyright/">Copyright Notice</a></div><div class="small"><span class="label">Bookshelf ID: NBK21088</span></div><div style="margin-top:2em" class="bk_noprnt"><a class="bk_cntns" href="/books/n/handbook/">Contents</a><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook/ch4/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook/ch6/" title="Next page in this title">Next &gt;</a></div></div></div></div>

        </div>

        <!-- Custom content below content -->
        <div class="col4">

        </div>


        <!-- Book content -->

        <!-- Custom content below bottom nav -->
        <div class="col5">

        </div>
    </div>

    <div id="rightcolumn" class="four_col col last">
        <!-- Custom content above discovery portlets -->
        <div class="col6">
            <div id="ncbi_share_book"><a href="#" class="ncbi_share" data-ncbi_share_config="popup:false,shorten:true" ref="id=NBK21088&amp;db=books">Share</a></div>

        </div>
        <div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Views</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="PDF_download" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="/books/NBK21088/?report=reader">PubReader</a></li><li><a href="/books/NBK21088/?report=printable">Print View</a></li><li><a data-jig="ncbidialog" href="#_ncbi_dlg_citbx_NBK21088" data-jigconfig="width:400,modal:true">Cite this Page</a><div id="_ncbi_dlg_citbx_NBK21088" style="display:none" title="Cite this Page"><div class="bk_tt">Kitts A, Sherry S. The Single Nucleotide Polymorphism Database (dbSNP) of Nucleotide Sequence Variation. 2002 Oct 9 [Updated 2011 Feb 2]. In: McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-.  
Chapter 5.<span class="bk_cite_avail"></span></div></div></li><li><a href="/books/NBK21088/pdf/Bookshelf_NBK21088.pdf">PDF version of this page</a> (714K)</li><li><a href="/books/n/handbook/pdf/">PDF version of this title</a> (7.2M)</li><li><a href="#" class="toggle-glossary-link" title="Enable/disable links to the glossary">Disable Glossary Links</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>In this Page</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="page-toc" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="#_abs_rndgid_" ref="log$=inpage&amp;link_id=inpage">Summary</a></li><li><a href="#ch5.ch5_s1" ref="log$=inpage&amp;link_id=inpage">Introduction</a></li><li><a href="#ch5.ch5_s2" ref="log$=inpage&amp;link_id=inpage">Searching dbSNP</a></li><li><a href="#ch5.ch5_s3" ref="log$=inpage&amp;link_id=inpage">Submitted Content</a></li><li><a href="#ch5.ch5_s4" ref="log$=inpage&amp;link_id=inpage">Computed Content (The dbSNP Build Cycle)</a></li><li><a href="#ch5.ch5_s5" ref="log$=inpage&amp;link_id=inpage">dbSNP Resource Integration</a></li><li><a href="#ch5.ch5_s6" ref="log$=inpage&amp;link_id=inpage">How to Create a Local Copy of dbSNP</a></li><li><a href="#ch5.ch5_s7" ref="log$=inpage&amp;link_id=inpage">Appendix 1. dbSNP report formats.</a></li><li><a href="#ch5.ch5_s8" ref="log$=inpage&amp;link_id=inpage">Appendix 2. Rules and methodology for mapping</a></li><li><a href="#ch5.Appendix_3_Alignment_profiling_funct" ref="log$=inpage&amp;link_id=inpage">Appendix 3 Alignment profiling function</a></li><li><a href="#ch5.ch5_s9" ref="log$=inpage&amp;link_id=inpage">Appendix 4. 
3D structure neighbor analysis.</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Recent Activity</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="recent_activity" id="Shutter"></a></div><div class="portlet_content"><div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="HTDisplay" class=""><div class="action"><a href="javascript:historyDisplayState('ClearHT')">Clear</a><a href="javascript:historyDisplayState('HTOff')" class="HTOn">Turn Off</a><a href="javascript:historyDisplayState('HTOn')" class="HTOff">Turn On</a></div><ul id="activity"><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=1" href="/portal/utils/pageresolver.fcgi?recordid=67c825bdb70fbb19600581f0">The Single Nucleotide Polymorphism Database (dbSNP) of Nucleotide Sequence Varia...</a><div class="ralinkpop offscreen_noflow">The Single Nucleotide Polymorphism Database (dbSNP) of Nucleotide Sequence Variation - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=2" href="/portal/utils/pageresolver.fcgi?recordid=67c825bbfeee5b00acfb6218">The Taxonomy Project - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">The Taxonomy Project - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=3" href="/portal/utils/pageresolver.fcgi?recordid=67c825bafeee5b00acfb5a28">Macromolecular Structure Databases - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">Macromolecular Structure Databases - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div 
class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=4" href="/portal/utils/pageresolver.fcgi?recordid=67c825b9d5edb449bf42bc62">PubMed: The Bibliographic Database - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">PubMed: The Bibliographic Database - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=5" href="/portal/utils/pageresolver.fcgi?recordid=67c825b6d5edb449bf429edd">The Databases - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">The Databases - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li></ul><p class="HTOn">Your browsing activity is empty.</p><p class="HTOff">Activity recording is turned off.</p><p id="turnOn" class="HTOff"><a href="javascript:historyDisplayState('HTOn')">Turn recording back on</a></p><a class="seemore" href="/sites/myncbi/recentactivity">See more...</a></div></div></div>

        <!-- Custom content below discovery portlets -->
        <div class="col7">

        </div>
    </div>
</div>

<!-- Custom content after all -->
<div class="col8">

</div>
<div class="col9">

</div>

<script type="text/javascript" src="/corehtml/pmc/js/jquery.scrollTo-1.4.2.js"></script>
<script type="text/javascript">
    // Skip-link wiring: for every element with class "skiplink", make the
    // element selected by its href focusable and move focus to it when the
    // link is clicked, instead of performing the default link navigation.
    (function (jq) {
        jq('.skiplink').each(function (index, element) {
            var link = jq(element);
            // The href value is used directly as a jQuery selector (e.g. "#some-id").
            var target = jq(link.attr('href'));

            // tabindex="-1" lets the target receive focus from script.
            target.attr('tabindex', '-1');
            target.addClass('skiptarget');

            link.on('click', function (evt) {
                evt.preventDefault();
                // Scroll via the scrollTo plugin (second argument 0 is
                // presumably the animation duration), then focus the target.
                var options = {
                    onAfter: function () {
                        target.focus();
                    }
                };
                jq.scrollTo(target, 0, options);
            });
        });
    }(jQuery));
</script>
                        </div>
                        <div class="bottom">

                            <div id="NCBIFooter_dynamic">
    <!--<component id="Breadcrumbs" label="breadcrumbs"/>
    <component id="Breadcrumbs" label="helpdesk"/>-->

</div>

                            <div class="footer" id="footer">
	<section class="icon-section">
		<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
		<div class="grid-container container">
			<div class="icon-section_container">
				<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
					<defs>
						<style>
							.cls-11 {
							fill: #737373;
							}
						</style>
					</defs>
					<title>Twitter</title>
					<path class="cls-11" d="M250.11,105.48c-7,3.14-13,3.25-19.27.14,8.12-4.86,8.49-8.27,11.43-17.46a78.8,78.8,0,0,1-25,9.55,39.35,39.35,0,0,0-67,35.85,111.6,111.6,0,0,1-81-41.08A39.37,39.37,0,0,0,81.47,145a39.08,39.08,0,0,1-17.8-4.92c0,.17,0,.33,0,.5a39.32,39.32,0,0,0,31.53,38.54,39.26,39.26,0,0,1-17.75.68,39.37,39.37,0,0,0,36.72,27.3A79.07,79.07,0,0,1,56,223.34,111.31,111.31,0,0,0,116.22,241c72.3,0,111.83-59.9,111.83-111.84,0-1.71,0-3.4-.1-5.09C235.62,118.54,244.84,113.37,250.11,105.48Z">
					</path>
				</svg></a>
				<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
					<title>Facebook</title>
					<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
					</path>
				</svg></a>
				<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
						<title>LinkedIn</title>
						<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
						</path>
					</svg></a>
				<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
					<defs>
						<style>
							.cls-11,
							.cls-12 {
							fill: #737373;
							}

							.cls-11 {
							fill-rule: evenodd;
							}
						</style>
					</defs>
					<title>GitHub</title>
					<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
					</path>
					<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
					</path>
					<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
					</path>
					<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
					<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
					</path>
					<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
					</path>
					<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
					</path>
					<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
				</svg></a>
				<!-- Footer icon link to the NCBI Insights blog; the anchor's aria-label supplies the link name. -->
				<a id="footer_blog" class="footer-icon" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
					<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
						<defs>
							<style>
								/* Grey fill, the same #737373 used by the GitHub icon's classes. */
								.cls-1 {
									fill: #737373;
								}
							</style>
						</defs>
						<title>NCBI Insights Blog</title>
						<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
					</svg>
				</a>
			</div>
		</div>
	</section>

	<section class="container-fluid bg-primary">
		<div class="container pt-5">
			<div class="row mt-3">
				<div class="col-lg-3 col-12">
					<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
					<ul class="list-inline social_media">
						<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" target="_blank" rel="noopener noreferrer" aria-label="Twitter"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
							<!-- NLM Twitter link (opens in a new tab): a white filled logo path (.st20) inside a white stroked ring (.st30). -->
							<style type="text/css">
								.st20 { fill: #FFFFFF; }
								.st30 { fill: none; stroke: #FFFFFF; stroke-width: 8; stroke-miterlimit: 10; }
							</style>
							<title>Twitter</title>
							<g>
								<g>
									<g>
										<path class="st20" d="M192.9,88.1c-5,2.2-9.2,2.3-13.6,0.1c5.7-3.4,6-5.8,8.1-12.3c-5.4,3.2-11.4,5.5-17.6,6.7                                                 c-10.5-11.2-28.1-11.7-39.2-1.2c-7.2,6.8-10.2,16.9-8,26.5c-22.3-1.1-43.1-11.7-57.2-29C58,91.6,61.8,107.9,74,116                                                 c-4.4-0.1-8.7-1.3-12.6-3.4c0,0.1,0,0.2,0,0.4c0,13.2,9.3,24.6,22.3,27.2c-4.1,1.1-8.4,1.3-12.5,0.5c3.6,11.3,14,19,25.9,19.3                                                 c-11.6,9.1-26.4,13.2-41.1,11.5c12.7,8.1,27.4,12.5,42.5,12.5c51,0,78.9-42.2,78.9-78.9c0-1.2,0-2.4-0.1-3.6                                                 C182.7,97.4,189.2,93.7,192.9,88.1z"></path>
									</g>
								</g>
								<circle class="st30" cx="124.4" cy="128.8" r="108.2"></circle>
							</g>
						</svg></a></li>
						<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" target="_blank" rel="noopener noreferrer" aria-label="Facebook">
							<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
								<!-- NLM Facebook link (opens in a new tab): a white filled logo path (.st10) with a white stroked ring (.st110). -->
								<style type="text/css">
									.st10 { fill: #FFFFFF; }
									.st110 { fill: none; stroke: #FFFFFF; stroke-width: 8; stroke-miterlimit: 10; }
								</style>
								<title>Facebook</title>
								<g>
									<g>
										<path class="st10" d="M159,99.1h-24V88.4c0-5,3.3-6.2,5.7-6.2h16.8V60l-24.4-0.1c-22.1,0-26.2,16.5-26.2,27.1v12.1H90v22.5h16.9                                                       v67.5H135v-67.5h21.7L159,99.1z"></path>
									</g>
								</g>
								<circle class="st110" cx="123.6" cy="123.2" r="108.2"></circle>
							</svg>
						</a></li>
						<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" target="_blank" rel="noopener noreferrer" aria-label="Youtube"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
							<!-- NLM YouTube link (opens in a new tab): a white stroked ring (.st4) around a white filled logo path (.st5). -->
							<title>Youtube</title>
							<style type="text/css">
								.st4 { fill: none; stroke: #FFFFFF; stroke-width: 8; stroke-miterlimit: 10; }
								.st5 { fill: #FFFFFF; }
							</style>
							<circle class="st4" cx="124.2" cy="123.4" r="108.2"></circle>
							<!-- The path coordinates sit around y = 1037 to 1115; the translate shifts them up into the 249-unit viewBox. -->
							<g transform="translate(0,-952.36218)">
								<path class="st5" d="M88.4,1037.4c-10.4,0-18.7,8.3-18.7,18.7v40.1c0,10.4,8.3,18.7,18.7,18.7h72.1c10.4,0,18.7-8.3,18.7-18.7                                             v-40.1c0-10.4-8.3-18.7-18.7-18.7H88.4z M115.2,1058.8l29.4,17.4l-29.4,17.4V1058.8z"></path>
							</g>
						</svg></a></li>
					</ul>
				</div>
				<div class="col-lg-3 col-12">
					<!-- NLM postal address; the street address links to a Google Maps place page in a new tab. -->
					<p class="address_footer text-white">National Library of Medicine<br />
						<a class="text-white" href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
							Bethesda, MD 20894</a></p>
				</div>
				<div class="col-lg-3 col-12 centered-lg">
					<!-- Policy links: web policies, FOIA, and the HHS vulnerability disclosure policy. -->
					<p>
						<a class="text-white" href="https://www.nlm.nih.gov/web_policies.html">Web Policies</a><br />
						<a class="text-white" href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office">FOIA</a><br />
						<a class="text-white" id="vdp" href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html">HHS Vulnerability Disclosure</a>
					</p>
				</div>
				<div class="col-lg-3 col-12 centered-lg">
					<!-- Support links: help desk, accessibility statement, and careers. -->
					<p>
						<a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
						<a class="text-white" href="https://www.nlm.nih.gov/accessibility.html">Accessibility</a><br />
						<a class="text-white" href="https://www.nlm.nih.gov/careers/careers.html">Careers</a>
					</p>
				</div>
			</div>
			<div class="row">
				<div class="col-lg-12 centered-lg">
					<!-- Bottom navigation: parent-organisation links (NLM, NIH, HHS, USA.gov). All use explicit https URLs. -->
					<nav class="bottom-links">
						<ul class="mt-3">
							<li>
								<!-- Was protocol-relative (//www.nlm.nih.gov/); now matches the sibling https links. -->
								<a class="text-white" href="https://www.nlm.nih.gov/">NLM</a>
							</li>
							<li>
								<a class="text-white" href="https://www.nih.gov/">NIH</a>
							</li>
							<li>
								<a class="text-white" href="https://www.hhs.gov/">HHS</a>
							</li>
							<li>
								<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
							</li>
						</ul>
					</nav>
				</div>
			</div>
		</div>
	</section>
	<!-- Footer scripts, loaded in document order with no async/defer attributes.
	     First: a combined bundle whose URL names the InstrumentOmnitureBaseJS, InstrumentNCBIConfigJS,
	     InstrumentNCBIBaseJS and InstrumentPageStarterJS modules (presumably page analytics; confirm with the portal team).
	     Second: hfjs2.js (presumably header/footer behaviour; confirm).
	     The single space between the tags keeps each script element non-empty. -->
	<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
	<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
</div>
                        </div>
                    </div>
                    <!--/.page-->
                </div>
                <!--/.wrap-->
            </div><!-- /.twelve_col -->
        </div>
        <!-- /.grid -->

        <span class="PAFAppResources"></span>

        <!-- BESelector tab -->


        <!-- No-JavaScript fallback: requests /stat with jsdisabled=true plus the same ncbi_* fields declared in the head's Logger meta tags (presumably a hit-logging beacon; confirm).
             Spaces in ncbi_pagename are percent-encoded so the URL is valid; alt is empty because the image carries no content for readers. -->
        <noscript><img alt="" src="/stat?jsdisabled=true&amp;ncbi_db=books&amp;ncbi_pdid=book-part&amp;ncbi_acc=NBK21088&amp;ncbi_domain=handbook&amp;ncbi_report=record&amp;ncbi_type=fulltext&amp;ncbi_objectid=&amp;ncbi_pcid=/NBK21088/&amp;ncbi_pagename=The%20Single%20Nucleotide%20Polymorphism%20Database%20(dbSNP)%20of%20Nucleotide%20Sequence%20Variation%20-%20The%20NCBI%20Handbook%20-%20NCBI%20Bookshelf&amp;ncbi_bookparttype=chapter&amp;ncbi_app=bookshelf" /></noscript>


        <!-- usually for JS scripts at page bottom -->
        <!--<component id="PageFixtures" label="styles"></component>-->


<!-- CE8B5AF87C7FFCB1_0191SID /projects/books/PBooks@9.11 portal104 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>

<!-- Portal JavaScript bundle from static.pubmed.gov, requested over explicit https instead of a protocol-relative URL.
     The non-standard snapshot attribute is kept as-is in case portal code reads it. -->
<script type="text/javascript" src="https://static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3879255/4121861/3501987/4008961/3893018/3821238/4062932/4209313/4212053/4076480/3921943/3400083/3426610.js" snapshot="books"></script></body>
</html>