477 lines
No EOL
160 KiB
HTML
477 lines
No EOL
160 KiB
HTML
<?xml version="1.0" encoding="utf-8"?>
|
||
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
||
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
||
|
||
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||
<!-- AppResources meta begin -->
|
||
<meta name="paf-app-resources" content="" />
|
||
<script type="text/javascript">var ncbi_startTime = new Date();</script>
|
||
|
||
<!-- AppResources meta end -->
|
||
|
||
<!-- TemplateResources meta begin -->
|
||
<meta name="paf_template" content="" />
|
||
|
||
<!-- TemplateResources meta end -->
|
||
|
||
<!-- Logger begin -->
|
||
<meta name="ncbi_db" content="books" /><meta name="ncbi_pdid" content="book-part" /><meta name="ncbi_acc" content="NBK154410" /><meta name="ncbi_domain" content="handbook2e" /><meta name="ncbi_report" content="record" /><meta name="ncbi_type" content="fulltext" /><meta name="ncbi_objectid" content="" /><meta name="ncbi_pcid" content="/NBK154410/" /><meta name="ncbi_pagename" content="The Database of Genotypes and Phenotypes (dbGaP) and PheGenI - The NCBI Handbook - NCBI Bookshelf" /><meta name="ncbi_bookparttype" content="chapter" /><meta name="ncbi_app" content="bookshelf" />
|
||
<!-- Logger end -->
|
||
|
||
<title>The Database of Genotypes and Phenotypes (dbGaP) and PheGenI - The NCBI Handbook - NCBI Bookshelf</title>
|
||
|
||
<!-- AppResources external_resources begin -->
|
||
<link rel="stylesheet" href="/core/jig/1.15.2/css/jig.min.css" /><script type="text/javascript" src="/core/jig/1.15.2/js/jig.min.js"></script>
|
||
|
||
<!-- AppResources external_resources end -->
|
||
|
||
<!-- Page meta begin -->
|
||
<meta name="robots" content="NOINDEX,NOFOLLOW,NOARCHIVE" /><meta name="citation_inbook_title" content="The NCBI Handbook [Internet]. 2nd edition" /><meta name="citation_title" content="The Database of Genotypes and Phenotypes (dbGaP) and PheGenI" /><meta name="citation_publisher" content="National Center for Biotechnology Information (US)" /><meta name="citation_date" content="2013/08/15" /><meta name="citation_author" content="Kimberly A Tryka" /><meta name="citation_author" content="Luning Hao" /><meta name="citation_author" content="Anne Sturcke" /><meta name="citation_author" content="Yumi Jin" /><meta name="citation_author" content="Masato Kimura" /><meta name="citation_author" content="Zhen Y Wang" /><meta name="citation_author" content="Lora Ziyabari" /><meta name="citation_author" content="Moira Lee" /><meta name="citation_author" content="Michael Feolo" /><meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK154410/" /><link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" /><meta name="DC.Title" content="The Database of Genotypes and Phenotypes (dbGaP) and PheGenI" /><meta name="DC.Type" content="Text" /><meta name="DC.Publisher" content="National Center for Biotechnology Information (US)" /><meta name="DC.Contributor" content="Kimberly A Tryka" /><meta name="DC.Contributor" content="Luning Hao" /><meta name="DC.Contributor" content="Anne Sturcke" /><meta name="DC.Contributor" content="Yumi Jin" /><meta name="DC.Contributor" content="Masato Kimura" /><meta name="DC.Contributor" content="Zhen Y Wang" /><meta name="DC.Contributor" content="Lora Ziyabari" /><meta name="DC.Contributor" content="Moira Lee" /><meta name="DC.Contributor" content="Michael Feolo" /><meta name="DC.Date" content="2013/08/15" /><meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK154410/" /><meta name="description" content="The Database of Genotypes and Phenotypes (dbGaP) is a National Institutes of Health (NIH) sponsored 
repository charged to archive, curate and distribute information produced by studies investigating the interaction of genotype and phenotype (1). It was launched in response to the development of NIH’s GWAS policy and provides unprecedented access to very large genetic and phenotypic datasets funded by National Institutes of Health and other agencies worldwide. Scientists from the global research community may access all public data and apply for controlled access data." /><meta name="og:title" content="The Database of Genotypes and Phenotypes (dbGaP) and PheGenI" /><meta name="og:type" content="book" /><meta name="og:description" content="The Database of Genotypes and Phenotypes (dbGaP) is a National Institutes of Health (NIH) sponsored repository charged to archive, curate and distribute information produced by studies investigating the interaction of genotype and phenotype (1). It was launched in response to the development of NIH’s GWAS policy and provides unprecedented access to very large genetic and phenotypic datasets funded by National Institutes of Health and other agencies worldwide. Scientists from the global research community may access all public data and apply for controlled access data." /><meta name="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK154410/" /><meta name="og:site_name" content="NCBI Bookshelf" /><meta name="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook2e-lrg.png" /><meta name="twitter:card" content="summary" /><meta name="twitter:site" content="@ncbibooks" /><meta name="warning" content="This publication is provided for historical reference only and the information may be out of date." 
/><meta name="bk-non-canon-loc" content="/books/n/handbook2e/dbGaP/" /><link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK154410/" /><link rel="stylesheet" href="/corehtml/pmc/css/figpopup.css" type="text/css" media="screen" /><link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css" type="text/css" /><link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_print.min.css" type="text/css" media="print" /><style type="text/css">.main-content {background:transparent repeat-y top left;background-image:url(/corehtml/pmc/css/bookshelf/2.26/img/archive.png);background-size: auto, contain; padding:0 0 0 3em }</style><style type="text/css">p a.figpopup{display:inline !important} .bk_tt {font-family: monospace} .first-line-outdent .bk_ref {display: inline} .body-content h2, .body-content .h2 {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a {text-decoration:none;border:0 !important} .temp-labeled-list .graphic {display:inline-block !important} .temp-labeled-list img{width:100%}</style><script type="text/javascript" src="/corehtml/pmc/js/jquery.hoverIntent.min.js"> </script><script type="text/javascript" src="/corehtml/pmc/js/common.min.js?_=3.18"> </script><script type="text/javascript" src="/corehtml/pmc/js/large-obj-scrollbars.min.js"> </script><script type="text/javascript">window.name="mainwindow";</script><script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/book-toc.min.js"> </script><script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/books.min.js"> </script><meta name="book-collection" content="NONE" />
|
||
|
||
<!-- Page meta end -->
|
||
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8D4A1E7D9FDDC100000000007D006F.m_13" />
|
||
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3985586/3808861/4121862/3974050/3917732/251717/4216701/14534/45193/4113719/3849091/3984811/3751656/4033350/3840896/3577051/3852958/4008682/4207974/4206132/4062871/12930/3964959/3854974/36029/4128070/9685/3549676/3609192/3609193/3609213/3395586.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3411343/3882866.css" media="print" /></head>
|
||
<body class="book-part">
|
||
<div class="grid">
|
||
<div class="col twelve_col nomargin shadow">
|
||
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
|
||
<!-- System-message slot. With scripting disabled, the noscript fallback warns that the
     site needs JavaScript and links to the browser help page. -->
<div class="sysmessages">
  <noscript>
    <p class="nojs">
      <strong>Warning:</strong>
      The NCBI web site requires JavaScript to function.
      <!-- rel="noopener": the help page opens in a new tab and must not get a window.opener handle. -->
      <a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank" rel="noopener">more...</a>
    </p>
  </noscript>
</div>
|
||
<!--/.sysmessage-->
|
||
<div class="wrap">
|
||
<div class="page">
|
||
<div class="top">
|
||
<div id="universal_header">
|
||
<!-- U.S. government identity banner: a header strip with a toggle button, followed by a
     panel (#gov-banner-top) explaining the .gov domain and HTTPS. The button references
     the panel through aria-controls; the panel is marked aria-hidden="true" and the
     button aria-expanded="false" in the static markup. -->
<section class="usa-banner" aria-label="Official website of the United States government">
  <div class="usa-accordion">
    <header class="usa-banner-header">
      <div class="usa-grid usa-banner-inner">
        <img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
        <p>An official website of the United States government</p>
        <button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
          <span class="usa-banner-button-text">Here's how you know</span>
        </button>
      </div>
    </header>
    <div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
      <!-- Left half: what the .gov domain means. -->
      <div class="usa-banner-guidance-gov usa-width-one-half">
        <img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
        <div class="usa-media_block-body">
          <p>
            <strong>The .gov means it's official.</strong>
            <br />
            Federal government websites often end in .gov or .mil. Before
            sharing sensitive information, make sure you're on a federal
            government site.
          </p>
        </div>
      </div>
      <!-- Right half: what HTTPS means. -->
      <div class="usa-banner-guidance-ssl usa-width-one-half">
        <img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
        <div class="usa-media_block-body">
          <p>
            <strong>The site is secure.</strong>
            <br />
            The <strong>https://</strong> ensures that you are connecting to the
            official website and that any information you provide is encrypted
            and transmitted securely.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
|
||
<div class="usa-overlay"></div>
|
||
<!-- NCBI site header: agency logo, account controls (login link and account-info button,
     both rendered with display:none in the static markup), and the account popup that
     the account-info button references through aria-controls="account_popup". -->
<header class="ncbi-header" role="banner" data-section="Header">
  <div class="usa-grid">
    <div class="usa-width-one-whole">

      <div class="ncbi-header__logo">
        <a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
          <img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
        </a>
      </div>

      <div class="ncbi-header__account">
        <a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
        <button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
          <!-- Decorative icon; the button's accessible name comes from the screen-reader-only text below. -->
          <span class="fa fa-user" aria-hidden="true">
            <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
              <g style="fill: #fff">
                <ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
                <path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
              </g>
            </svg>
          </span>
          <span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
          <!-- NOTE(review): this uses "sr-only" while the rest of the header uses "usa-sr-only"; confirm both classes exist in the stylesheet. -->
          <span class="sr-only">Show account info</span>
        </button>
      </div>

      <div class="ncbi-popup-anchor">
        <div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
          <div class="ncbi-popup-head">
            <button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
              <!-- aria-hidden added to match the user icon above: the glyph is decorative and "Close" is the accessible name. -->
              <span class="fa fa-times" aria-hidden="true">
                <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
                  <path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
                </svg>
              </span>
              <span class="usa-sr-only">Close</span></button>
            <h4>Account</h4>
          </div>
          <div class="account-user-info">
            Logged in as:<br />
            <b><span class="username" id="uname_long">username</span></b>
          </div>
          <div class="account-links">
            <ul class="usa-unstyled-list">
              <li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
              <li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
              <li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
              <li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
            </ul>
          </div>
        </div>
      </div>

    </div>
  </div>
</header>
|
||
<!-- Access-key shortcuts: screen-reader-only links bound to accesskey 0-4. tabindex="-1"
     keeps them out of the normal tab order. -->
<div role="navigation" aria-label="access keys">
  <a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
  <a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
  <a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
  <a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
  <!-- NOTE(review): href="#" is a placeholder with no real destination; presumably a script
       rewrites it or the page has no main-navigation anchor. Confirm, then point it at a real id. -->
  <a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
</div>
|
||
<!-- Alerts slot: the placeholder div is empty in the static markup.
     NOTE(review): presumably filled in by script at runtime; confirm. -->
<section data-section="Alerts">
  <div class="ncbi-alerts-placeholder"></div>
</section>
|
||
</div>
|
||
<div class="header">
|
||
<div class="res_logo"><h1 class="res_name"><a href="/books/" title="Bookshelf home">Bookshelf</a></h1><h2 class="res_tagline"></h2></div>
|
||
<div class="search"><form method="get" action="/books/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="books" selected="selected" data-ac_dict="bookshelf-search">Books</option><option value="gene">Gene</option><option value="nuccore">Nucleotide</option><option value="pubmed" class="last">PubMed</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books" data-ac_dict="bookshelf-search">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option 
value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search Books. Use up and down arrows to choose an item from the autocomplete." value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="dictionary:'bookshelf-search',disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'no'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form><ul class="searchlinks inline_list"><li>
|
||
<a href="/books/browse/">Browse Titles</a>
|
||
</li><li>
|
||
<a href="/books/advanced/">Advanced</a>
|
||
</li><li class="help">
|
||
<a href="/books/NBK3833/">Help</a>
|
||
</li><li class="disclaimer">
|
||
<a target="_blank" data-ga-category="literature_resources" data-ga-action="link_click" data-ga-label="disclaimer_link" href="https://www.ncbi.nlm.nih.gov/books/about/disclaimer/">Disclaimer</a>
|
||
</li></ul></div>
|
||
</div>
|
||
|
||
|
||
|
||
<!--<component id="Page" label="headcontent"/>-->
|
||
|
||
</div>
|
||
<div class="content">
|
||
<!-- site messages -->
|
||
<!-- Custom content 1 -->
|
||
<div class="col1">
|
||
|
||
</div>
|
||
|
||
<div class="container">
|
||
<div id="maincontent" class="content eight_col col">
|
||
<!-- Custom content in the left column above book nav -->
|
||
<div class="col2">
|
||
|
||
</div>
|
||
|
||
<!-- Book content -->
|
||
|
||
|
||
<!-- Custom content between navigation and content -->
|
||
<div class="col3">
|
||
|
||
</div>
|
||
|
||
<div class="document">
|
||
<!-- Book banner above the chapter: print-only citation, "historical reference" warning, cover thumbnail and title, in-book search form, and Prev/Next chapter links. Markup is kept on its original lines so no inter-element whitespace is introduced. -->
<div class="pre-content"><div><div class="bk_prnt"><p class="small">NCBI Bookshelf. A service of the National Library of Medicine, National Institutes of Health.</p><p>The NCBI Handbook [Internet]. 2nd edition. Bethesda (MD): National Center for Biotechnology Information (US); 2013-. </p></div><div class="messagearea bk_noprnt" style="margin-bottom:1.3846em "><ul class="messages"><li class="warn icon"><span class="icon">This publication is provided for historical reference only and the information may be out of date.</span></li></ul></div><div class="bk_prnt"><p style="color:red;"><strong>This publication is provided for historical reference only and the information may be out of date.</strong></p></div><div class="iconblock clearfix whole_rhythm no_top_margin bk_noprnt"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/handbook2e/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook2e-lrg.png" alt="Cover of The NCBI Handbook" height="100" width="80" /></a><div class="icnblk_cntnt eight_col"><h2>The NCBI Handbook [Internet]. 
2nd edition.</h2><a data-jig="ncbitoggler" href="#__NBK154410_dtls__">Show details</a><div style="display:none" class="ui-widget" id="__NBK154410_dtls__"><div>Bethesda (MD): <a href="https://www.ncbi.nlm.nih.gov/" ref="pagearea=page-banner&amp;targetsite=external&amp;targetcat=link&amp;targettype=publisher">National Center for Biotechnology Information (US)</a>; 2013-.</div></div><div class="half_rhythm"><ul class="inline_list"><li style="margin-right:1em"><a class="bk_cntns" href="/books/n/handbook2e/">Contents</a></li></ul></div><div class="bk_noprnt"><form method="get" action="/books/n/handbook2e/" id="bk_srch"><div class="bk_search"><label for="bk_term" class="offscreen_noflow">Search term</label><input type="text" title="Search this book" id="bk_term" name="term" value="" data-jig="ncbiclearbutton" /> <input type="submit" class="jig-ncbibutton" value="Search this book" submit="false" style="padding: 0.1em 0.4em;" /></div></form></div></div><div class="icnblk_cntnt two_col"><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook2e/AboutVariation/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook2e/dbSNP/" title="Next page in this title">Next &gt;</a></div></div></div></div></div>
|
||
<div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><h1 id="_NBK154410_"><span class="title" itemprop="name">The Database of Genotypes and Phenotypes (dbGaP) and PheGenI</span></h1><p class="contrib-group"><span itemprop="author">Kimberly A Tryka</span>, <span itemprop="author">Luning Hao</span>, <span itemprop="author">Anne Sturcke</span>, <span itemprop="author">Yumi Jin</span>, <span itemprop="author">Masato Kimura</span>, <span itemprop="author">Zhen Y Wang</span>, <span itemprop="author">Lora Ziyabari</span>, <span itemprop="author">Moira Lee</span>, and <span itemprop="author">Michael Feolo</span>.</p><a data-jig="ncbitoggler" href="#__NBK154410_ai__" style="border:0;text-decoration:none">Author Information and Affiliations</a><div style="display:none" class="ui-widget" id="__NBK154410_ai__"><p class="contrib-group"><h4>Authors</h4><span itemprop="author">Kimberly A Tryka</span>,<sup>1</sup> <span itemprop="author">Luning Hao</span>,<sup>1</sup> <span itemprop="author">Anne Sturcke</span>,<sup>1</sup> <span itemprop="author">Yumi Jin</span>,<sup>1</sup> <span itemprop="author">Masato Kimura</span>,<sup>1</sup> <span itemprop="author">Zhen Y Wang</span>,<sup>1</sup> <span itemprop="author">Lora Ziyabari</span>,<sup>1</sup> <span itemprop="author">Moira Lee</span>,<sup>1</sup> and <span itemprop="author">Michael Feolo</span><sup>1</sup>.</p><h4>Affiliations</h4><div class="affiliation"><sup>1</sup> NCBI<div><span class="email-label">Email: </span><a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@kakyrt" class="oemail">vog.hin.mln.ibcn@kakyrt</a><div><span class="email-label">Email: </span><a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@oah" class="oemail">vog.hin.mln.ibcn@oah</a></div><div><span class="email-label">Email: </span><a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@agnaik" class="oemail">vog.hin.mln.ibcn@agnaik</a></div><div><span 
class="email-label">Email: </span><a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@uynij" class="oemail">vog.hin.mln.ibcn@uynij</a></div><div><span class="email-label">Email: </span><a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@amarumik" class="oemail">vog.hin.mln.ibcn@amarumik</a></div><div><span class="email-label">Email: </span><a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@gnawaj" class="oemail">vog.hin.mln.ibcn@gnawaj</a></div><div><span class="email-label">Email: </span><a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@lrabayiz" class="oemail">vog.hin.mln.ibcn@lrabayiz</a></div><div><span class="email-label">Email: </span><a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@ariomeel" class="oemail">vog.hin.mln.ibcn@ariomeel</a></div><div><span class="email-label">Email: </span><a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@oloef" class="oemail">vog.hin.mln.ibcn@oloef</a></div></div></div></div><p class="small">Created: <span itemprop="datePublished">August 15, 2013</span>.</p><p><em>Estimated reading time: 41 minutes</em></p></div><div class="jig-ncbiinpagenav body-content whole_rhythm" data-jigconfig="allHeadingLevels: ['h2'],smoothScroll: false" itemprop="text"><div id="dbGaP.Scope"><h2 id="_dbGaP_Scope_">Scope</h2><p>The Database of Genotypes and Phenotypes (dbGaP) is a National Institutes of Health (NIH) sponsored repository charged to archive, curate and distribute information produced by studies investigating the interaction of <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> and <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> (<a class="bk_pop" href="#dbGaP.REF.1">1</a>). 
It was launched in response to the development of NIH’s <a href="http://gwas.nih.gov/index.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>GWAS policy</u></a> and provides unprecedented access to very large genetic and phenotypic datasets funded by National Institutes of Health and other agencies worldwide. Scientists from the global research community may access all public data and apply for controlled access data.</p><p>The information contained in dbGaP includes individual level molecular and <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> data, analysis results, medical images, general information about the study, and documents that contextualize phenotypic variables, such as research protocols and questionnaires. Submitted data undergoes <a class="def" href="/books/n/handbook2e/glossary/def-item/quality-control/">quality control</a> and curation by dbGaP staff before being released to the public.</p><p>Information about submitted studies, summary level data, and documents related to studies can be accessed freely on the dbGaP website (<a href="/gap" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>http://www.ncbi.nlm.nih.gov/gap</u></a>). Individual-level data can be accessed only after a Controlled Access application, stating research objectives and demonstrating the ability to adequately protect the data, has been approved (<a href="https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?page=login" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?page=login</u></a>). 
Public summary data from dbGaP are also accessed without restriction via the <a class="def" href="/books/n/handbook2e/glossary/def-item/phegeni/">PheGenI</a> tool, as detailed in the <a href="#dbGaP.Related_Tools">Related tools section.</a></p></div><div id="dbGaP.History"><h2 id="_dbGaP_History_">History</h2><p>Planning for the <a class="def" href="/books/n/handbook2e/glossary/def-item/database/">database</a> began in 2006 and the database received its first request for data in mid-2007. The initial release of dbGaP contained data on two Genome-Wide Association Studies (GWAS): the Age-Related Eye Diseases Study (AREDS), a 600-subject, multicenter, case-controlled, prospective study of the clinical course of age-related macular degeneration and age-related cataracts supported by the National Eye Institute (NEI), and the National Institute of Neurological Disorders and Stroke (NINDS) Parkinsonism Study, a case-controlled study that gathered <a class="def" href="/books/n/handbook2e/glossary/def-item/dna/">DNA</a>, cell line samples and detailed phenotypic data on 2,573 subjects. The data from the Genetic Association Information Network (GAIN) (<a class="bk_pop" href="#dbGaP.REF.2">2</a>) was released soon after.</p><p>Although initially designed for GWAS, the scope of dbGaP has expanded to facilitate making individual level information accessible to research communities and to provide data needed to understand the manifestation of disease and how that relates to the <a class="def" href="/books/n/handbook2e/glossary/def-item/genome/">genome</a>, proteome and epigenome. The dbGaP has been growing rapidly since its inception. 
See the dbGaP <a href="/gap" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">home page</a> for current content.</p></div><div id="dbGaP.Data_Model"><h2 id="_dbGaP_Data_Model_">Data Model</h2><div id="dbGaP.Accessioned_Objects"><h3>Accessioned Objects</h3><p>The data in dbGaP are organized as a hierarchical structure of studies. Accessioned objects within dbGaP include studies, phenotypes (as variables and datasets), various molecular assay data (<a class="def" href="/books/n/handbook2e/glossary/def-item/snp/">SNP</a> and Expression Array, Sequence, and Epigenomic marks), analyses, and documents (<a class="figpopup" href="/books/NBK154410/figure/dbGaP.F1/?report=objectonly" target="object" rid-figpopup="figdbGaPF1" rid-ob="figobdbGaPF1">Figure 1</a>). Each of these is described in its own section below.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figdbGaPF1" co-legend-rid="figlgnddbGaPF1"><a href="/books/NBK154410/figure/dbGaP.F1/?report=objectonly" target="object" title="Figure 1. " class="img_link icnblk_img figpopup" rid-figpopup="figdbGaPF1" rid-ob="figobdbGaPF1"><img class="small-thumb" src="/books/NBK154410/bin/dbGaP-Image002.gif" src-large="/books/NBK154410/bin/dbGaP-Image002.jpg" alt="Figure 1. . This figure shows the relationships between dbGaP accessioned objects and whether they are available publicly or only through Controlled Access." /></a><div class="icnblk_cntnt" id="figlgnddbGaPF1"><h4 id="dbGaP.F1"><a href="/books/NBK154410/figure/dbGaP.F1/?report=objectonly" target="object" rid-ob="figobdbGaPF1">Figure 1. </a></h4><p class="float-caption no_bottom_margin">This figure shows the relationships between dbGaP accessioned objects and whether they are available publicly or only through Controlled Access. This is an updated version of a figure that originally appeared in Mailman, et al. 2007. 
</p></div></div><div id="dbGaP.Studies"><h4>Studies</h4><p>The data archived and distributed by dbGaP are organized as studies. Studies may be either stand-alone or combined in a “parent study/child study” hierarchy. Parent or “top level” studies may have any number of child studies (also referred to as substudies). However, study hierarchy is limited to two levels (parent and child only). In other words, substudies may not have substudies. Studies, whether parent or child, can contain all types of data ascertained in genetic, clinical or epidemiological research projects such as <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> and molecular assay information that are linked via subject and sample IDs. Studies often contain documents, such as questionnaires and protocols, which help contextualize the phenotype and <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> data. Study data are distributed by consent groups, each of which contains all data from a set of study participants who have signed the same consent agreement. In other words, the data delivered for a single consent group will all have the same Data Use Limitations (DULs) for future research use.</p><p>Each study is assigned a unique <a class="def" href="/books/n/handbook2e/glossary/def-item/accession-number/">accession number</a>, which should be used when citing the study. The general dbGaP accession format for a study is phs######.v#.p#. The first three letters [phs] denote the object type (‘s’ denotes study), followed by a 6-digit, zero-padded object number (######), which is consecutively assigned by dbGaP. The version number (.v#) indicates updates of the object, where # is initially 1 and increments by 1 as the object is updated. The version number is followed by the participant group (.p#), where # is initially 1 and increments by 1 as the participant group changes. 
The version number of a study will increment any time the version of an object contained by the study (such as a <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> variable, <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> data, or a sub study) is updated. The participant group of a study will change when existing subjects are removed, or when an existing subject changes from one consent group to another, but not when additional subjects are added.</p><p>While the data found in studies can vary widely based on both the number of participants and the variety of deposited phenotypic and molecular data, all studies include basic descriptive metadata such as study title, study description, inclusion/exclusion criteria, study history, disease terms, publications related to the study, names and affiliations of the principal investigators, and sources of funding. This information is publicly available on the study’s report page at the <a href="/gap" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">dbGaP website</a>.</p></div><div id="dbGaP.Datasets_and_Variables"><h4>Datasets and Variables</h4><p>Phenotypic data values are submitted to dbGaP as tabular files or datasets (accessioned with pht#, where ‘t’ denotes table), where columns represent phenotypic variables (accessioned with a phv#, where ‘v’ denotes variable) and rows represent subjects. A dbGaP <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> variable consists of two parts: the data values and the description of the data in the accompanying data dictionary. Each cell (value) in a <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> is stored in a relational <a class="def" href="/books/n/handbook2e/glossary/def-item/database/">database</a> and is mapped to the appropriate phenotype variable and subject. 
Phenotype variable metadata are provided by the submitter via a data dictionary for each dataset and include: variable name, variable description, units, and a list of any coded responses. The variable’s data type (text string, integer, decimal or date) is automatically determined by calculating which type is in the majority. Conflicts between submitted and calculated data types, or other discrepancies in the data, are reconciled by dbGaP curators in consultation with the data submitter.</p><p>Variables are created from the columns of the <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a>; each variable and dataset is accessioned using the general dbGaP format ph(v|t)######.v#.p#. A variable’s version (v#) will change when either values of data change or its entry in the data dictionary changes. A dataset’s version will change when a variable inside the dataset is added, updated or deleted. For both variables and datasets the participant set (p#), is inherited from the study to which it belongs. 
Variables, and sometimes datasets, are linked to appropriate sections of documents (please see the Website section below for details).</p><p>Individual-level <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> data is only available through the dbGaP <a href="https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?page=login" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>Authorized Access System</u></a>. Public summary-level variable information is available on the dbGaP <a href="/gap" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>website</u></a> and <a href="/public/?/ftp/dbgap/studies/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>FTP site</u></a>.</p></div><div id="dbGaP.Genotype_data"><h4>Genotype data</h4><p>Genotype data hosted at the dbGaP consist of individual level genotypes and aggregated summaries, both of which are distributed through the dbGaP <a href="https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?page=login" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>Authorized Access System</u></a>. The types of data available include <a class="def" href="/books/n/handbook2e/glossary/def-item/dna/">DNA</a> variations, <a class="def" href="/books/n/handbook2e/glossary/def-item/snp/">SNP</a> assay, DNA methylation (<a class="def" href="/books/n/handbook2e/glossary/def-item/epigenomics/">epigenomics</a>), copy number variation, as well as genomic/exomic sequencing. RNA data types such as expression array, RNA seq, and <a class="def" href="/books/n/handbook2e/glossary/def-item/eqtl/">eQTL</a> results are also available. 
For details about the accepted format of submitted <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> files please see the dbGaP <a href="/projects/gap/cgi-bin/GetZip.cgi?zip_name=dbGaP_SubmissionPackage.zip" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>submission guide</u></a>.</p><p>Genotype data are accessioned based on their data type and use the general dbGaP accession format ph(g|e|a)######.v# where ‘g’ denotes GWAS, ‘e’ expression, and ‘a’ analysis. Versioning of <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> data is triggered by addition or withdrawal of samples, sample consent status change, or error correction.</p><p>Genotype data files are compressed and archived into tar files for distribution. The files are explicitly named to indicate file content, such as image data (cel and idat), <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> calls (genotype), and <a class="def" href="/books/n/handbook2e/glossary/def-item/locus/">locus</a> annotations (marker info). Genotype calls are usually clustered according to file format and genotyping platform, including one sample per file (indfmt), multiple-sample matrix (matrixfmt) and pre-defined variant call format (<a href="http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>vcf</u></a>). They will be accompanied by sample-info file for subject lookup and consent status. 
The consent code and consent abbreviation are also embedded in the file name.</p><p>Examples of <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> data file names:</p><p>phe000005.v1.FHS_SHARe_project4_miRNA.sample-info.MULTI.tar</p><p>phg000006.v6.FHS_SHARe_Affy500K.<a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a>-calls-matrixfmt.c2.HMB-NPU-MDS-<a class="def" href="/books/n/handbook2e/glossary/def-item/irb/">IRB</a>.tar</p><p>The various pieces of the names can be parsed to extract meaningful content: the <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> accession (phe000005.v1 and phg000006.v6); the study (FHS_SHARe in both cases); the molecule type (miRNA); the platform/<a class="def" href="/books/n/handbook2e/glossary/def-item/chip/">chip</a> information (Affy500K); the content type (sample-info or genotype-calls-matrixfmt); the consent code (c2); and the consent abbreviation (HMB-NPU-MDS-<a class="def" href="/books/n/handbook2e/glossary/def-item/irb/">IRB</a>).</p></div><div id="dbGaP.Analyses"><h4>Analyses</h4><p>Because of the large volume of data generated and concerns regarding participant confidentiality many genetic epidemiological analyses have not been published. But, because individual-level data is only accessed through Controlled Access, dbGaP can archive, integrate and distribute these results.</p><p>Analyses can either be provided by submitters or be pre-computed by dbGaP staff, though pre-computes account for a small number of the total analyses. Submitted analysis results are accessioned with the prefix “pha”. 
After removing identifiable elements, like counts and frequencies, analysis results are displayed in the public dbGaP browser that dynamically links to <a class="def" href="/books/n/handbook2e/glossary/def-item/ncbi/">NCBI</a> annotation resources, like <a href="/snp" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>dbSNP</u></a>, <a href="/gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>Gene</u></a>, <a href="/refseq/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>RefSeq</u></a>. These public views can be found through the “Analysis” link on the study page and they can be downloaded from the <a class="def" href="/books/n/handbook2e/glossary/def-item/ftp/">FTP</a> site. The original submitted analyses, including updated marker info, are fully accessible through dbGaP Controlled Access.</p><p>Analysis files are typically formatted by population, trait and analysis method (typical), however some include surveys across multiple populations, and may use either SNPs or genes as the loci analyzed. 
However, in general, they all contain the following three parts which are also required for dbGaP submission.</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">Metadata, which includes trait, population, sample size and brief descriptions on Analysis and Method.</p></dd><dt>2.</dt><dd><p class="no_top_margin">Marker information and genotyping summary, such as identifiers of loci (variation, gene and structure variant), alleles, <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> counts (frequency), call rate and <a class="def" href="/books/n/handbook2e/glossary/def-item/pvalue/">p-value</a> from Hardy-Weinberg-equilibrium testing.</p></dd><dt>3.</dt><dd><p class="no_top_margin">Testing statistics, including <a class="def" href="/books/n/handbook2e/glossary/def-item/pvalue/">p-value</a>, effect size (odds ratio/regression coefficient/relative risk) and direction (coding <a class="def" href="/books/n/handbook2e/glossary/def-item/allele/">allele</a>) if association results.</p></dd></dl><p>With these resources, other scientists can verify the discoveries, recalculate statistics under various genetic models, develop new hypotheses, and more importantly, construct a meta-analysis even though individual-level data are inaccessible. The details of data fields are listed in dbGaP submission guides and we welcome suggestions and comments from the scientific community. An interactive view of the analysis results submitted to dbGaP is described in The dbGaP Genome Browser in the <a href="#dbGaP.Related_Tools">Related Tools section</a> of this chapter.</p></div><div id="dbGaP.Documents"><h4>Documents</h4><p>The dbGaP encourages investigators to submit documents related to their studies, such as protocols, patient questionnaires, survey instruments and consent forms, along with their data. These documents provide valuable information and context for subsequent researchers who will apply for and download datasets. 
All submitted documents are available publicly and can be used by anyone interested in gaining a better understanding of the phenotypic data found in a study.</p><p>Each document is accessioned using the general dbGaP format phd######.v# where “d” indicates document. A document’s version (v#) will change when the variables annotated on the document change, or when the document itself is changed significantly. (For example, fixing a typo would not be considered a significant change unless it were to change the meaning of the document.)</p><p>Documents submitted to dbGaP are represented in a common <a class="def" href="/books/n/handbook2e/glossary/def-item/xml/">XML</a> format. Converting documents into a common format allows all documents to be treated uniformly in the <a class="def" href="/books/n/handbook2e/glossary/def-item/database/">database</a> (aiding indexing and discovery) and to be displayed in a single HTML style. Additionally, the XML format allows curated information to be added to the documents. This curated information is used to create live links between the documents and other portions of the dbGaP website, such as variable report pages. 
Linking between documents and other objects will be discussed further in the section about the dbGaP website.</p><p>Documents are generally viewable on the dbGaP website in both HTML and <a class="def" href="/books/n/handbook2e/glossary/def-item/pdf/">PDF</a> format (the PDF for a document may be the originally submitted object, if it was sent as PDF, or could be a PDF representation of another format such as Microsoft Word or Excel or a plain text file).</p><p>The <a class="def" href="/books/n/handbook2e/glossary/def-item/xml/">XML</a> used by dbGaP is an extension of <a class="def" href="/books/n/handbook2e/glossary/def-item/nlm/">NLM</a>’s Archiving and Interchange Tag Set Version 2.3 (<a href="http://dtd.nlm.nih.gov/archiving/2.3/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>http://dtd.nlm.nih.gov/archiving/2.3/</u></a>). The extension adds structures to code questionnaires and adds a number dbGaP-specific attributes to common document structures (such as sections, tables, and lists) to facilitate curation. A copy of our extension is publicly available at <a href="http://dtd.nlm.nih.gov/gap/2.0/wga-study2.dtd" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>http://dtd.nlm.nih.gov/gap/2.0/wga-study2.dtd</u></a>, and documentation for the extension is located at <a href="http://dtd.nlm.nih.gov/gap/2.0/doc/wga-document.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>http://dtd.nlm.nih.gov/gap/2.0/doc/wga-document.html</u></a>.</p></div></div></div><div id="dbGaP.Dataflow"><h2 id="_dbGaP_Dataflow_">Dataflow</h2><div id="dbGaP.Submissions"><h3>Submissions</h3><p>The NIH strongly supports the broad sharing of de-identified data generated by NIH-funded investigators and facilitates data sharing for meritorious studies that are not NIH-funded. 
Decisions about whether non-NIH-funded data should be accepted are made by individual NIH Institutes and Centers (IC); ICs will not accept data unless the submission is compatible with <a href="http://grants.nih.gov/grants/guide/notice-files/NOT-OD-07-088.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">NIH’s GWAS policy</a>.</p><div id="dbGaP.NIHFunded_Studies"><h4>NIH-Funded Studies</h4><p>Institutional certification, as well as basic information about the study, is required when submitting data to dbGaP.</p><ul><li class="half_rhythm"><div><b>Institutional certification</b> consists of a letter signed by the principal investigator and an institutional official that confirms permission to submit data to dbGaP. NIH has developed <a href="http://gwas.nih.gov/pdf/PTC_for_IRBs_and_Institutions_revised5-31-11.pdf" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Points to Consider for IRBs and Institutions</a> to assist institutions in their review and certification of an investigator’s plan for submission of data to dbGaP.</div></li><li class="half_rhythm"><div><b>Basic information</b> consists of items like the title of the study, a description and history of the study, inclusion and exclusion criteria, listing of principal investigators (PIs) and funding information.</div></li></ul><p>Principal investigators (PIs) should familiarize themselves with the <a href="http://gwas.nih.gov/pdf/PTC_for_IRBs_and_Institutions_revised5-31-11.pdf" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">“NIH Points to Consider”</a> document that provides information about: the NIH GWAS Data Sharing Policy; benefits of broad sharing of data through a central data repository; risks associated with the submission and subsequent sharing of such data; safeguards designed to protect 
the confidentiality of research participants; and specific points for institutional review boards (IRBs) to consider during review and certification of PIs’ data submission plans.</p><p>The principal investigators must contact their NIH program official (PO) to begin the submission process. If the study was not funded by the NIH, PIs should contact <a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@pleh-PaGbd" class="oemail">vog.hin.mln.ibcn@pleh-PaGbd</a> for guidance.</p></div><div id="dbGaP.NonNIHFunded_Studies"><h4>Non-NIH-Funded Studies</h4><p>To submit non-NIH-funded data to dbGaP the following information will need to be provided:</p><ul><li class="half_rhythm"><div><b>Institutional certification</b> as described in the last section. To provide this, someone from the institution or organization will need to be registered in eRA Commons. Information regarding registration is available from the <a href="https://commons.era.nih.gov/commons" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">eRA Commons website</a>. (Note: The review of a <a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a>’s request can be initiated without the certification, but the review process will be expedited if GWAS staff receives the certification at time of submission.)</div></li><li class="half_rhythm"><div><b>Basic information</b> about the study, as described in the previous section.</div></li><li class="half_rhythm"><div><b>The NIH IC</b> that most closely aligns with the research. 
A list of ICs can be found at <a href="http://www.nih.gov/icd/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">http://www.nih.gov/icd/</a>.</div></li><li class="half_rhythm"><div><b>Whether the study has been published or accepted for publication.</b> If it has, the <a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a> should provide documentation (i.e., the publication citation or a copy of any correspondence indicating that an article about the study has been accepted for publication).</div></li></ul><p>The <a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a> should submit all information and the certification to <a href="mailto:dev@null" data-email="vog.hin.liam@SAWG" class="oemail">vog.hin.liam@SAWG</a>. Once GWAS staff receives the documents, they will forward them to the appropriate IC program administrator for consideration. The IC program administrator will contact the PI with any questions and/or to notify the PI of the IC’s decision.</p><p>The <a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a> is encouraged to consult with the Program Officer/Director (PO/PD) and/or <a href="http://gwas.nih.gov/04po2_2GPA.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">IC GWAS Program Administrator (GPA)</a> at an NIH Institute or Center (IC) to discuss the project, data sharing plan, and data certification process (non-NIH-funded projects should contact <a href="mailto:dev@null" data-email="pleH PaGbd" class="oemail">pleH PaGbd</a>) to complete the registration process.</p></div><div id="dbGaP.Instructions_for_submitters"><h4>Instructions for submitters</h4><div id="dbGaP.Study_Registration"><h5>Study Registration</h5><p>Before data can be submitted to dbGaP, the study must be registered in the dbGaP Registration system following these steps:</p><ul><li class="half_rhythm"><div>The GPA from the sponsoring IC gathers study registration information from the <a class="def" 
href="/books/n/handbook2e/glossary/def-item/pi/">PI</a>.</div></li><li class="half_rhythm"><div>Completes the study registration in the dbGaP Registration System by providing:</div><ul><li class="half_rhythm"><div>Study details</div></li><li class="half_rhythm"><div>Signed Institutional Certification</div></li><li class="half_rhythm"><div>Approved Data Use Certification (<a class="def" href="/books/n/handbook2e/glossary/def-item/duc/">DUC</a>)</div></li><li class="half_rhythm"><div>Consent groups and Data Use Limitations (DUL)</div></li></ul></li><li class="half_rhythm"><div>The Registration System sends an automated email to the investigator upon completion of the study registration acknowledging the study registration and giving further instructions on how to submit data to dbGaP.</div></li></ul></div><div id="dbGaP.Data_submission"><h5>Data submission</h5><p>The <a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a> will be provided with the <a href="/projects/gap/cgi-bin/GetZip.cgi?zip_name=dbGaP_SubmissionPackage.zip" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">dbGaP Submission Guide</a>. The packet contains templates and instructions on how to format the data files for submission to dbGaP. 
The expected files for each single study are:</p><ul><li class="half_rhythm"><div>1_dbGaP_StudyConfig*</div></li><li class="half_rhythm"><div>2a_dbGaP_SubjectPhenotypesDS</div></li><li class="half_rhythm"><div>2b_dbGaP_SubjectPhenotypesDD</div></li><li class="half_rhythm"><div>3a_dbGaP_SampleAttributesDS*</div></li><li class="half_rhythm"><div>3b_dbGaP_SampleAttributesDD*</div></li><li class="half_rhythm"><div>4a_dbGaP_SubjectDS*</div></li><li class="half_rhythm"><div>4b_dbGaP_SubjectDD*</div></li><li class="half_rhythm"><div>5a_dbGaP_SubjectSampleMappingDS*</div></li><li class="half_rhythm"><div>5b_dbGaP_SubjectSampleMappingDD*</div></li><li class="half_rhythm"><div>6a_dbGaP_PedigreeDS**</div></li><li class="half_rhythm"><div>6b_dbGaP_PedigreeDD**</div><dl class="temp-labeled-list"><dt>*</dt><dd><p class="no_top_margin">Required</p></dd></dl></li></ul><p>** Required if there are related subjects</p><div id="dbGaP.Ba" class="box"><h3><span class="title">Note for studies that expect/involve SRA (Sequence Read Archive) file submission</span></h3><p>Once the required files (listed above) are received by dbGaP, passed dbGaP QCs, and the subject consents and subject sample <a class="def" href="/books/n/handbook2e/glossary/def-item/mapping/">mapping</a> have been loaded into dbGaP and provided to BioSample, the <a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a> will be provided with a study <a class="def" href="/books/n/handbook2e/glossary/def-item/accession-number/">accession number</a> and a link to the corresponding study sample status page. The SRA submitter can then apply for an SRA submission account (Aspera account) and submit SRA files to dbGaP.</p></div></div></div></div><div id="dbGaP.Data_processing"><h3>Data processing</h3><p>Data received at dbGaP undergoes a sequence of processing steps. 
<a class="figpopup" href="/books/NBK154410/figure/dbGaP.F2/?report=objectonly" target="object" rid-figpopup="figdbGaPF2" rid-ob="figobdbGaPF2">Figure 2</a> illustrates the steps involved in moving a study through dbGaP. The first step is getting the study registered (as has been discussed above), and occurs before data transmission (shown in gray). The dbGaP data processing occurs in two pipelines, the <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> curation (blue) and <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> curation (purple). These pipelines are largely processed in parallel but converge prior to data release. The final step is preparing the study for release to the public (green). Particulars of the phenotype and genotype curation will be discussed below.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figdbGaPF2" co-legend-rid="figlgnddbGaPF2"><a href="/books/NBK154410/figure/dbGaP.F2/?report=objectonly" target="object" title="Figure 2. " class="img_link icnblk_img figpopup" rid-figpopup="figdbGaPF2" rid-ob="figobdbGaPF2"><img class="small-thumb" src="/books/NBK154410/bin/dbGaP-Image003.gif" src-large="/books/NBK154410/bin/dbGaP-Image003.jpg" alt="Figure 2. . This figure shows the complex processing that occurs after data has been submitted to dbGaP." /></a><div class="icnblk_cntnt" id="figlgnddbGaPF2"><h4 id="dbGaP.F2"><a href="/books/NBK154410/figure/dbGaP.F2/?report=objectonly" target="object" rid-ob="figobdbGaPF2">Figure 2. </a></h4><p class="float-caption no_bottom_margin">This figure shows the complex processing that occurs after data has been submitted to dbGaP. Of particular note is the step, in the center of the chart in black, where samples are matched. 
</p></div></div><div id="dbGaP.Phenotype_processing"><h4>Phenotype processing</h4><p>The phenotypic data are subjected to both automated and human-mediated assessment.</p><p>Before the data are loaded into the <a class="def" href="/books/n/handbook2e/glossary/def-item/database/">database</a>, scripts are used to evaluate the following:</p><ul><li class="half_rhythm"><div><b>Poor formatting</b> - Each <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> submitted to dbGaP should be a rectangular table showing variables in columns and subject or sample IDs in rows.</div></li><li class="half_rhythm"><div><b><a class="def" href="/books/n/handbook2e/glossary/def-item/hipaa/">HIPAA</a> violations</b> - The datasets submitted to dbGaP should follow the Health Insurance Portability and Accountability Act (HIPAA) rules in order to protect the privacy of personally identifiable health information.</div></li><li class="half_rhythm"><div><b>Issues with Subject and Sample IDs</b> - Submitters are required to use the subject consent file to list all the subjects who participated in, or are referred to by, the study, with their consent values. Subjects who had not directly participated in the study, e.g., parents of participants included in the <a class="def" href="/books/n/handbook2e/glossary/def-item/pedigree/">pedigree</a> file, should also be included in the consent file with consent value 0.</div></li><li class="half_rhythm"><div><b>Missing information in Data Dictionaries</b> - The submitters are required to submit data dictionaries, in addition to datasets, to explain the meanings of the variables and data values. For each value in the datasets, dbGaP requires that the submitters provide a variable description, variable type, units of values for numerical variables, and logical minimum and maximum values if available. 
For each encoded value, a code meaning should be included in the data dictionary.</div></li><li class="half_rhythm"><div><b>Issues with Pedigree files</b>- A <a class="def" href="/books/n/handbook2e/glossary/def-item/pedigree/">pedigree</a> file submitted to dbGaP should include the following columns: family ID, subject ID, father ID, mother ID, sex, and twin ID if available. We also require that all the subjects who appear in father ID or mother ID columns also be included in the subject ID column.</div></li></ul><p>More detailed information about the particular automated testing performed to catch errors in the broad categories listed above can be found in <a href="#dbGaP.Appendix__Phenotype_Quality_Contro">Appendix – Phenotype Quality Control</a>.</p><p>Reports from the automated <a class="def" href="/books/n/handbook2e/glossary/def-item/quality-control/">quality control</a> (<a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a>) scripts are reviewed by curatorial staff. If necessary, curators will communicate with the submitters and ask that new files be submitted correcting the errors. 
Even if the automated QC checks do not detect problems, the curatorial staff check all data dictionaries manually to see if there are any problems that were not identified by automated checks.</p></div><div id="dbGaP.Genotype_processing"><h4>Genotype processing</h4><p>The <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> data processing and <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> process consists of the following steps:</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">Check for availability of Sample-Subject Mapping (SSM) file and validate against subject list.</p></dd><dt>2.</dt><dd><p class="no_top_margin">Check for availability of sample <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> file and validate against SSM.</p></dd><dt>3.</dt><dd><p class="no_top_margin">Process the <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> file and generate PLINK (<a class="bk_pop" href="#dbGaP.REF.3">3</a>) set.</p></dd><dt>4.</dt><dd><p class="no_top_margin">Check for data consistency in the submitted <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> component against data from other submissions for the study.</p></dd><dt>5.</dt><dd><p class="no_top_margin">Conduct <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> checks and generate <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a>-QC component. 
This step includes checking:</p><dl class="temp-labeled-list"><dt>a.</dt><dd><p class="no_top_margin">Missing call rates per sample/per marker.</p></dd><dt>b.</dt><dd><p class="no_top_margin">Minor <a class="def" href="/books/n/handbook2e/glossary/def-item/allele-frequency/">allele frequency</a>.</p></dd><dt>c.</dt><dd><p class="no_top_margin">Mendelian error rate when trios are available.</p></dd><dt>d.</dt><dd><p class="no_top_margin">Duplicate concordance check (when duplicates are available).</p></dd><dt>e.</dt><dd><p class="no_top_margin">Gender check.</p></dd><dt>f.</dt><dd><p class="no_top_margin">IBD analysis.</p></dd></dl></dd><dt>6.</dt><dd><p class="no_top_margin">Generate <a class="def" href="/books/n/handbook2e/glossary/def-item/snp/">SNP</a> and subject filters based on test results.</p></dd><dt>7.</dt><dd><p class="no_top_margin">Verify <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> and <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> data using <a class="def" href="/books/n/handbook2e/glossary/def-item/ncbi/">NCBI</a> GWAS pre-compute against similar pre-compute provided by <a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a>/analysis group.</p></dd><dt>8.</dt><dd><p class="no_top_margin">Split PLINK sets according to consent and generate <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a>-calls-matrixfmt components.</p></dd><dt>9.</dt><dd><p class="no_top_margin">Generate sample-info and marker-info release components.</p></dd><dt>10.</dt><dd><p class="no_top_margin">Partition and pack build of individual <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> files according to subject consent information and data type.</p></dd></dl><p>The 
quality of the <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> data is checked at both the genotype data file and the genotyping level. Typically at the file level a genotype release contains individual level data in both individual (one file per sample) and matrix (one matrix with all samples) formats. The genotype matrices are generated by dbGaP curators from submitted individual genotype files and subject related information, such as gender and <a class="def" href="/books/n/handbook2e/glossary/def-item/pedigree/">pedigree</a> data. These matrices then are used to generate pre-computes/metrics/<a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a>-filters, which are further verified against similar pre-computes submitted by the investigators. When necessary, the genotyping quality of each sample is also verified using a B-Allele frequency (<a class="def" href="/books/n/handbook2e/glossary/def-item/baf/">BAF</a>) analysis pipeline which calculates and processes BAF values to identify samples with extremely “noisy” or failed genotyping.</p><p>The following <a class="def" href="/books/n/handbook2e/glossary/def-item/quality-assurance/">quality assurance</a> steps are implemented to facilitate cross-study and cross-technology data merging and analysis:</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">Identify duplicated genotypes across all studies as well as generating data.</p></dd><dt>2.</dt><dd><p class="no_top_margin">Check data formats, annotation, and <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a>- metrics for <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> data derived using different technologies.</p></dd><dt>3.</dt><dd><p class="no_top_margin">Check ID reconciliation, gender, missing call rate, duplicate concordance, identity-by-descent, and Mendelian error rate at sample as well as <a class="def" 
href="/books/n/handbook2e/glossary/def-item/snp/">SNP</a> level.</p></dd></dl></div><div id="dbGaP.Subjects_and_Samples"><h4>Subjects and Samples</h4><p>In dbGaP there are two similar yet distinct concepts that describe the participants in a study: subject and sample. A subject corresponds to an individual human. A sample in dbGaP corresponds to each analyte (<a class="def" href="/books/n/handbook2e/glossary/def-item/dna/">DNA</a>/RNA) that is put in the machine or on the <a class="def" href="/books/n/handbook2e/glossary/def-item/chip/">chip</a>, rather than the physical result of obtaining a tissue or blood sample, though this is accepted as well. Modeling samples this way allows dbGaP to track duplicates, centers, plates, wells, and any sequence of aliquots that precedes the actual aliquot used to produce the molecular data finally submitted to dbGaP. The tracking method also enables dbGaP to easily add, rename, or redact samples over time.</p><div id="dbGaP.Bb" class="box"><h3><span class="title">Example</span></h3><p>Consider a case in which a single subject has both a blood draw and a cheek swab. <a class="def" href="/books/n/handbook2e/glossary/def-item/dna/">DNA</a> is extracted from both samples. The DNA extracted from the blood is stored for years after being drawn, and then sequenced on two different platforms, and the DNA extracted from the cheek swab is also used on a GWAS <a class="def" href="/books/n/handbook2e/glossary/def-item/chip/">chip</a>. In this scenario, dbGaP prefers to receive three samples belonging to the same subject (even though there were two intermediate physical samples).</p><p><b>Note</b>: The information about intermediate samples may be informative and can be included as one or more variables in the sample attribute file.</p></div><p>Submitters are required to assign de-identified IDs to subjects and samples; these are submitted subject and sample ids. 
However, dbGaP also assigns a unique id to samples and subjects; these are the dbGaP Subject and Sample IDs and are included in the final <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> files available through controlled access. The dbGaP assigns its own IDs to accurately represent cases where a single subject (person) has participated in more than one study. In such a case the two submitted subjects will be assigned the same dbGaP Subject ID. This can only be done if the submitter provides the information that a subject in their study is the same subject as in an existing dbGaP study. Similarly, cell repository, or otherwise readily available samples such as Coriell samples, used as controls in multiple studies will typically receive the same dbGaP Sample ID.</p><p>All <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> and molecular data are connected through the Subject Sample Mapping file.</p></div><div id="dbGaP.Data_ID_mapping"><h4>Data ID mapping</h4><p>The data submitted to the dbGaP are de-identified. The <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> and <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> data are connected through the subject sample <a class="def" href="/books/n/handbook2e/glossary/def-item/mapping/">mapping</a> file in which one sample is mapped to exactly one subject and one subject is mapped to any number of samples. The following is a partial list of the IDs and attributes included in dbGaP phenotype and molecular data files.</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin"><b>SUBJECT_ID:</b> This is the submitted Subject ID and it is included in the Subject Consent Data File, the Subject Sample Mapping Data File, the Pedigree Data File (if applicable), and all Subject Phenotype Data Files. 
SUBJECT_ID should be an integer or string value consisting of the following characters: English letters, Arabic numerals, period (.), hyphen (-), underscore (_), at symbol (@), and the pound sign (#). In addition to the submitted Subject ID, dbGaP will assign a dbGaP Subject ID that will be included in the final <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> dump files along with the submitted Subject ID.</p></dd><dt>2.</dt><dd><p class="no_top_margin"><b>SAMPLE_ID:</b> This is the submitted Sample ID and is included in the Subject Sample Mapping Data File and the Sample Attributes Data File. This ID should be used as the key for the individual level molecular data. Each sample should be submitted with a single, unique, de-identified Sample ID. The acceptable characters in Sample IDs are the same as those in the Subject IDs. In addition to the submitted Sample ID, dbGaP will assign a dbGaP Sample ID that will be included in the final <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> dump files along with the submitted Sample ID. The SAMPLE_IDs listed in the Subject Sample Mapping Data File should be identical to the samples found in the <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a>, SRA, and other molecular data.</p></dd><dt>3.</dt><dd><p class="no_top_margin"><b>dbGaP_SAMPLE_ID:</b> This is the dbGaP assigned unique identifier assigned to the submitted Sample ID. The dbGaP Sample ID is included as a column in the final <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> dump files whenever there is a submitted sample ID column.</p></dd><dt>4.</dt><dd><p class="no_top_margin"><b>dbGaP_SUBJECT_ID:</b> This is the dbGaP unique identifier assigned to the submitted Subject ID. 
The dbGaP Subject ID is included as a column in the final <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> dump files whenever there is a submitted subject ID column. The dbGaP Subject ID is unique across all dbGaP studies, which means that if a subject is known to have participated in multiple studies that have been submitted to dbGaP, the same dbGaP Subject ID will be assigned to the individual across multiple studies, though the submitted subject ID may be different.</p></dd><dt>5.</dt><dd><p class="no_top_margin"><b>SOURCE_SUBJECT_ID</b> and <b>SUBJECT_SOURCE:</b> The Source Subject ID (SOURCE_SUBJECT_ID) is the de-identified alias Subject ID used in the public repository, consortium, institute, or study from where the subject has been obtained. The Subject Source (SUBJECT_SOURCE) is the name of the third party source, public repository, consortium, institute, or study that corresponds to the subject. For subjects originating from a shared source (such as a public repository, consortium, institute, study, etc.) or for subjects with alias IDs, these 2 variables will be included in the Subject Consent Data File. The SOURCE_SUBJECT_ID maps to the SUBJECT_ID. For referencing <a class="def" href="/books/n/handbook2e/glossary/def-item/hapmap/">HapMap</a> subjects from Coriell, the SUBJECT_SOURCE value is written as “Coriell.” The SOURCE_SUBJECT_ID should be written as the de-identified subject ID assigned by Coriell (e.g., NA12711).</p></dd><dt>6.</dt><dd><p class="no_top_margin"><b>FAMILY_ID:</b> The Family ID is a column of de-identified Family IDs in the <a class="def" href="/books/n/handbook2e/glossary/def-item/pedigree/">pedigree</a> file. The Family ID is also referred to as the Pedigree ID. The family ID should be the same for individuals belonging in the same biological family. 
The family ID is found in the pedigree file if a pedigree file is available.</p></dd><dt>7.</dt><dd><p class="no_top_margin"><b>SEX:</b> The gender variable can be included in a subject <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> data file or in a <a class="def" href="/books/n/handbook2e/glossary/def-item/pedigree/">pedigree</a> file if a pedigree file is available.</p></dd><dt>8.</dt><dd><p class="no_top_margin"><b>FATHER</b> and <b>MOTHER<i>:</i></b> In the <a class="def" href="/books/n/handbook2e/glossary/def-item/pedigree/">pedigree</a> file, FATHER and MOTHER are the two columns of the unique, de-identified subject IDs of the participant’s biological father and mother. The Father ID and Mother ID may not be identical. 0 (zero) or blank is filled in for founders or marry-ins (parents not specified) in a pedigree. Each unique Father ID and unique Mother ID is also listed in the Subject ID column of both the Pedigree Data File and the Subject Consent Data File.</p></dd><dt>9.</dt><dd><p class="no_top_margin"><b>TWIN_ID:</b> Monozygotic twins or multiples of the same family have Twin IDs. Twins or multiples of the same family share the same TWIN_ID, but are assigned different SUBJECT_IDs.</p></dd><dt>10.</dt><dd><p class="no_top_margin"><b>CONSENT:</b> Every subject that appears in a Subject Phenotype Data File must belong to a single consent group and every sample that appears in a Subject Sample Mapping File and in a Sample Attribute Data File must belong to a consented subject. The consent information is listed in the Subject Consent Data File. Consents are determined by the submitter, their <a class="def" href="/books/n/handbook2e/glossary/def-item/irb/">IRB</a>, and their GPA (GWAS Program Administrator) along with the <a class="def" href="/books/n/handbook2e/glossary/def-item/dac/">DAC</a> (Data Access Committee). 
All data is parsed into its respective consent groups for download.</p></dd></dl></div><div id="dbGaP.Curatorial_document_annotation"><h4>Curatorial document annotation</h4><p>One thing that sets dbGaP apart from similar databases is the extent of curatorial work done with the data and documentation we receive. For documents, this involves making connections between appropriate portions of text and other accessioned objects (such as variables, data tables, and other documents) and creating links to external resources. We refer to this process as “document annotation” and it involves embedding references into the <a class="def" href="/books/n/handbook2e/glossary/def-item/xml/">XML</a> for the documents.</p><p>Documents can either be annotated by the submitter or by the dbGaP curator responsible for a study. Types of annotations include adding variable IDs to particular sections of text so that the text can be linked to the variable report page or adding references so that hyperlinks can be made between chapters of a protocol document.</p></div></div></div><div id="dbGaP.Access"><h2 id="_dbGaP_Access_">Access</h2><div id="dbGaP.Public_data_unrestricted"><h3>Public data (unrestricted)</h3><div id="dbGaP.dbGaP_Website"><h4>dbGaP Website</h4><div id="dbGaP.Report_types"><h5>Report types</h5><p>The web site provides reports specific to the objects in dbGaP. 
These reports are explained in the following.</p><div id="dbGaP.Study"><h5>Study</h5><ul><li class="half_rhythm"><div>the study’s accession, name, description, history, inclusion/exclusion criteria, a summary of the molecular data collected, a list of related publications, and a list of relevant phenotypes selected by the <a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a>;</div></li><li class="half_rhythm"><div>links to Authorized Access, description of data use limitations and use restrictions, release date, embargo release date, and a list of users and their public and technical research use statements who have been authorized to access individual-level data;</div></li><li class="half_rhythm"><div>links to publicly available information – including a study manifest -- via a public ftp site;</div></li><li class="half_rhythm"><div>links to other related <a class="def" href="/books/n/handbook2e/glossary/def-item/ncbi/">NCBI</a> resources (e.g. <a href="/biosample" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BioSample</a>, <a href="/sra" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">SRA</a>, <a href="/bioproject" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BioProject</a>, <a href="/mesh" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">MeSH</a>);</div></li></ul></div><div id="dbGaP.Variable"><h5>Variable</h5><ul><li class="half_rhythm"><div>the variable’s name, accession, description, comments;</div></li><li class="half_rhythm"><div>a statistical summary of the variable’s values;</div></li><li class="half_rhythm"><div>a curated list of excerpts from study documents that relate to the variable</div></li></ul></div><div id="dbGaP.Document"><h5>Document</h5><ul><li class="half_rhythm"><div>the document’s name and accession;</div></li><li class="half_rhythm"><div>the document’s contents in HTML format; note that the red question marks link particular 
excerpts of the document to other study objects. For example, clicking on a red question mark near a protocol description might list the <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> variables that were measured using that protocol;</div></li><li class="half_rhythm"><div>a link to a <a class="def" href="/books/n/handbook2e/glossary/def-item/pdf/">PDF</a> version of the document</div></li></ul></div><div id="dbGaP.Analysis"><h5>Analysis</h5><ul><li class="half_rhythm"><div>the analysis’ name, accession, description, and a brief synopsis of the methods used;</div></li><li class="half_rhythm"><div>relevant summary plots (<i>e.g.</i> Manhattan plots of p-values; Log QQ <a class="def" href="/books/n/handbook2e/glossary/def-item/pvalue/">p-value</a> plot);</div></li><li class="half_rhythm"><div>a link to the Genome Browser, where analysis results can be examined in greater detail.</div></li></ul></div></div><div id="dbGaP.Dataset"><h5>Dataset</h5><ul><li class="half_rhythm"><div>the <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a>’s name, accession, and description;</div></li><li class="half_rhythm"><div>the <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a>’s release date and embargo release date;</div></li><li class="half_rhythm"><div>list of variables contained in the <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a>;</div></li><li class="half_rhythm"><div>links to summary report and data dictionary</div></li></ul></div><div id="dbGaP.Searching_dbGaP"><h5>Searching dbGaP</h5><p>All publicly released dbGaP studies can be queried from the search box on the top of the dbGaP homepage. 
Queries can be very simple, just keywords of interest (“cancer”), or complex, making use of search fields and <a class="def" href="/books/n/handbook2e/glossary/def-item/boolean/">Boolean</a> operators (“cholesterol[variable] AND <a href="/projects/gap/cgi-bin/study.cgi?study_id=phs000001" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=dbgap">phs000001</a>”). More complex searches can be facilitated by using the “Advanced Search” which helps create queries via a web form.</p><p>There are many search fields available in dbGaP. <a class="figpopup" href="/books/NBK154410/table/dbGaP.T.this_table_lists_fields_in_the_d/?report=objectonly" target="object" rid-figpopup="figdbGaPTthistablelistsfieldsinthed" rid-ob="figobdbGaPTthistablelistsfieldsinthed">Table 1</a> shows a selection of the most commonly used fields, explains what they search for, and gives an example of how the search would be formed.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figdbGaPTthistablelistsfieldsinthed"><a href="/books/NBK154410/table/dbGaP.T.this_table_lists_fields_in_the_d/?report=objectonly" target="object" title="Table 1. " class="img_link icnblk_img figpopup" rid-figpopup="figdbGaPTthistablelistsfieldsinthed" rid-ob="figobdbGaPTthistablelistsfieldsinthed"><img class="small-thumb" src="/books/NBK154410/table/dbGaP.T.this_table_lists_fields_in_the_d/?report=thumb" src-large="/books/NBK154410/table/dbGaP.T.this_table_lists_fields_in_the_d/?report=previmg" alt="Table 1. . This table lists fields in the dbGaP advanced search that are likely to be useful to most searchers." /></a><div class="icnblk_cntnt"><h4 id="dbGaP.T.this_table_lists_fields_in_the_d"><a href="/books/NBK154410/table/dbGaP.T.this_table_lists_fields_in_the_d/?report=objectonly" target="object" rid-ob="figobdbGaPTthistablelistsfieldsinthed">Table 1. 
</a></h4><p class="float-caption no_bottom_margin">This table lists fields in the dbGaP advanced search that are likely to be useful to most searchers. </p></div></div><p>Additionally, complex queries can also contain <a class="def" href="/books/n/handbook2e/glossary/def-item/boolean/">Boolean</a> operators. For example:</p><p>Cancer[Disease] AND True[Study Has SRA Components]</p><p>returns a list of all studies having SRA data and where the <a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a> has assigned the keyword “cancer” as a disease term.</p><p>As with all other <a class="def" href="/books/n/handbook2e/glossary/def-item/ncbi/">NCBI</a> resources, the searches in dbGaP are performed using the <a class="def" href="/books/n/handbook2e/glossary/def-item/entrez/">Entrez</a> search and retrieval system. Please see the <a href="/books/NBK21081/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>Entrez chapter</u></a> of the NCBI handbook for general guidance on forming Entrez queries.</p><p>Once a search <a class="def" href="/books/n/handbook2e/glossary/def-item/query/">query</a> is executed and results returned (<a class="figpopup" href="/books/NBK154410/figure/dbGaP.F3/?report=objectonly" target="object" rid-figpopup="figdbGaPF3" rid-ob="figobdbGaPF3">Figure 3</a>), clicking on an item’s name or accession will lead to a page listing more specific information about that object. This information is of particular importance to those users who want to find out more about a study before deciding whether or not to apply for Authorized Access. (Note that on each of the different pages, one can examine other objects in the study by using the navigational aid along the right-hand edge of the page.)</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figdbGaPF3" co-legend-rid="figlgnddbGaPF3"><a href="/books/NBK154410/figure/dbGaP.F3/?report=objectonly" target="object" title="Figure 3. 
" class="img_link icnblk_img figpopup" rid-figpopup="figdbGaPF3" rid-ob="figobdbGaPF3"><img class="small-thumb" src="/books/NBK154410/bin/dbGaP-Image004.gif" src-large="/books/NBK154410/bin/dbGaP-Image004.jpg" alt="Figure 3. . This shows the returns for a simple search on the word &quot;diabetes&quot;." /></a><div class="icnblk_cntnt" id="figlgnddbGaPF3"><h4 id="dbGaP.F3"><a href="/books/NBK154410/figure/dbGaP.F3/?report=objectonly" target="object" rid-ob="figobdbGaPF3">Figure 3. </a></h4><p class="float-caption no_bottom_margin">This shows the returns for a simple search on the word "diabetes". Note that specific results for Studies, Variables, Study Documents, Analyses, and Datasets can be accessed by choosing the appropriate tab. </p></div></div><div id="dbGaP.Variables_on_the_public_website"><h5>Variables on the public website</h5><p>Phenotype variables can either be found by doing a search on the dbGaP home page, and then linking to an individual variable page (see <a class="figpopup" href="/books/NBK154410/figure/dbGaP.F4/?report=objectonly" target="object" rid-figpopup="figdbGaPF4" rid-ob="figobdbGaPF4">Figure 4</a> and <a class="figpopup" href="/books/NBK154410/figure/dbGaP.F5/?report=objectonly" target="object" rid-figpopup="figdbGaPF5" rid-ob="figobdbGaPF5">Figure 5</a> for examples), or they can be found by choosing the “Variables” tab if you are already looking at the website of a study. If you are using the “Variables” tab the phenotypes are generally grouped into broad categories for ease of browsing. These categories can be found to the right-hand side of any variable report web page (see <a class="figpopup" href="/books/NBK154410/figure/dbGaP.F4/?report=objectonly" target="object" rid-figpopup="figdbGaPF4" rid-ob="figobdbGaPF4">Figure 4</a>). 
Most studies use the following categories, as appropriate:</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figdbGaPF4" co-legend-rid="figlgnddbGaPF4"><a href="/books/NBK154410/figure/dbGaP.F4/?report=objectonly" target="object" title="Figure 4. " class="img_link icnblk_img figpopup" rid-figpopup="figdbGaPF4" rid-ob="figobdbGaPF4"><img class="small-thumb" src="/books/NBK154410/bin/dbGaP-Image005.gif" src-large="/books/NBK154410/bin/dbGaP-Image005.jpg" alt="Figure 4. . Top portion of the variable report for the variable phv000200829, CARDIOV." /></a><div class="icnblk_cntnt" id="figlgnddbGaPF4"><h4 id="dbGaP.F4"><a href="/books/NBK154410/figure/dbGaP.F4/?report=objectonly" target="object" rid-ob="figobdbGaPF4">Figure 4. </a></h4><p class="float-caption no_bottom_margin">Top portion of the variable report for the variable phv000200829, CARDIOV. Variables can be browsed by category using the navigation to the right hand side of the page. </p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figdbGaPF5" co-legend-rid="figlgnddbGaPF5"><a href="/books/NBK154410/figure/dbGaP.F5/?report=objectonly" target="object" title="Figure 5. " class="img_link icnblk_img figpopup" rid-figpopup="figdbGaPF5" rid-ob="figobdbGaPF5"><img class="small-thumb" src="/books/NBK154410/bin/dbGaP-Image006.gif" src-large="/books/NBK154410/bin/dbGaP-Image006.jpg" alt="Figure 5. . Top portion of the variable report for the variable phv000200829, CARDIOV." /></a><div class="icnblk_cntnt" id="figlgnddbGaPF5"><h4 id="dbGaP.F5"><a href="/books/NBK154410/figure/dbGaP.F5/?report=objectonly" target="object" rid-ob="figobdbGaPF5">Figure 5. </a></h4><p class="float-caption no_bottom_margin">Top portion of the variable report for the variable phv000200829, CARDIOV. This shows how variables are linked to appropriate sections of documents. If you follow the first link, you will be taken to the portion of the document shown in Figure 6. 
</p></div></div><ul><li class="half_rhythm"><div>Affection Status</div></li><li class="half_rhythm"><div>Sociodemography and Administration</div></li><li class="half_rhythm"><div>Medical History</div></li><li class="half_rhythm"><div>Physical Observations</div></li><li class="half_rhythm"><div>Lab Measurements</div></li><li class="half_rhythm"><div>Psychological and Psychiatric Observations</div></li><li class="half_rhythm"><div>Lifestyle and Environment</div></li><li class="half_rhythm"><div>Treatment</div></li></ul><p>Exceptions to this grouping method are found in large studies such as the <a href="/projects/gap/cgi-bin/variable.cgi?study_id=phs000007.v19.p7&phv=159482&phd=1105&pha=3550&pht=1415&phvf=&phdf=&phaf=&phtf=&dssp=1&consent=&temp=1" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>Framingham</u></a><u> Cohort</u> which have their own long-standing system for grouping data. When searching for variables in large studies, or if you have a very specific <a class="def" href="/books/n/handbook2e/glossary/def-item/query/">query</a>, it can be more efficient to search for variables using the search box on the right side of the variable report page (or from the dbGaP home page if you want to perform a cross-study search), rather than attempting to browse through the hierarchy of folders.</p></div><div id="dbGaP.Documents_on_the_public_website"><h5>Documents on the public website</h5><p>There are multiple pathways to find documents through the dbGaP web site. On the dbGaP<a href="/gap" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u> home page</u></a> the newest studies are listed under the “Latest Studies” heading, with the most direct route to documents being the orange "D" icons. A gray icon means there are no documents associated with the study. 
Beneath that section, the "List Top Level Studies" link leads to a searchable listing of all studies and documents, with an advanced search option available for building document-specific queries. On a study page, clicking the Documents tab will open the study's default document, with a folder tree on the right to explore the rest, and a "Search Within This Study" box that will search document text (<a class="figpopup" href="/books/NBK154410/figure/dbGaP.F5/?report=objectonly" target="object" rid-figpopup="figdbGaPF5" rid-ob="figobdbGaPF5">Figure 5</a>). Variable pages may also link to documents in which they are annotated, through the "See document part in context" links. Documents for each study are also available on the dbGaP ftp site as a downloadable zip file, which includes the pdfs, xml, and images. The ftp site is accessible from study pages by clicking the link under "Publicly Available Data."</p></div></div><div id="dbGaP.Using_document_annotation"><h5>Using document annotation</h5><p><a href="#dbGaP.Curatorial_document_annotation">As noted previously</a>, curators establish connections between appropriate portions of text and other accessioned objects. As an example of the types of functionality that annotations can provide, imagine looking at a variable report page having to do with <a href="/projects/gap/cgi-bin/variable.cgi?study_id=phs000007.v19.p7&phv=162114&phd=3677&pha=3550&pht=1415&phvf=100378&phdf=100639&phaf=&phtf=&dssp=1&consent=&temp=1" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>whether subjects take a multivitamin</u></a>. 
If you scroll down to the bottom of the variable page, there is a section labeled “Document parts related to the variable” which is shown in <a class="figpopup" href="/books/NBK154410/figure/dbGaP.F5/?report=objectonly" target="object" rid-figpopup="figdbGaPF5" rid-ob="figobdbGaPF5">Figure 5</a>.</p><p>This shows that there are two documents, a Coding Manual and an Annotated Form, that have text which has been associated to the multivitamin variable. If you click on the “See document part in context” link you will be taken to the appropriate portion of the document. If you follow the link for the Annotated form, you would get taken to a page that looks like <a class="figpopup" href="/books/NBK154410/figure/dbGaP.F6/?report=objectonly" target="object" rid-figpopup="figdbGaPF6" rid-ob="figobdbGaPF6">Figure 6</a>.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figdbGaPF6" co-legend-rid="figlgnddbGaPF6"><a href="/books/NBK154410/figure/dbGaP.F6/?report=objectonly" target="object" title="Figure 6. " class="img_link icnblk_img figpopup" rid-figpopup="figdbGaPF6" rid-ob="figobdbGaPF6"><img class="small-thumb" src="/books/NBK154410/bin/dbGaP-Image007.gif" src-large="/books/NBK154410/bin/dbGaP-Image007.jpg" alt="Figure 6. . This shows the portion of the web page for the document “GoKinD Study Diabetic Offspring” (phd000152." /></a><div class="icnblk_cntnt" id="figlgnddbGaPF6"><h4 id="dbGaP.F6"><a href="/books/NBK154410/figure/dbGaP.F6/?report=objectonly" target="object" rid-ob="figobdbGaPF6">Figure 6. </a></h4><p class="float-caption no_bottom_margin">This shows the portion of the web page for the document “GoKinD Study Diabetic Offspring” (phd000152.2) which you would be taken to if you clicked the first link from Figure 5. 
</p></div></div><p>In the image of the questionnaire shown in <a class="figpopup" href="/books/NBK154410/figure/dbGaP.F6/?report=objectonly" target="object" rid-figpopup="figdbGaPF6" rid-ob="figobdbGaPF6">Figure 6</a>, the icon of a red circle with a white question mark, <span class="graphic"><img src="/books/NBK154410/bin/dbGaP-Image001.jpg" alt="Image dbGaP-Image001.jpg" /></span>, indicates that a section of the document is associated with one or more accessioned objects in a study. The accessioned objects are generally variables, but can include data tables. Clicking on the icon will either take the user to a variable report page (if only a single variable is associated with the icon) or to an <a class="def" href="/books/n/handbook2e/glossary/def-item/entrez/">Entrez</a> search result page (if there are multiple objects associated with that icon).</p></div></div><div id="dbGaP.dbGaP_ftp_site"><h4>dbGaP ftp site</h4><p>The <a href="/public/?/ftp/dbgap/studies/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">ftp site</a> includes a directory for every study, which contains a directory for every version of a study, as well as a directory where analyses are found. Currently, each version of a study contains directories for documents, <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> variable summaries, manifests, and release notes (<a class="figpopup" href="/books/NBK154410/figure/dbGaP.F7/?report=objectonly" target="object" rid-figpopup="figdbGaPF7" rid-ob="figobdbGaPF7">Figure 7</a>). Manifests describe the files available in each consent category while release notes describe the history of the released files as well as giving details of any changes made from previous versions.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figdbGaPF7" co-legend-rid="figlgnddbGaPF7"><a href="/books/NBK154410/figure/dbGaP.F7/?report=objectonly" target="object" title="Figure 7. 
" class="img_link icnblk_img figpopup" rid-figpopup="figdbGaPF7" rid-ob="figobdbGaPF7"><img class="small-thumb" src="/books/NBK154410/bin/dbGaP-Image008.gif" src-large="/books/NBK154410/bin/dbGaP-Image008.jpg" alt="Figure 7. . This figure shows the basic organization of the ftp site, with the study phs000001." /></a><div class="icnblk_cntnt" id="figlgnddbGaPF7"><h4 id="dbGaP.F7"><a href="/books/NBK154410/figure/dbGaP.F7/?report=objectonly" target="object" rid-ob="figobdbGaPF7">Figure 7. </a></h4><p class="float-caption no_bottom_margin">This figure shows the basic organization of the ftp site, with the study phs000001.v3.p1 opened up to show portions of the hierarchy of directories and files. </p></div></div><p>Please note that older versions of studies may have a different directory structure although they contain similar information.</p><p>The variable summaries and the data dictionaries are delivered as <a class="def" href="/books/n/handbook2e/glossary/def-item/xml/">XML</a> files with an accompanying <a class="def" href="/books/n/handbook2e/glossary/def-item/xsl/">XSL</a> file which produces the HTML rendering of the file that can be viewed on a browser.</p><p>The documents directory contains at least one .zip file that holds the xml files, images, and pdf versions of the documents in a study. In cases where there are a large number of documents the files may be separated into separate .zip files for xml, images, and pdf.</p></div></div></div><div id="dbGaP.dbGaP_Authorized_Access"><h2 id="_dbGaP_dbGaP_Authorized_Access_">dbGaP Authorized Access</h2><p>Data distribution by dbGaP is governed by the NIH’s policies and procedures for managing Genome Wide Association Study (GWAS) data. Information related to these policies can be found on the <a href="http://gwas.nih.gov/index.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>NIH GWAS website</u></a>. 
Questions related to GWAS policy can be directed to <u><a href="mailto:dev@null" data-email="vog.hin.liam@SAWG" class="oemail">vog.hin.liam@SAWG</a></u>.</p><p>The individual level data is only available to authorized users. Requests for data and data downloads are managed through the dbGaP <a href="https://dbgap.ncbi.nlm.nih.gov/aa/wga.cgi?page=login" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>Authorized Access System</u></a> (dbGaP-AA), a web platform that handles request submission, manages reviewing and approval processes carried out by signing officials (SOs) and Data Access Committees (DACs), and facilitates secured, high speed downloads of large data sets for approved users.</p><p>The dbGaP data are organized and distributed by consent groups. That is, the data are grouped by subjects that have agreed to the same set of data use limitations. The data can only be selected by consent group when making data access requests. There are no overlapping subjects between the consent groups within a study. The data requests are also reviewed and approved by consent group. Therefore it is very important that requesters understand the Data Use Limitations of consent groups before they apply for dbGaP data access.</p><p>Each data file distributed through the dbGaP has an embargo release date. The data access policy requires that the results obtained from analyzing the dbGaP data are not published before the embargo release date.</p><p>To access the Authorized Access system, non-NIH users must have an <a href="https://commons.era.nih.gov/commons" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>NIH eRA Commons</u></a> account with a Principal Investigator (<a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a>) role. The login username and password of a user’s dbGaP-AA account are the same as those of a user’s eRA account. 
NIH users need to be registered in the dbGaP system by the GWAS Project Administrator (GPA) of an affiliated institute before gaining access to the dbGaP-AA. After being registered, the NIH user can log in to the dbGaP-AA account using the login username and password of their NIH CIT (or email) account.</p><p>A data access request (<a class="def" href="/books/n/handbook2e/glossary/def-item/dar/">DAR</a>) is made by filling out forms inside dbGaP-AA. The request includes a Research Use Statement and a Non-technical Summary. The DAR must also designate an institutional Signing Official and IT director for their project. If any of the requested datasets has an <a class="def" href="/books/n/handbook2e/glossary/def-item/irb/">IRB</a> (Institutional Review Board) approval requirement, an IRB approval document should be uploaded to the system before submitting the request. By signing the application form, the data requester agrees to obey terms and conditions laid out in the governing Data Use Certification (<a class="def" href="/books/n/handbook2e/glossary/def-item/duc/">DUC</a>) document.</p><p>The <a class="def" href="/books/n/handbook2e/glossary/def-item/dar/">DAR</a> will first be reviewed by the SO. If approved, it will be passed on to the appropriate Data Access Committee or committees. A <a class="def" href="/books/n/handbook2e/glossary/def-item/dac/">DAC</a> is a committee appointed by an NIH institute (or group of institutes) which evaluates DARs requesting access to studies from their portfolio. Each DAC evaluates whether requests conform to NIH policies and procedures including whether the proposed research is consistent with the Data Use Limitations stipulated for each study. If approved, the requester must agree to obey data use restrictions dictated by participant informed consent agreements and to comply with data use, sharing, and security policies laid out in a governing <a class="def" href="/books/n/handbook2e/glossary/def-item/duc/">DUC</a>. 
At that point the data can be downloaded by the requester.</p><p>The dbGaP system manages data downloads using Aspera, a system designed to expedite high-speed data transfers. Use of Aspera requires that Aspera Connect, a browser plugin available through the Aspera website, is installed on the downloading machine. Data download can be carried out through either Aspera Connect’s web-interface or by using Aspera ASCP on the command line. For SRA (Sequence Read Archive) data distributed through the dbGaP data download can be done directly through the sra-toolkit, which allows transfer based on http protocols. Detailed information about sra-toolkit can be found from the <a href="/Traces/sra/sra.cgi?view=toolkit_doc&f=std" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>SRA toolkit documentation.</u></a></p><p>All data downloaded from the dbGaP are encrypted. The downloaded data, with the exception of SRA data, need to be decrypted before being used. For SRA data, we suggest that users work directly with the data dump utilities that are available through the <a class="def" href="/books/n/handbook2e/glossary/def-item/ncbi/">NCBI</a> sra-toolkit without decryption. The NCBI decryption tools and sra-toolkit are available from the <a href="/Traces/sra/sra.cgi?cmd=show&f=software&m=software&s=software" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>SRA software download site</u></a>.</p><p>Approved data users are required to submit an annual project progress report to all the DACs from which they received approval. A project close-out request should be filed if the project is finished. Most dbGaP data requests have a one year approval period. To renew a project the <a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a> needs to revise and resubmit the <a class="def" href="/books/n/handbook2e/glossary/def-item/dar/">DAR</a>, as well as submit the annual report. 
The resubmitted project will go through the SO and <a class="def" href="/books/n/handbook2e/glossary/def-item/dac/">DAC</a> review process again. During this process, only expired data requests under the project will be re-reviewed. Previously approved data requests that have not expired will remain approved.</p><p>A data request is not transferrable. If a <a class="def" href="/books/n/handbook2e/glossary/def-item/pi/">PI</a> leaves the institution listed in the <a class="def" href="/books/n/handbook2e/glossary/def-item/dar/">DAR</a>, all the dbGaP requests sponsored by the institution should be closed out. As a part of the close-out process, all data downloaded through the project need to be destroyed and the process has to be confirmed by the IT director and SO. The PI will need to reapply for the data once they have settled at their new location.</p></div><div id="dbGaP.Related_Tools"><h2 id="_dbGaP_Related_Tools_">Related Tools</h2><div id="dbGaP.PhenotypeGenotype_Integrator_PheGe"><h3>Phenotype-Genotype Integrator (PheGenI)</h3><div id="dbGaP.Scope_1"><h4>Scope</h4><p>The <a href="/gap/phegeni" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>Phenotype-Genotype Integrator</u></a> (<a class="def" href="/books/n/handbook2e/glossary/def-item/phegeni/">PheGenI</a>), (<a class="bk_pop" href="#dbGaP.REF.4">4</a>) merges NHGRI <a class="def" href="/books/n/handbook2e/glossary/def-item/genome/">genome</a>-wide association study (GWAS) catalog data with several databases including <a href="/gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Gene</a>, <a href="/dbgap" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">dbGaP</a>, <a href="/omim" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">OMIM</a>, <a href="/gtex" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GTEx</a> and <a href="/snp" 
ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">dbSNP</a>. This <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a>-oriented resource, intended for clinicians and epidemiologists interested in following up results from GWAS, can facilitate identification and ranking of variants that may warrant additional study.</p></div><div id="dbGaP.History_1"><h4>History</h4><p><a class="def" href="/books/n/handbook2e/glossary/def-item/phegeni/">PheGenI</a> was first released in 2011. The major functionality has not changed, <i>i.e.</i> modes of search and categories of display, but functions have been added to improve both queries and data processing. For example, an autocomplete function was added to facilitate the <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> queries, and download functions were added to the ideogram and tabular results sections. PheGenI is under active development, with contents and displays scheduled to be more closely integrated with additional web resources.</p></div><div id="dbGaP.Data_Flow"><h4>Data Flow</h4><p>PheGenI is populated automatically via feeds from NHGRI, dbSNP, dbGaP and <a class="def" href="/books/n/handbook2e/glossary/def-item/ncbi/">NCBI</a>’s <a class="def" href="/books/n/handbook2e/glossary/def-item/genome/">genome</a> annotation pipeline. Please note that <a class="def" href="/books/n/handbook2e/glossary/def-item/phegeni/">PheGenI</a> does not display all p-values from each dbGaP-hosted analysis. Specifically, only p-values &lt;10<sup>-4</sup>, and/or the lowest 100 p-values are included for each analysis. 
Currently, the <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> search terms are based on <a href="https://www.nlm.nih.gov/mesh/meshhome.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">MeSH</a>, but will be enhanced with additional options in the future.</p></div><div id="dbGaP.Access_1"><h4>Access</h4><p>Users can search based on chromosomal location, gene, <a class="def" href="/books/n/handbook2e/glossary/def-item/snp/">SNP</a>, or <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> and then view and download results. Association results can be filtered by <a class="def" href="/books/n/handbook2e/glossary/def-item/pvalue/">p-value</a>, and <a class="def" href="/books/n/handbook2e/glossary/def-item/genotype/">genotype</a> data can be filtered by location of variant site relative to gene annotation. The results are separated into several categories, including association results, genes, SNPs, <a class="def" href="/books/n/handbook2e/glossary/def-item/eqtl/">eQTL</a> data, a dynamic <a class="def" href="/books/n/handbook2e/glossary/def-item/genome/">genome</a> view and dbGaP studies. Each section provides a download function.</p><p>As a tool to find data in dbGaP, the view of all analysis results is accessed by clicking on the dbGaP link in the source column of the Association Results table. 
For full analysis and aggregate statistics such as <a class="def" href="/books/n/handbook2e/glossary/def-item/allele/">allele</a> frequencies, apply for controlled access.</p><p>Gene’s Phenotypes section also provides links to <a class="def" href="/books/n/handbook2e/glossary/def-item/phegeni/">PheGenI</a>, via the anchor “Review <a class="def" href="/books/n/handbook2e/glossary/def-item/eqtl/">eQTL</a> and <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> association data in this region using PheGenI”.</p></div></div><div id="dbGaP.The_dbGaP_Genome_Browser"><h3>The dbGaP Genome Browser</h3><p>The <a class="def" href="/books/n/handbook2e/glossary/def-item/genome/">genome</a> wide association results hosted at the dbGaP are displayed through the dbGaP genome browser, where they can be viewed along the human genome.</p><p>The dbGaP <a class="def" href="/books/n/handbook2e/glossary/def-item/genome/">genome</a> browser can be accessed through the analysis page of a given dbGaP study. For example, under the “Analyses” tab of the dbGaP study <a href="/projects/gap/cgi-bin/study.cgi?study_id=phs000585.v1.p1" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>phs000585.v1.p1</u></a>. If there are multiple analyses, you can select one from the right panel. The link named <a href="/projects/SNP/gViewer/gView.cgi?aid=3588" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri"><u>View association results in Genome Browser</u></a> leads to the chromosomal viewer and each region (block) there contains results from all tested loci within (<a class="figpopup" href="/books/NBK154410/figure/dbGaP.F8/?report=objectonly" target="object" rid-figpopup="figdbGaPF8" rid-ob="figobdbGaPF8">Figure 8</a>). 
The color is coded for the smallest <a class="def" href="/books/n/handbook2e/glossary/def-item/pvalue/">p-value</a> in that block.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figdbGaPF8" co-legend-rid="figlgnddbGaPF8"><a href="/books/NBK154410/figure/dbGaP.F8/?report=objectonly" target="object" title="Figure 8. " class="img_link icnblk_img figpopup" rid-figpopup="figdbGaPF8" rid-ob="figobdbGaPF8"><img class="small-thumb" src="/books/NBK154410/bin/dbGaP-Image009.gif" src-large="/books/NBK154410/bin/dbGaP-Image009.jpg" alt="Figure 8. . Genome Browser showing pha002914." /></a><div class="icnblk_cntnt" id="figlgnddbGaPF8"><h4 id="dbGaP.F8"><a href="/books/NBK154410/figure/dbGaP.F8/?report=objectonly" target="object" rid-ob="figobdbGaPF8">Figure 8. </a></h4><p class="float-caption no_bottom_margin">Genome Browser showing pha002914. </p></div></div><p>The <a class="def" href="/books/n/handbook2e/glossary/def-item/genome/">genome</a> browsing page (<a class="figpopup" href="/books/NBK154410/figure/dbGaP.F9/?report=objectonly" target="object" rid-figpopup="figdbGaPF9" rid-ob="figobdbGaPF9">Figure 9</a>) is opened by clicking on the region. The testing results are tabularized in the middle. The genome track on the top allows zooming in to see more detailed genomic location and linkage disequilibrium structure. GWAS Catalog (NHGRI) data, or an added analysis, can be aligned with the current track under the same coordinates, which allows viewers to compare results from different studies. The sequencing view (bottom) shows genome annotations (gene, transcript and protein) at that region. 
Each object in this page is linked to its annotated <a class="def" href="/books/n/handbook2e/glossary/def-item/database/">database</a>, which helps scientists to study biological function behind the genetic variations.</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figdbGaPF9" co-legend-rid="figlgnddbGaPF9"><a href="/books/NBK154410/figure/dbGaP.F9/?report=objectonly" target="object" title="Figure 9." class="img_link icnblk_img figpopup" rid-figpopup="figdbGaPF9" rid-ob="figobdbGaPF9"><img class="small-thumb" src="/books/NBK154410/bin/dbGaP-Image010.gif" src-large="/books/NBK154410/bin/dbGaP-Image010.jpg" alt="Figure 9." /></a><div class="icnblk_cntnt" id="figlgnddbGaPF9"><h4 id="dbGaP.F9"><a href="/books/NBK154410/figure/dbGaP.F9/?report=objectonly" target="object" rid-ob="figobdbGaPF9">Figure 9.</a></h4></div></div></div></div><div id="dbGaP.References"><h2 id="_dbGaP_References_">References</h2><dl class="temp-labeled-list"><dt>1.</dt><dd><div class="bk_ref" id="dbGaP.REF.1">Mailman MD, Feolo M, Jin Y, Kimura M, Tryka K, Bagoutdinov R, Hao L, Kiang A, Paschall J, Phan L, et al. The NCBI dbGaP database of genotypes and phenotypes. <span><span class="ref-journal">Nat Genet. </span>2007;<span class="ref-vol">39</span>(10):1181–6.</span> [<a href="/pmc/articles/PMC2031016/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC2031016</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/17898773" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 17898773</span></a>]</div></dd><dt>2.</dt><dd><div class="bk_ref" id="dbGaP.REF.2">GAIN Collaborative Research Group. 
Manolio TA, Rodriguez LL, Brooks L, Abecasis G; Collaborative Association Study of Psoriasis, Ballinger D, Daly M, Donnelly P, Faraone SV; International Multi-Center ADHD Genetics Project, Frazer K, Gabriel S, Gejman P; Molecular Genetics of Schizophrenia Collaboration, Guttmacher A, Harris EL, Insel T, Kelsoe JR; Bipolar Genome Study, Lander E, McCowin N, Mailman MD, Nabel E, Ostell J, Pugh E, Sherry S, Sullivan PF; Major Depression Stage 1 Genomewide Association in Population-Based Samples Study, Thompson JF, Warram J; Genetics of Kidneys in Diabetes (GoKinD) Study, Wholley D, Milos PM, Collins FS. New models of collaboration in genome-wide association studies: the Genetic Association Information Network. <span><span class="ref-journal">Nat Genet. </span>2007;<span class="ref-vol">39</span>(9):1045–51.</span> [<a href="https://pubmed.ncbi.nlm.nih.gov/17728769" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 17728769</span></a>]</div></dd><dt>3.</dt><dd><div class="bk_ref" id="dbGaP.REF.3">PLINK. : <a href="http://pngu.mgh.harvard.edu/~purcell/plink/" ref="pagearea=cite-ref&targetsite=external&targetcat=link&targettype=uri">http://pngu<wbr style="display:inline-block"></wbr>.mgh.harvard<wbr style="display:inline-block"></wbr>.edu/~purcell/plink/</a></div></dd><dt>4.</dt><dd><div class="bk_ref" id="dbGaP.REF.4">Ramos EM, Hoffman D, Junkins HA, Maglott D, Phan L, Sherry ST, Feolo M, Hindorff LA. Phenotype-Genotype Integrator (PheGenI): synthesizing genome-wide association study (GWAS) data with existing genomic resources. <span><span class="ref-journal">Eur J Hum Genet. 
</span>2013</span> [<a href="/pmc/articles/PMC3865418/" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pmc">PMC free article<span class="bk_prnt">: PMC3865418</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/23695286" ref="pagearea=cite-ref&targetsite=entrez&targetcat=link&targettype=pubmed">PubMed<span class="bk_prnt">: 23695286</span></a>]</div></dd></dl></div><div id="dbGaP.Appendix__Phenotype_Quality_Contro"><h2 id="_dbGaP_Appendix__Phenotype_Quality_Contro_">Appendix – Phenotype Quality Control</h2><p>Each submitted <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> should have a corresponding data dictionary with information describing the variables and their values. Additionally, dbGaP requires the following three special datasets to be submitted:</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">Subject Consent</p></dd><dt>2.</dt><dd><p class="no_top_margin">Subject Sample Mapping</p></dd><dt>3.</dt><dd><p class="no_top_margin">Pedigree</p></dd></dl><p>After receiving the data submissions, <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> scripts are executed to check the files for potential errors. The results are manually checked and errors are reported to submitters for clarification or resubmission of data. There are usually a few iterations prior to the study being able to be loaded.</p><div id="dbGaP.The_format_of_the_datasets"><h3>The format of the datasets</h3><p>Each <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> submitted to dbGaP should be a rectangular table showing variables in columns and subject or sample IDs in rows. Each dataset should be a single tab-delimited plain text file. Microsoft Excel files are also accepted, but are converted to tab-delimited plain text files for processing. 
Once the files pass all the qc checks, they are loaded into dbGaP databases and distributed as tab-delimited plain text files to approved Authorized Access users. The following formatting requirements will be ensured by running <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> scripts:</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">Each column has a unique, non-blank column header (variable name).</p></dd><dt>2.</dt><dd><p class="no_top_margin">Each <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> has a subject (or sample) ID column.</p></dd><dt>3.</dt><dd><p class="no_top_margin">Each row has a subject (or sample) ID value.</p></dd><dt>4.</dt><dd><p class="no_top_margin">There are no duplicated rows in the table.</p></dd><dt>5.</dt><dd><p class="no_top_margin">Datasets do not include any characters that will not be rendered correctly on the web pages.</p></dd><dt>6.</dt><dd><p class="no_top_margin">Duplicated subject IDs in different rows are acceptable, but will be reported in the qc checks so that curators can manually verify that there are no errors.</p></dd><dt>7.</dt><dd><p class="no_top_margin">Variables without any values (with only column header) are acceptable but will be reported in the qc checks.</p></dd></dl></div><div id="dbGaP.Subject_and_Sample_IDs"><h3>Subject and Sample IDs</h3><p>A dbGaP phenotypic <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> is a collection of variable values of individuals (subjects) or samples of individuals. Each subject should be submitted with a distinct subject ID; each sample should be submitted with a distinct sample ID. Submitters can use multiple subject alias IDs for a single subject. In addition to the submitted subject ID dbGaP assigns a single dbGaP subject ID for each submitted subject (individual person), even if the subject has multiple alias IDs. 
The dbGaP subject ID is different from the submitted subject ID. Submitters are required to use the subject consent file to list all the subjects who participated in, or are referred to by, the study, with their consent values. Subjects who did not participate in the study, namely <a class="def" href="/books/n/handbook2e/glossary/def-item/hapmap/">HapMap</a> controls and parents of participants included in the <a class="def" href="/books/n/handbook2e/glossary/def-item/pedigree/">pedigree</a> file, should also be included in the consent file with consent value 0.</p><p>The subject and sample <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> scripts check the following:</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">Each subject, who might be represented by multiple aliases, in the consent file has exactly one consent value (an integer).</p></dd><dt>2.</dt><dd><p class="no_top_margin">Each sample, which might be represented by multiple aliases, in the sample <a class="def" href="/books/n/handbook2e/glossary/def-item/mapping/">mapping</a> file maps to exactly one subject in the subject consent file.</p></dd><dt>3.</dt><dd><p class="no_top_margin">All subjects in all the datasets (including subjects that have molecular data only and no <a class="def" href="/books/n/handbook2e/glossary/def-item/phenotype/">phenotype</a> data and relatives in the <a class="def" href="/books/n/handbook2e/glossary/def-item/pedigree/">pedigree</a> file) are included in the subject consent file. 
Additional subjects who are in the subject consent file, but are not found in any of the phenotype datasets are flagged and reported, but are not considered an error if the subject’s data will be submitted at a later time.</p></dd><dt>4.</dt><dd><p class="no_top_margin">Samples that are not found in the molecular data, but are found in the subject sample <a class="def" href="/books/n/handbook2e/glossary/def-item/mapping/">mapping</a> file will be flagged and reported, but are not considered an error if the sample will be submitted at a later time.</p></dd><dt>5.</dt><dd><p class="no_top_margin">Multiple alternate names (aliases) for a single subject within a single study or across multiple studies are assigned only one dbGaP subject ID. If the subject does not have a dbGaP subject ID, a unique ID will be assigned to the subject when the <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> is loaded into the <a class="def" href="/books/n/handbook2e/glossary/def-item/database/">database</a>. Currently, a single dbGaP subject ID is assigned to a subject only when the submitter provides the linking information. This is true at the sample level as well. 
The case of alternate names for samples should be less common than subjects, since dbGaP considers sample IDs to refer to the final analyte (<a class="def" href="/books/n/handbook2e/glossary/def-item/dna/">DNA</a>/RNA) that is put in the machine or on the <a class="def" href="/books/n/handbook2e/glossary/def-item/chip/">chip</a>, rather than the physical result of obtaining a tissue or blood sample, though this is accepted as well.</p></dd><dt>6.</dt><dd><p class="no_top_margin">If the gender of a subject is reported in multiple places (different datasets or different rows of the same <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a>) the gender values should be the same.</p></dd><dt>7.</dt><dd><p class="no_top_margin">If a subject, or an alias of this subject, is already found in the dbGaP <a class="def" href="/books/n/handbook2e/glossary/def-item/database/">database</a>, the gender of the subject in the <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> should be the same as that in the database.</p></dd><dt>8.</dt><dd><p class="no_top_margin">If a sample, or an alias of this sample, is already found in the dbGaP <a class="def" href="/books/n/handbook2e/glossary/def-item/database/">database</a>, the sample in the <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> should map to the same subject as in the database.</p></dd><dt>9.</dt><dd><p class="no_top_margin">Each subject within a single study should not have conflicting case-control status, especially in the scenario when the same case control variable appears in multiple datasets.</p></dd><dt>10.</dt><dd><p class="no_top_margin">The number of subjects and samples are consistent between iterative submissions. 
When the counts are different, they are reported to the submitter for confirmation or resubmission.</p></dd></dl></div><div id="dbGaP.HIPAA_violations"><h3>HIPAA violations</h3><p>The datasets submitted to dbGaP should follow the Health Insurance Portability and Accountability Act (<a class="def" href="/books/n/handbook2e/glossary/def-item/hipaa/">HIPAA</a>) rules in order to protect the privacy of personally identifiable health information. Due to the complexity of HIPAA rules, it is impossible to write a program to report all HIPAA violations without turning up false positives. It is also impractical to manually check all the data values and find all the HIPAA violations. <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> scripts have been created to check variable names, descriptions, and values, and to flag variables that are likely to have sensitive information. dbGaP curators then manually check the flagged variables to determine whether these are HIPAA violations. 
The QC scripts first report all variables whose names or descriptions contain the following key words (case insensitive except for ‘IP’ and ‘DOB’):</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">name</p></dd><dt>2.</dt><dd><p class="no_top_margin">address</p></dd><dt>3.</dt><dd><p class="no_top_margin">zip</p></dd><dt>4.</dt><dd><p class="no_top_margin">phone</p></dd><dt>5.</dt><dd><p class="no_top_margin">telephone</p></dd><dt>6.</dt><dd><p class="no_top_margin">fax</p></dd><dt>7.</dt><dd><p class="no_top_margin">mail</p></dd><dt>8.</dt><dd><p class="no_top_margin">email</p></dd><dt>9.</dt><dd><p class="no_top_margin">social</p></dd><dt>10.</dt><dd><p class="no_top_margin">ssn</p></dd><dt>11.</dt><dd><p class="no_top_margin">ss#</p></dd><dt>12.</dt><dd><p class="no_top_margin">birth</p></dd><dt>13.</dt><dd><p class="no_top_margin">DOB</p></dd><dt>14.</dt><dd><p class="no_top_margin">license</p></dd><dt>15.</dt><dd><p class="no_top_margin">account</p></dd><dt>16.</dt><dd><p class="no_top_margin">certificate</p></dd><dt>17.</dt><dd><p class="no_top_margin">vehicle</p></dd><dt>18.</dt><dd><p class="no_top_margin">url</p></dd><dt>19.</dt><dd><p class="no_top_margin">IP</p></dd></dl><p>Only the names or descriptions containing a whole-word match with at least one of the above key words are reported. A word in the variable name or description that contains a key word as a substring is not considered a match. For example, “Leave your email/phone.” is reported as a match since it contains key words “email” and “phone”, but “zipper” is not reported because it only contains key word “zip” as a substring.</p><p>In many cases the variable names or descriptions do not have any indication that the variable might have <a class="def" href="/books/n/handbook2e/glossary/def-item/hipaa/">HIPAA</a> incompatible information. 
To work around this, the <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> scripts also check variable data values for sensitive information. Data values are much harder to check than variable names and descriptions due to the sheer number of individual values and the great variety of errors. Fortunately, almost all of the HIPAA violations in the datasets submitted to dbGaP <a class="def" href="/books/n/handbook2e/glossary/def-item/database/">database</a> are related to dates, including dates as separated values and dates embedded in longer texts. Below are some of the examples of the dates found in the datasets submitted to dbGaP:</p><ul><li class="half_rhythm"><div>11-JUN-1970</div></li><li class="half_rhythm"><div>01-SEP-65</div></li><li class="half_rhythm"><div>SEPT-NOV 1995</div></li><li class="half_rhythm"><div>2004.05.10</div></li><li class="half_rhythm"><div>2/2/85</div></li><li class="half_rhythm"><div>8-11-83</div></li><li class="half_rhythm"><div>10/1974</div></li><li class="half_rhythm"><div>JAN ‘93</div></li><li class="half_rhythm"><div>3/04</div></li><li class="half_rhythm"><div>SEPT 85</div></li><li class="half_rhythm"><div>NOV-89-</div></li><li class="half_rhythm"><div>FEB64</div></li><li class="half_rhythm"><div>NOVEMBER 1992</div></li><li class="half_rhythm"><div>“DEC” “92”</div></li><li class="half_rhythm"><div>Feb.4</div></li><li class="half_rhythm"><div>Jan – 1996</div></li><li class="half_rhythm"><div>March, 2004</div></li><li class="half_rhythm"><div>(Nov,2005)</div></li><li class="half_rhythm"><div>APR. “85</div></li><li class="half_rhythm"><div>APRIL 91</div></li><li class="half_rhythm"><div>apr 2000</div></li><li class="half_rhythm"><div>(APRIL 1997)</div></li><li class="half_rhythm"><div>(3/00)</div></li><li class="half_rhythm"><div>DEC.1992</div></li><li class="half_rhythm"><div>1998-May</div></li><li class="half_rhythm"><div>October-September, 2004</div></li><li class="half_rhythm"><div>Jan. 
1</div></li><li class="half_rhythm"><div>May 3rd</div></li><li class="half_rhythm"><div>xxxxxxIN2002.03.01</div></li><li class="half_rhythm"><div>19941122</div></li><li class="half_rhythm"><div>112004</div></li></ul><p>Most of the above values, e.g., “01-SEP-65”, “2004.05.10”, “DEC.1992”, are obviously date values and not <a class="def" href="/books/n/handbook2e/glossary/def-item/hipaa/">HIPAA</a> compatible. It is hard to write programs to find dates in all the different formats without generating too many false positives. However, some of them are not so obvious and need manual confirmation using variable descriptions and context of the values. For example, “3/04” could mean “March 2004”, or “3 out of 4”; 19941122 could be “Nov. 22, 1994” or the number 19941122; “112004” could be “Nov. 2004”, “Nov. 20, 2004” or the number 112004. If we report all the 6-digit numbers as potential date values, we will generate a large number of false positives. More complicated algorithms are needed to detect date values with high sensitivity without sacrificing too much specificity. 
We use the following algorithm to detect the date values in the datasets:</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">Two 1 or 2-digit numbers and a 2 or 4-digit number, in this order, separated by “/”, “-” or “.”, e.g., “3/5/1994” or “12-28-03”.</p></dd><dt>2.</dt><dd><p class="no_top_margin">One 4-digit number and two 1 or 2-digit numbers separated by “/”, “-” or “.”, e.g., “1994.2.13”.</p></dd><dt>3.</dt><dd><p class="no_top_margin">A 1 or 2-digit number and a 4-digit number starting with 19 or 20 separated by “/”, e.g., “10/1994” (but not “10.1994”).</p></dd><dt>4.</dt><dd><p class="no_top_margin">A 1 or 2-digit number followed by a “/” and a 2-digit number starting with 0, e.g., “3/04” (but not “3/94”).</p></dd><dt>5.</dt><dd><p class="no_top_margin">A month name or short name and a 1, 2, or 4-digit number, in either order, separated by some non-letter, non-number characters or not separated, e.g., “JAN ‘93”, “FEB64”, “May 3rd” (but not “may be 14”). An example of a false positive is “4 (may be under reporting)”.</p></dd><dt>6.</dt><dd><p class="no_top_margin">A 6-digit number is considered to be a potential date value if its first four digits make a valid date in mmdd format (i.e., first two digits read as month, second two as day of the month). For example, 112876 is considered to be a potential date value since 1128 is a valid date (Nov. 28) in mmdd format; 231208 or 113198 is not a potential date since 2312 or 1131 is not a valid date in month/day format. If all of the values, or first 10 values, of a variable are 6-digit potential dates, this variable together with its potential date values will be reported by the scripts.</p></dd><dt>7.</dt><dd><p class="no_top_margin">An 8-digit number is considered to be a potential date value if it makes a valid date in the 20<sup>th</sup> or 21<sup>st</sup> century in either mmddyyyy or yyyymmdd format. 
For example, 19940822 is considered to be a potential date since it can be read as 1994/08/22 (Aug. 22, 1994). 10312005 is a potential date value since it can be read as 10/31/2005 (Oct. 31, 2005). “19080230” is not considered to be a potential date since neither 1908/02/30 nor 19/08/0230 is a valid date in the 20<sup>th</sup> or 21<sup>st</sup> century. If all of the values or the first 10 values of a variable are 8-digit numbers of potential date values, the variable will be reported as containing potential <a class="def" href="/books/n/handbook2e/glossary/def-item/hipaa/">HIPAA</a> violations.</p></dd></dl><p>In addition, the <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> scripts also report values that look like social security numbers (e.g., “123-45-6789” or “123456789”), phone numbers (e.g., “321-456-7890” or “(301)456-7890”), zip codes (e.g., “MD 20892”), etc. A few cases of this kind of information have been detected by the QC scripts. However, other cases like names of people are not found by the QC scripts, but by human curation.</p><p>Extreme values that might be used to identify individual participants (ages over 89, extremely heavy body weights, families with extraordinarily large numbers of children) are also <a class="def" href="/books/n/handbook2e/glossary/def-item/hipaa/">HIPAA</a> violations. The <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> scripts infer age variables from variable names, descriptions, and units and report ages over 89 as potential HIPAA violations. 
For other extreme values, since the HIPAA rules don’t specify particular cut-off values, we check the value distribution curves by hand and decide whether we need to hide the extreme values on a case-by-case basis.</p></div><div id="dbGaP.Data_dictionaries"><h3>Data dictionaries</h3><p>Data dictionaries are required to be submitted along with every <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a>, to explain the meanings of the variables and data values. For each value in the datasets, dbGaP requires that the submitters provide a variable description, variable type, units of values for numerical variables, as well as logical minimum and maximum values if available. For each encoded value, a code meaning should be included in the data dictionary. Since the data dictionaries submitted to dbGaP vary in format, many of which are not quite machine-readable, curators spend a good deal of time understanding the data dictionaries, correcting errors, and making other modifications so that they can be read by computer programs. Then <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> scripts are executed to compare the data dictionaries with the corresponding datasets. The QC scripts report variables in the datasets that are missing required information (such as descriptions) in the data dictionary, as well as variables described in the data dictionaries but not found in the datasets. Many of these mismatches are caused by typos, such as “0” for “O” and vice-versa. A number of the numerical variables submitted to dbGaP are missing units. Often the units are implied in a variable’s description or in other documents. The QC scripts try to add the units back by checking the variable descriptions, which is then verified by manual curation.</p><p>In addition to missing descriptions and units, many datasets submitted to dbGaP have missing, or incomplete, code/value pairs. 
Some of these errors are easy to detect, e.g., when the values of a categorical variable are encoded by integers and all of the code meanings are provided except for one code. However, if a variable contains both numerical values and numerically encoded categorical values, missing code meanings are hard to detect. Usually in this case, the submitters use numbers beyond the logical value range to encode non-numerical meanings. For example, if the variable is the age of a patient, they would use code values like -1 or 999 for meanings like “N/A” or “unknown”. If the submitters provided logical minimum and maximum values to us, it would be easy for us to find all the missing code meanings. However, in most cases the logical minimum and maximum values are either not available or incorrect. Unreasonable or suspicious values found automatically or manually are reported to the submitters for clarification and correction.</p><p><a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> scripts are executed to compare each submitted <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> to its corresponding data dictionary. 
The QC scripts report the following errors or potential errors:</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">Variables missing descriptions in the data dictionary.</p></dd><dt>2.</dt><dd><p class="no_top_margin">Variables with descriptions in the data dictionary but not found in the <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> (usually due to typos in variable names).</p></dd><dt>3.</dt><dd><p class="no_top_margin">Potential errors or missing information in value code meanings.</p></dd></dl><p>The following algorithm is used to detect potential errors in the code meanings of each variable:</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">If the variable is labeled as code-value type by the submitter, report all values in the <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> without code meanings in the data dictionary.</p></dd><dt>2.</dt><dd><p class="no_top_margin">If all the values are numbers,</p><dl class="temp-labeled-list"><dt>i.</dt><dd><p class="no_top_margin">Report extreme values beyond 5×SD as potential encoded values. Exclude the 5 largest numbers when calculating the SD.</p></dd><dt>ii.</dt><dd><p class="no_top_margin">Report rare negative numbers as potential encoded values. Negative numbers are considered to be rare if only 1 or 2 out of more than 10 distinct values, or fewer than 1% of the distinct values, are negative.</p></dd></dl></dd><dt>3.</dt><dd><p class="no_top_margin">If all the values are non-numeric text,</p><dl class="temp-labeled-list"><dt>i.</dt><dd><p class="no_top_margin">Report all the values without code meanings in the data dictionary if more than half of the distinct values have code meanings.</p></dd><dt>ii.</dt><dd><p class="no_top_margin">Report all the values in the <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a> that differ from a code in the data dictionary only by case. 
For example, if the data dictionary includes code meaning “UNK=Unknown”, but the dataset has a value “Unk” instead of “UNK”, the scripts report the case mismatch.</p></dd></dl></dd><dt>4.</dt><dd><p class="no_top_margin">If some of the values are numbers but some are non-numbers, separate the values into a set of numerical values and a set of text values, then report the potential encoded values using the above rules.</p></dd><dt>5.</dt><dd><p class="no_top_margin">Report all the code values in data dictionary but not used in the <a class="def" href="/books/n/handbook2e/glossary/def-item/dataset/">dataset</a>.</p></dd></dl><p>Again there is a trade-off between sensitivity and specificity. The <a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> scripts allow the curator to set some parameters like cut-off number of SDs to adjust the sensitivity and specificity. For example, if too many real extreme values are reported as potential encoded values, i.e., the false positive rate is high, we can set the parameter to let the QC scripts report only the extreme values beyond 6×SD or more.</p></div><div id="dbGaP.Pedigree_file"><h3>Pedigree file</h3><p>If there are related individuals in the study, a <a class="def" href="/books/n/handbook2e/glossary/def-item/pedigree/">pedigree</a> file should be submitted to dbGaP. Pedigrees can be quite complex depending on the number of vertical and horizontal relationships, however all relationships can be summarized using the following five required columns: family ID, subject ID, father ID, mother ID, and sex. dbGaP also collects twin IDs, where these IDs can be expanded to include multiples. An additional column can be included to differentiate monozygotic and dizygotic twins, and twin ID if available. All subjects who appear in the father ID or the mother ID columns should also be included in the subject ID column. 
<a class="def" href="/books/n/handbook2e/glossary/def-item/qc/">QC</a> scripts were created to check the pedigree files and report the following errors or potential errors:</p><dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">Any of the above required columns is missing.</p></dd><dt>2.</dt><dd><p class="no_top_margin">Subject IDs appearing more than once in the subject ID column.</p></dd><dt>3.</dt><dd><p class="no_top_margin">Father or mother IDs that are not found in the subject ID column.</p></dd><dt>4.</dt><dd><p class="no_top_margin">Subjects missing family IDs.</p></dd><dt>5.</dt><dd><p class="no_top_margin">Subjects missing sex values.</p></dd><dt>6.</dt><dd><p class="no_top_margin">Male subjects appearing in the mother ID column or female subjects appearing in the father ID column.</p></dd><dt>7.</dt><dd><p class="no_top_margin">Subjects with non-null but identical father and mother IDs.</p></dd><dt>8.</dt><dd><p class="no_top_margin">Subjects having children with their parents or grandparents.</p></dd><dt>9.</dt><dd><p class="no_top_margin">Subjects having children with their siblings or half siblings.</p></dd><dt>10.</dt><dd><p class="no_top_margin">Subjects having children with their uncles or aunts.</p></dd><dt>11.</dt><dd><p class="no_top_margin">Subjects having children with their cousins (Usually these are not errors. We flag them just to make sure the data is correct.)</p></dd></dl></div></div><div id="bk_toc_contnr"></div></div></div>
|
||
<div class="post-content"><div><div class="half_rhythm"><a href="/books/about/copyright/">Copyright Notice</a></div><div class="small"><span class="label">Bookshelf ID: NBK154410</span></div><div style="margin-top:2em" class="bk_noprnt"><a class="bk_cntns" href="/books/n/handbook2e/">Contents</a><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook2e/AboutVariation/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook2e/dbSNP/" title="Next page in this title">Next &gt;</a></div></div></div></div>
|
||
|
||
</div>
|
||
|
||
<!-- Custom content below content -->
|
||
<div class="col4">
|
||
|
||
</div>
|
||
|
||
|
||
<!-- Book content -->
|
||
|
||
<!-- Custom content below bottom nav -->
|
||
<div class="col5">
|
||
|
||
</div>
|
||
</div>
|
||
|
||
<div id="rightcolumn" class="four_col col last">
|
||
<!-- Custom content above discovery portlets -->
|
||
<div class="col6">
|
||
<div id="ncbi_share_book"><a href="#" class="ncbi_share" data-ncbi_share_config="popup:false,shorten:true" ref="id=NBK154410&amp;db=books">Share</a></div>
|
||
|
||
</div>
|
||
<div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Views</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="PDF_download" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="/books/NBK154410/?report=reader">PubReader</a></li><li><a href="/books/NBK154410/?report=printable">Print View</a></li><li><a data-jig="ncbidialog" href="#_ncbi_dlg_citbx_NBK154410" data-jigconfig="width:400,modal:true">Cite this Page</a><div id="_ncbi_dlg_citbx_NBK154410" style="display:none" title="Cite this Page"><div class="bk_tt">Tryka KA, Hao L, Sturcke A, et al. The Database of Genotypes and Phenotypes (dbGaP) and PheGenI. 2013 Aug 15. In: The NCBI Handbook [Internet]. 2nd edition. Bethesda (MD): National Center for Biotechnology Information (US); 2013-. 
<span class="bk_cite_avail"></span></div></div></li><li><a href="/books/NBK154410/pdf/Bookshelf_NBK154410.pdf">PDF version of this page</a> (1.4M)</li><li><a href="/books/n/handbook2e/pdf/">PDF version of this title</a> (14M)</li><li><a href="#" class="toggle-glossary-link" title="Enable/disable links to the glossary">Disable Glossary Links</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>In this Page</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="page-toc" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="#dbGaP.Scope" ref="log$=inpage&link_id=inpage">Scope</a></li><li><a href="#dbGaP.History" ref="log$=inpage&link_id=inpage">History</a></li><li><a href="#dbGaP.Data_Model" ref="log$=inpage&link_id=inpage">Data Model</a></li><li><a href="#dbGaP.Dataflow" ref="log$=inpage&link_id=inpage">Dataflow</a></li><li><a href="#dbGaP.Access" ref="log$=inpage&link_id=inpage">Access</a></li><li><a href="#dbGaP.dbGaP_Authorized_Access" ref="log$=inpage&link_id=inpage">dbGaP Authorized Access</a></li><li><a href="#dbGaP.Related_Tools" ref="log$=inpage&link_id=inpage">Related Tools</a></li><li><a href="#dbGaP.References" ref="log$=inpage&link_id=inpage">References</a></li><li><a href="#dbGaP.Appendix__Phenotype_Quality_Contro" ref="log$=inpage&link_id=inpage">Appendix – Phenotype Quality Control</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Recent Activity</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="recent_activity" id="Shutter"></a></div><div class="portlet_content"><div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="HTDisplay" class=""><div class="action"><a href="javascript:historyDisplayState('ClearHT')">Clear</a><a href="javascript:historyDisplayState('HTOff')" class="HTOn">Turn Off</a><a href="javascript:historyDisplayState('HTOn')" class="HTOff">Turn On</a></div><ul id="activity"><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=1" href="/portal/utils/pageresolver.fcgi?recordid=67da0593cde49f3df7e5de67">The Database of Genotypes and Phenotypes (dbGaP) and PheGenI - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">The Database of Genotypes and Phenotypes (dbGaP) and PheGenI - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=2" href="/portal/utils/pageresolver.fcgi?recordid=67da05922f30673f7bf89f7b">Variation Overview - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">Variation Overview - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=3" href="/portal/utils/pageresolver.fcgi?recordid=67da0591cde49f3df7e5d792">Variation - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">Variation - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=4" href="/portal/utils/pageresolver.fcgi?recordid=67da059084f3725e592448fc">Virus Variation - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">Virus Variation - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=5" 
href="/portal/utils/pageresolver.fcgi?recordid=67da058f84f3725e592442bc">About Viral and Phage Genome Processing and Tools - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">About Viral and Phage Genome Processing and Tools - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li></ul><p class="HTOn">Your browsing activity is empty.</p><p class="HTOff">Activity recording is turned off.</p><p id="turnOn" class="HTOff"><a href="javascript:historyDisplayState('HTOn')">Turn recording back on</a></p><a class="seemore" href="/sites/myncbi/recentactivity">See more...</a></div></div></div>
|
||
|
||
<!-- Custom content below discovery portlets -->
|
||
<div class="col7">
|
||
|
||
</div>
|
||
</div>
|
||
</div>
|
||
|
||
<!-- Custom content after all -->
|
||
<div class="col8">
|
||
|
||
</div>
|
||
<div class="col9">
|
||
|
||
</div>
|
||
|
||
<script type="text/javascript" src="/corehtml/pmc/js/jquery.scrollTo-1.4.2.js"></script>
|
||
<script type="text/javascript">
|
||
// Skip-link support: for every element with class "skiplink", look up the
// element its href points at, make that element programmatically focusable,
// and on click scroll to it and move keyboard focus there instead of
// performing the default link navigation.
(function($){
|
||
$('.skiplink').each(function(i, item){
|
||
// The link's href attribute value is handed to jQuery as a selector, so
// "href" below holds the matched target element(s), not a URL string.
// NOTE(review): presumably the href is an in-page fragment such as "#id" — confirm.
var href = $($(item).attr('href'));
|
||
href.attr('tabindex', '-1').addClass('skiptarget'); // ensure the target can receive focus
|
||
$(item).on('click', function(event){
|
||
// Suppress the browser's own handling of the click; scrolling and focus
// are done explicitly below.
event.preventDefault();
|
||
// $.scrollTo is not defined in this block; presumably it comes from the
// jquery.scrollTo plugin loaded by the preceding script tag. The 0 looks
// like an animation duration (i.e. jump immediately) — confirm against the plugin docs.
$.scrollTo(href, 0, {
|
||
// Focus is moved from the plugin's onAfter callback rather than directly
// in the click handler (presumably invoked once scrolling has finished).
onAfter: function(){
|
||
href.focus();
|
||
}
|
||
});
|
||
});
|
||
});
|
||
})(jQuery);
|
||
</script>
|
||
</div>
|
||
<div class="bottom">
|
||
|
||
<div id="NCBIFooter_dynamic">
|
||
<!--<component id="Breadcrumbs" label="breadcrumbs"/>
|
||
<component id="Breadcrumbs" label="helpdesk"/>-->
|
||
|
||
</div>
|
||
|
||
<div class="footer" id="footer">
|
||
<section class="icon-section">
|
||
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
|
||
<div class="grid-container container">
|
||
<div class="icon-section_container">
|
||
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<defs>
|
||
<style>
|
||
.cls-11 {
|
||
fill: #737373;
|
||
}
|
||
</style>
|
||
</defs>
|
||
<title>Twitter</title>
|
||
<path class="cls-11" d="M250.11,105.48c-7,3.14-13,3.25-19.27.14,8.12-4.86,8.49-8.27,11.43-17.46a78.8,78.8,0,0,1-25,9.55,39.35,39.35,0,0,0-67,35.85,111.6,111.6,0,0,1-81-41.08A39.37,39.37,0,0,0,81.47,145a39.08,39.08,0,0,1-17.8-4.92c0,.17,0,.33,0,.5a39.32,39.32,0,0,0,31.53,38.54,39.26,39.26,0,0,1-17.75.68,39.37,39.37,0,0,0,36.72,27.3A79.07,79.07,0,0,1,56,223.34,111.31,111.31,0,0,0,116.22,241c72.3,0,111.83-59.9,111.83-111.84,0-1.71,0-3.4-.1-5.09C235.62,118.54,244.84,113.37,250.11,105.48Z">
|
||
</path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<title>Facebook</title>
|
||
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
|
||
</path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<title>LinkedIn</title>
|
||
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
|
||
</path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
||
<defs>
|
||
<style>
|
||
.cls-11,
|
||
.cls-12 {
|
||
fill: #737373;
|
||
}
|
||
|
||
.cls-11 {
|
||
fill-rule: evenodd;
|
||
}
|
||
</style>
|
||
</defs>
|
||
<title>GitHub</title>
|
||
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
|
||
</path>
|
||
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
|
||
</path>
|
||
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
|
||
</path>
|
||
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
|
||
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
|
||
</path>
|
||
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
|
||
</svg></a>
|
||
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
|
||
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
|
||
<defs><style>.cls-1{fill:#737373;}</style></defs>
|
||
<title>NCBI Insights Blog</title>
|
||
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
|
||
</svg>
|
||
</a>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
|
||
<section class="container-fluid bg-primary">
|
||
<div class="container pt-5">
|
||
<div class="row mt-3">
|
||
<div class="col-lg-3 col-12">
|
||
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
|
||
<ul class="list-inline social_media">
|
||
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
||
<style type="text/css">
|
||
.st20 {
|
||
fill: #FFFFFF;
|
||
}
|
||
|
||
.st30 {
|
||
fill: none;
|
||
stroke: #FFFFFF;
|
||
stroke-width: 8;
|
||
stroke-miterlimit: 10;
|
||
}
|
||
</style>
|
||
<title>Twitter</title>
|
||
<g>
|
||
<g>
|
||
<g>
|
||
<path class="st20" d="M192.9,88.1c-5,2.2-9.2,2.3-13.6,0.1c5.7-3.4,6-5.8,8.1-12.3c-5.4,3.2-11.4,5.5-17.6,6.7 c-10.5-11.2-28.1-11.7-39.2-1.2c-7.2,6.8-10.2,16.9-8,26.5c-22.3-1.1-43.1-11.7-57.2-29C58,91.6,61.8,107.9,74,116 c-4.4-0.1-8.7-1.3-12.6-3.4c0,0.1,0,0.2,0,0.4c0,13.2,9.3,24.6,22.3,27.2c-4.1,1.1-8.4,1.3-12.5,0.5c3.6,11.3,14,19,25.9,19.3 c-11.6,9.1-26.4,13.2-41.1,11.5c12.7,8.1,27.4,12.5,42.5,12.5c51,0,78.9-42.2,78.9-78.9c0-1.2,0-2.4-0.1-3.6 C182.7,97.4,189.2,93.7,192.9,88.1z"></path>
|
||
</g>
|
||
</g>
|
||
<circle class="st30" cx="124.4" cy="128.8" r="108.2"></circle>
|
||
</g>
|
||
</svg></a></li>
|
||
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
|
||
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
||
<style type="text/css">
|
||
.st10 {
|
||
fill: #FFFFFF;
|
||
}
|
||
|
||
.st110 {
|
||
fill: none;
|
||
stroke: #FFFFFF;
|
||
stroke-width: 8;
|
||
stroke-miterlimit: 10;
|
||
}
|
||
</style>
|
||
<title>Facebook</title>
|
||
<g>
|
||
<g>
|
||
<path class="st10" d="M159,99.1h-24V88.4c0-5,3.3-6.2,5.7-6.2h16.8V60l-24.4-0.1c-22.1,0-26.2,16.5-26.2,27.1v12.1H90v22.5h16.9 v67.5H135v-67.5h21.7L159,99.1z"></path>
|
||
</g>
|
||
</g>
|
||
<circle class="st110" cx="123.6" cy="123.2" r="108.2"></circle>
|
||
</svg>
|
||
</a></li>
|
||
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="YouTube" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
||
<title>YouTube</title>
|
||
<style type="text/css">
|
||
.st4 {
|
||
fill: none;
|
||
stroke: #FFFFFF;
|
||
stroke-width: 8;
|
||
stroke-miterlimit: 10;
|
||
}
|
||
|
||
.st5 {
|
||
fill: #FFFFFF;
|
||
}
|
||
</style>
|
||
<circle class="st4" cx="124.2" cy="123.4" r="108.2"></circle>
|
||
<g transform="translate(0,-952.36218)">
|
||
<path class="st5" d="M88.4,1037.4c-10.4,0-18.7,8.3-18.7,18.7v40.1c0,10.4,8.3,18.7,18.7,18.7h72.1c10.4,0,18.7-8.3,18.7-18.7 v-40.1c0-10.4-8.3-18.7-18.7-18.7H88.4z M115.2,1058.8l29.4,17.4l-29.4,17.4V1058.8z"></path>
|
||
</g>
|
||
</svg></a></li>
|
||
</ul>
|
||
</div>
|
||
<div class="col-lg-3 col-12">
|
||
<p class="address_footer text-white">National Library of Medicine<br />
|
||
<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
|
||
Bethesda, MD 20894</a></p>
|
||
</div>
|
||
<div class="col-lg-3 col-12 centered-lg">
|
||
<p><a href="https://www.nlm.nih.gov/web_policies.html" class="text-white">Web Policies</a><br />
|
||
<a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white">FOIA</a><br />
|
||
<a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white" id="vdp">HHS Vulnerability Disclosure</a></p>
|
||
</div>
|
||
<div class="col-lg-3 col-12 centered-lg">
|
||
<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
|
||
<a href="https://www.nlm.nih.gov/accessibility.html" class="text-white">Accessibility</a><br />
|
||
<a href="https://www.nlm.nih.gov/careers/careers.html" class="text-white">Careers</a></p>
|
||
</div>
|
||
</div>
|
||
<div class="row">
|
||
<div class="col-lg-12 centered-lg">
|
||
<nav class="bottom-links">
|
||
<ul class="mt-3">
|
||
<li>
|
||
<a class="text-white" href="//www.nlm.nih.gov/">NLM</a>
|
||
</li>
|
||
<li>
|
||
<a class="text-white" href="https://www.nih.gov/">NIH</a>
|
||
</li>
|
||
<li>
|
||
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
|
||
</li>
|
||
<li>
|
||
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
|
||
</li>
|
||
</ul>
|
||
</nav>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
</section>
|
||
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
|
||
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
|
||
</div>
|
||
</div>
|
||
</div>
|
||
<!--/.page-->
|
||
</div>
|
||
<!--/.wrap-->
|
||
</div><!-- /.twelve_col -->
|
||
</div>
|
||
<!-- /.grid -->
|
||
|
||
<span class="PAFAppResources"></span>
|
||
|
||
<!-- BESelector tab -->
|
||
|
||
|
||
|
||
<noscript><img alt="statistics" src="/stat?jsdisabled=true&amp;ncbi_db=books&amp;ncbi_pdid=book-part&amp;ncbi_acc=NBK154410&amp;ncbi_domain=handbook2e&amp;ncbi_report=record&amp;ncbi_type=fulltext&amp;ncbi_objectid=&amp;ncbi_pcid=/NBK154410/&amp;ncbi_pagename=The Database of Genotypes and Phenotypes (dbGaP) and PheGenI - The NCBI Handbook - NCBI Bookshelf&amp;ncbi_bookparttype=chapter&amp;ncbi_app=bookshelf" /></noscript>
|
||
|
||
|
||
<!-- usually for JS scripts at page bottom -->
|
||
<!--<component id="PageFixtures" label="styles"></component>-->
|
||
|
||
|
||
<!-- CE8BC1E97D9F05E1_0182SID /projects/books/PBooks@9.11 portal106 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
|
||
<span id="portal-csrf-token" style="display:none" data-token="CE8BC1E97D9F05E1_0182SID"></span>
|
||
|
||
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3879255/4121861/3501987/4008961/3893018/3821238/4062932/4209313/4212053/4076480/3921943/3400083/3426610.js" snapshot="books"></script></body>
|
||
</html> |