nih-gov/www.ncbi.nlm.nih.gov/refseq/functionalelements

1576 lines
103 KiB
XML

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<!-- AppResources meta begin -->
<meta name="paf-app-resources" content="" />
<!-- AppResources meta end -->
<!-- TemplateResources meta begin -->
<meta name="paf_template" content="StdNCol" />
<!-- TemplateResources meta end -->
<!-- Page meta begin -->
<!-- Page meta end -->
<!-- Logger begin -->
<meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_app" content="refseq" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_pdid" content="static" />
<!-- Logger end -->
<title>NCBI RefSeq Functional Elements</title>
<!-- PageFixtures headcontent begin -->
<link type="text/css" rel="stylesheet" href="/core/assets/genome/css/genome.css" /><link type="text/css" rel="stylesheet" href="/core/assets/genome/css/genome_links.css" />
<!-- PageFixtures headcontent end -->
<!-- AppResources external_resources begin -->
<script type="text/javascript" src="/core/jig/1.15.6/js/jig.min.js"></script>
<!-- AppResources external_resources end -->
<!-- Page headcontent begin -->
<meta name="subsite" content="refseq" />
<meta name="path" content="refseq/functionalelements" />
<meta name="modified" content="2024-04-05T14:48:24Z" />
<!-- Page headcontent end -->
<!-- PageFixtures resources begin -->
<link xmlns="http://www.w3.org/1999/xhtml" type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218191/css/4207974/4206132.css" xml:base="http://127.0.0.1/sites/static/header_footer" />
<!-- PageFixtures resources end -->
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8B0C0C7C8015F10000000000CD0097.m_5" /><script type="text/javascript"><!--
// Site-relative path prefix exposed as a global.
// NOTE(review): presumably the base path for portal scripts; the consumer is not visible in this file.
var ScriptPath = '/portal/';

// Static description of this page's layout tree. The root is a "Layout" node;
// "Cluster" nodes carry a "children" array and "Portlet" leaves carry a
// "shortname". Every node has "name", "type" and "realname"; for some nodes
// "realname" contains extra path segments (e.g. PAFPageSelector,
// PAFPageFixtures, PAFTemplateResources) that are absent from "name".
var objHierarchy = {
  "name": "PAFAppLayout",
  "type": "Layout",
  "realname": "PAFAppLayout",
  "children": [
    {
      "name": "PAFAppLayout.AppController",
      "type": "Cluster",
      "realname": "PAFAppLayout.AppController",
      "children": [
        {"name": "PAFAppLayout.AppController.AppResources", "type": "Portlet", "realname": "PAFAppLayout.AppController.AppResources", "shortname": "AppResources"},
        {"name": "PAFAppLayout.AppController.RequestProcessor", "type": "Portlet", "realname": "PAFAppLayout.AppController.RequestProcessor", "shortname": "RequestProcessor"},
        {
          "name": "PAFAppLayout.AppController.Controller",
          "type": "Cluster",
          "realname": "PAFAppLayout.AppController.Controller",
          "children": [
            {"name": "PAFAppLayout.AppController.Controller.AnnotStatusStaticContentController", "type": "Portlet", "realname": "PAFAppLayout.AppController.Controller.AnnotStatusStaticContentController", "shortname": "AnnotStatusStaticContentController"}
          ]
        },
        {
          "name": "PAFAppLayout.AppController.Page",
          "type": "Cluster",
          "realname": "PAFAppLayout.AppController.Page",
          "children": [
            {"name": "PAFAppLayout.AppController.Page.PAFPageSelectorData", "type": "Portlet", "realname": "PAFAppLayout.AppController.Page.PAFPageSelector.PAFPageSelectorData", "shortname": "PAFPageSelectorData"},
            {
              "name": "PAFAppLayout.AppController.Page.PAFStaticPage",
              "type": "Cluster",
              "realname": "PAFAppLayout.AppController.Page.PAFPageSelector.PAFStaticPage",
              "children": [
                {"name": "PAFAppLayout.AppController.Page.PAFStaticPage.MainPortlet", "type": "Portlet", "realname": "PAFAppLayout.AppController.Page.PAFPageSelector.PAFStaticPage.MainPortlet", "shortname": "MainPortlet"}
              ]
            }
          ]
        },
        {
          "name": "PAFAppLayout.AppController.PageFixtures",
          "type": "Cluster",
          "realname": "PAFAppLayout.AppController.PageFixtures",
          "children": [
            {"name": "PAFAppLayout.AppController.PageFixtures.PageFixturesP", "type": "Portlet", "realname": "PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.PageFixturesP", "shortname": "PageFixturesP"},
            {
              "name": "PAFAppLayout.AppController.PageFixtures.SearchBar",
              "type": "Cluster",
              "realname": "PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar",
              "children": [
                {"name": "PAFAppLayout.AppController.PageFixtures.SearchBar.SearchBarChooser", "type": "Portlet", "realname": "PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar.SearchBarChooser", "shortname": "SearchBarChooser"},
                {"name": "PAFAppLayout.AppController.PageFixtures.SearchBar.PAFSearchBar", "type": "Portlet", "realname": "PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.SearchBar.PAFSearchBar", "shortname": "PAFSearchBar"}
              ]
            },
            {
              "name": "PAFAppLayout.AppController.PageFixtures.HeaderFooter",
              "type": "Cluster",
              "realname": "PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter",
              "children": [
                {"name": "PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIBreadcrumbs", "type": "Portlet", "realname": "PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIBreadcrumbs", "shortname": "NCBIBreadcrumbs"},
                {"name": "PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIHelpDesk", "type": "Portlet", "realname": "PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIHelpDesk", "shortname": "NCBIHelpDesk"},
                {"name": "PAFAppLayout.AppController.PageFixtures.HeaderFooter.NCBIApplog_NoScript_Ping", "type": "Portlet", "realname": "PAFAppLayout.AppController.PageFixtures.PAFPageFixtures.HeaderFooter.NCBIApplog_NoScript_Ping", "shortname": "NCBIApplog_NoScript_Ping"}
              ]
            },
            {"name": "PAFAppLayout.AppController.PageFixtures.DummyPortlet", "type": "Portlet", "realname": "PAFAppLayout.AppController.PageFixtures.DummyPortlet", "shortname": "DummyPortlet"}
          ]
        },
        {
          "name": "PAFAppLayout.AppController.TemplateResources",
          "type": "Cluster",
          "realname": "PAFAppLayout.AppController.TemplateResources",
          "children": [
            {"name": "PAFAppLayout.AppController.TemplateResources.StdNColResources", "type": "Portlet", "realname": "PAFAppLayout.AppController.TemplateResources.PAFTemplateResources.StdNColResources", "shortname": "StdNColResources"}
          ]
        },
        {"name": "PAFAppLayout.AppController.Logger", "type": "Portlet", "realname": "PAFAppLayout.AppController.Logger", "shortname": "Logger"},
        {"name": "PAFAppLayout.AppController.DebugConsole", "type": "Portlet", "realname": "PAFAppLayout.AppController.DebugConsole", "shortname": "DebugConsole"}
      ]
    }
  ]
};
--></script>
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218658/css/4121862/3974050/3917732/251717/4218659/4218660/14534/45193/3534283/4128070/4062871.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218658/css/3529741.css" media="print" /><script type="text/javascript">
// Table of portal object links; this page declares exactly one entry.
// Its handler simply forwards both arguments to fn_CopyValue, which is
// defined outside this file.
// NOTE(review): the e*/s*/d* keys look like event, source and destination
// name/id pairs, but that reading is inferred from the key names only.
var ObjectLinks = [
  {
    i: 0,
    ename: "p$ExL",
    esid: "*",
    sname: "p$ExL",
    ssid: "*",
    dname: "p$el",
    dsid: "0",
    m: "CopyValue",
    p: [],
    f: function(src, dst) {
      fn_CopyValue(src, dst);
    }
  }
];  // terminated explicitly instead of relying on automatic semicolon insertion

// Name lookup with a single key, "p$ExL", mapped to 1.
// NOTE(review): presumably flags the names that take part in ObjectLinks; confirm against the portal runtime.
var ActiveNames = {"p$ExL": 1};
</script></head>
<body class=" static">
<div class="grid">
<div class="col twelve_col nomargin shadow">
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
<div class="sysmessages">
<noscript>
<p class="nojs">
<strong>Warning:</strong>
The NCBI web site requires JavaScript to function.
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">more...</a>
</p>
</noscript>
</div>
<!--/.sysmessage-->
<div class="wrap">
<div class="page">
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<div xmlns="http://www.w3.org/1999/xhtml" id="universal_header" xml:base="http://127.0.0.1/sites/static/header_footer">
<section class="usa-banner">
<div class="usa-accordion">
<header class="usa-banner-header">
<div class="usa-grid usa-banner-inner">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
<p>An official website of the United States government</p>
<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
<span class="usa-banner-button-text">Here's how you know</span>
</button>
</div>
</header>
<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
<div class="usa-banner-guidance-gov usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
<div class="usa-media_block-body">
<p>
<strong>The .gov means it's official.</strong>
<br />
Federal government websites often end in .gov or .mil. Before
sharing sensitive information, make sure you're on a federal
government site.
</p>
</div>
</div>
<div class="usa-banner-guidance-ssl usa-width-one-half">
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
<div class="usa-media_block-body">
<p>
<strong>The site is secure.</strong>
<br />
The <strong>https://</strong> ensures that you are connecting to the
official website and that any information you provide is encrypted
and transmitted securely.
</p>
</div>
</div>
</div>
</div>
</section>
<div class="usa-overlay"></div>
<header class="ncbi-header" role="banner" data-section="Header">
<div class="usa-grid">
<div class="usa-width-one-whole">
<div class="ncbi-header__logo">
<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
</a>
</div>
<div class="ncbi-header__account">
<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
<span class="fa fa-user" aria-hidden="true">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
<g style="fill: #fff">
<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
</g>
</svg>
</span>
<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
<span class="sr-only">Show account info</span>
</button>
</div>
<div class="ncbi-popup-anchor">
<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
<div class="ncbi-popup-head">
<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
<span class="fa fa-times">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
</svg>
</span>
<span class="usa-sr-only">Close</span></button>
<h4>Account</h4>
</div>
<div class="account-user-info">
Logged in as:<br />
<b><span class="username" id="uname_long">username</span></b>
</div>
<div class="account-links">
<ul class="usa-unstyled-list">
<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
</ul>
</div>
</div>
</div>
</div>
</div>
</header>
<div role="navigation" aria-label="access keys">
<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
</div>
<section data-section="Alerts">
<div class="ncbi-alerts-placeholder"></div>
</section>
</div>
</div>
<!--/.header-->
<div class="header">
<div class="res_logo"><h1 class="res_name"><a href="/refseq/" title="RefSeq home">RefSeq</a></h1><h2 class="res_tagline">Integrated reference sequences</h2></div>
<div class="search"><form method="get" action="/refseq/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="refseq" selected="selected">RefSeq</option><option value="books">Books</option><option value="protein">Protein</option><option value="gene" class="last">Gene</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="refseq">RefSeq</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option 
value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search RefSeq" value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="isEnabled:false,disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'yes'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div><input type="hidden" name="p$a" id="p$a" /><input type="hidden" name="p$l" id="p$l" value="PAFAppLayout" /><input type="hidden" name="p$st" id="p$st" value="refseq" /><input name="SessionId" id="SessionId" value="CE8B5AF87C7FFCB1_0191SID" disabled="disabled" type="hidden" /><input name="Snapshot" id="Snapshot" value="/projects/refseq/refseq@1.21" disabled="disabled" type="hidden" /></form></div>
</div>
<div class="nav_and_browser">
</div>
<!-- was itemctrl -->
<div class="container">
<div id="maincontent" class="content col twelve_col last">
<div class="col1">
<h1 id="ncbi-refseq-functional-elements">NCBI RefSeq Functional Elements</h1>
<div class="toc">
<ul>
<li><a href="#Overview">Overview</a></li>
<li><a href="#Functional_Element_RefSeq_Records">RefSeq Functional Element Records</a><ul>
<li><a href="#Functional_Element_RefSeq_Feature_Annotation">RefSeq Functional Element Feature Annotation</a></li>
<li><a href="#Feature_Annotation_Glossary">Feature Annotation Glossary</a></li>
<li><a href="#cell_type_activity">Cell or Tissue Type Activity</a></li>
</ul>
</li>
<li><a href="#Interactions">Interaction Data</a></li>
<li><a href="#Data_Access">Data Access</a><ul>
<li><a href="#Access_via_Gene">Access via Gene</a></li>
<li><a href="#Access_via_Nucleotide">Access via Nucleotide</a></li>
<li><a href="#Access_via_BLAST">Access via BLAST</a></li>
<li><a href="#Access_via_BioProject">Access via BioProject</a></li>
<li><a href="#Access_via_NCBI_Graphical_Displays">Access via NCBI Graphical Displays</a></li>
<li><a href="#Access_via_Track_Hub">Access via the RefSeq Functional Elements Track Hub</a></li>
<li><a href="#Access_via_FTP">Access via FTP</a><ul>
<li><a href="#RefSeq_FTP">RefSeq FTP</a></li>
<li><a href="#Gene_FTP">Gene FTP</a></li>
<li><a href="#Genomes_FTP">Genomes FTP</a></li>
<li><a href="#Feature_table">Feature Table</a></li>
<li><a href="#Feat_extraction">Feature and Metadata Extraction Examples</a></li>
</ul>
</li>
</ul>
</li>
<li><a href="#References">References</a></li>
<li><a href="#Feedback">Feedback</a></li>
</ul>
</div>
<h2 id="Overview">Overview</h2>
<p>NCBI provides <a href="https://www.ncbi.nlm.nih.gov/refseq/">RefSeq</a> and <a href="https://www.ncbi.nlm.nih.gov/gene/">Gene</a> records for <strong>non-genic</strong> functional elements that have been described in the literature and are experimentally validated. Elements in scope include experimentally verified gene regulatory regions (e.g., enhancers, silencers, locus control regions), known structural elements (e.g., insulators, DNase I hypersensitive sites, matrix/scaffold-associated regions), well-characterized DNA replication origins, and clinically-significant sites of DNA recombination and genomic instability. Priority is given to genomic regions that are implicated in human disease or are otherwise of significant interest to the research community. Currently, the scope of this project is restricted to human and mouse. Our current scope does not include functional elements predicted from large-scale epigenomic mapping studies, nor elements that exist solely based on disease-associated variation.</p>
<p>Each RefSeq Functional Element (RefSeqFE) sequence has a corresponding record in NCBI's <a href="https://www.ncbi.nlm.nih.gov/gene/">Gene</a> database (see example in <a href="#Figure1">Figure 1</a>). NCBI Gene records for Functional Elements differ from conventional genes in that they have the Gene type 'biological region.' All Functional Element Gene records include a list and a graphical view of annotated feature types, a brief summary of the function of the region, a list of related INSDC accessions, and a comprehensive bibliography of relevant publications. A link to the orthologous human or mouse record is provided where appropriate.</p>
<p><img src="/core/assets/refseq/images/GeneSumm4_18.png" alt="Gene Summary" id="Figure1" /></p>
<p><strong>Figure 1.</strong> An example of an NCBI Gene record for a biological region (only the Summary section is shown here). Note that the 'Gene type' is 'biological region' and 'Feature type(s)' are listed.</p>
<h2 id="Functional_Element_RefSeq_Records">RefSeq Functional Element Records</h2>
<p>RefSeq Functional Element sequences are represented as follows:</p>
<ul>
<li>As genomic RefSeqs with an NG_ prefix and the 'RefSeqFE' keyword (e.g., <a href="https://www.ncbi.nlm.nih.gov/nuccore/NG_046887">NG_046887.1</a>).</li>
<li>As DNA sequences encompassing the genomic range of one or more experimentally-validated functional elements.</li>
<li>Based on the <strong>plus</strong> strand of the current human or mouse reference genome assembly, unless otherwise indicated.</li>
<li>With 100-nt padding on each end for extra genomic context.</li>
<li>Elements considered to be functionally related and closely situated in the genome are included together on the same NG_ record (e.g., an enhancer and contained protein-binding sites; multiple nearby enhancer and/or promoter fragments).</li>
<li>Experimentally-validated features are annotated on each sequence record through manual curation by NCBI RefSeq staff, as described in the <a href="#Feature_Annotation_Glossary">Feature Annotation Glossary</a> below.</li>
<li>Manually curated RefSeq Functional Element records have a REVIEWED status (see <a href="https://www.ncbi.nlm.nih.gov/refseq/about/">About RefSeq</a> for status descriptions).</li>
<li>Records generated through automatic bulk processing, such as the validated dataset from the <a href="https://enhancer.lbl.gov/">VISTA Enhancer Browser</a>, have a PROVISIONAL status.</li>
</ul>
<h3 id="Functional_Element_RefSeq_Feature_Annotation">RefSeq Functional Element Feature Annotation</h3>
<p>RefSeq Functional Element sequences include manually curated features in accordance with <a href="http://www.insdc.org/">International Nucleotide Sequence Database Collaboration</a> (INSDC) standards. Features that are supported by direct experimental evidence include at least one '/experiment' qualifier with an evidence code (ECO ID) from the <a href="http://www.evidenceontology.org/">Evidence &amp; Conclusion Ontology</a>, and at least one citation from <a href="https://www.ncbi.nlm.nih.gov/pubmed/">PubMed</a>. It is important to note that annotated sequence ranges may be approximate depending on the experimental evidence type, and that features may include extraneous sequences that are not necessary for function. Feature annotation can be viewed on the RefSeq Nucleotide flat file (<a href="#Figure2">Figure 2</a>), in the graphical view in Gene records (<a href="#Figure3">Figure 3</a>), and in NCBI genome browsers (see <a href="#Access_via_NCBI_Graphical_Displays">Access via NCBI Graphical Displays</a> below).</p>
<p><img src="/core/assets/refseq/images/FlatfileFEFeats_4-23.png" alt="Flat File Features" id="Figure2" /></p>
<p><strong>Figure 2.</strong> Example of a RefSeq Functional Element NG_ flat file and descriptions of feature annotation and common qualifiers.</p>
<p><img src="/core/assets/refseq/images/GraphicalView4_18.png" alt="Graphical View" id="Figure3" /></p>
<p><strong>Figure 3.</strong> Feature annotation in the Gene graphical display. Additional track sets, including conventional gene annotation, repeat region and variation tracks, may be exposed using the Tracks button (see <a href="https://www.ncbi.nlm.nih.gov/tools/sviewer/">NCBI Sequence Viewer Documentation</a>).</p>
<h3 id="Feature_Annotation_Glossary">Feature Annotation Glossary</h3>
<p>Features are annotated on RefSeq Functional Element NG_ records based on review of the scientific literature. Annotated features are in accord with INSDC <a href="http://www.insdc.org/documents/feature-table">Feature Table</a> specifications, where some INSDC feature keys have specific feature classes, e.g., the 'misc_recomb' and 'regulatory' feature keys. In addition, RefSeq-specific controlled vocabulary terms are sometimes used to provide further feature specificity, e.g., for 'misc_feature' features, or for 'misc_recomb' or 'regulatory' features that are not defined by a specific feature class. The feature keys, feature classes and controlled vocabularies can be mapped to equivalent terms in the <a href="http://www.sequenceontology.org/">Sequence Ontology</a> (SO), where those SO terms are used as SO_types for genome-annotated features in column 3 of NCBI GFF3 files (see the <a href="#Feature_table">feature table</a> below). The following feature types are used for RefSeq Functional Elements, with equivalent SO IDs shown in parentheses:</p>
<p><strong>misc_feature</strong></p>
<p>Used for functionally significant features that currently lack a more specific INSDC feature key. Controlled vocabularies are provided for additional feature specificity and to facilitate bulk search and retrieval. In GenBank flat files, controlled vocabulary terms are used at the beginning of a '/note' qualifier and are separated from any additional '/note' text by a semicolon. Underscores replace spaces for the same terms in ASN.1 and GFF3 formats.</p>
<p><em>Flat file qualifier example</em>: /note="conserved region; ultraconserved element uc.328"</p>
<p>RefSeq controlled vocabularies for 'misc_feature':</p>
<blockquote>
<ul>
<li>biological_region (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0001411">SO:0001411</a>). <strong>Special note:</strong> This is a parental feature spanning all other feature annotation on each record. It is a 'misc_feature' denoted by '/note="Region: biological region"' in GenBank flat files, a 'Region' feature in ASN.1 format and a 'biological_region' in GFF3 format.</li>
<li>CAGE_cluster (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0001917">SO:0001917</a>)</li>
<li>conserved_region (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000330">SO:0000330</a>)</li>
<li>nucleotide_cleavage_site (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0002204">SO:0002204</a>)</li>
<li>nucleotide_motif (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000714">SO:0000714</a>)</li>
<li>repeat_instability_region (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0002202">SO:0002202</a>)</li>
<li>replication_start_site (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0002203">SO:0002203</a>)</li>
<li>sequence_alteration (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0001059">SO:0001059</a>)</li>
<li>sequence_comparison (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0002072">SO:0002072</a>)</li>
<li>sequence_feature (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000110">SO:0000110</a>)</li>
<li>transcription_start_site (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000315">SO:0000315</a>)</li>
</ul>
</blockquote>
<p><strong>misc_recomb</strong></p>
<p>Used for genomic regions known to undergo recombination events. See INSDC's <a href="http://www.insdc.org/controlled-vocabulary-recombination-class">Controlled vocabulary for recombination_class</a> for details on 'recombination_class' types.</p>
<p><em>Flat file qualifier example</em>: /recombination_class="non_allelic_homologous"</p>
<p>INSDC 'recombination_class' types used for RefSeq Functional Elements:</p>
<blockquote>
<ul>
<li>chromosome_breakpoint (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0001021">SO:0001021</a>)</li>
<li>meiotic (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0002155">SO:0002155</a>)</li>
<li>mitotic (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0002154">SO:0002154</a>)</li>
<li>non_allelic_homologous (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0002094">SO:0002094</a>)</li>
<li>other</li>
</ul>
</blockquote>
<p>RefSeq controlled vocabularies for recombination_class="other":</p>
<blockquote>
<ul>
<li>recombination_hotspot (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000298">SO:0000298</a>)</li>
</ul>
</blockquote>
<p><strong>misc_structure</strong> (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000002">SO:0000002</a>)</p>
<p>Used for miscellaneous structural regions that are considered functionally important, including G-quadruplex and cruciform structures. Additional details are provided in flat file '/note' qualifiers.</p>
<p><strong>mobile_element</strong> (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0001037">SO:0001037</a>)</p>
<p>Used for mobile elements, including transposable elements, retrotransposons and endogenous retroviruses, that are described in the literature as being functionally significant and/or represent genomic landmarks for a region. The repeat family (e.g., SINE:AluSg) is indicated in the '/mobile_element_type' qualifier. An '/inference' qualifier may be provided with a reference to a publicly accessible algorithm, e.g., RepeatMasker:4.0.5. Note that the vast majority of mobile elements located within the span of a RefSeq Functional Element NG_ will not be annotated. Mobile elements can be viewed in NCBI graphical displays by exposing the 'Repeats identified by RepeatMasker' track when viewing annotation on the genome.</p>
<p><strong>protein_bind</strong> (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000410">SO:0000410</a>)</p>
<p>Used where there is experimental evidence of <strong>direct protein binding</strong> to a DNA fragment, e.g., electrophoretic mobility shift assay (EMSA) or DNase I footprint evidence for binding of a specific protein, family, or complex. Predicted binding sites that lack experimental validation, or which are validated solely by chromatin immunoprecipitation, are not annotated. The annotated range is based on the experimental fragment described in the literature (e.g., an EMSA probe) and is typically longer than the core binding motif. The bound protein name (or protein family name) is provided in the '/bound_moiety' qualifier, where the protein name is the <a href="https://www.genenames.org/">HUGO Gene Nomenclature Committee</a> (HGNC) official full name for the encoding gene (e.g., <a href="https://www.ncbi.nlm.nih.gov/gene?cmd=Retrieve&amp;dopt=Graphics&amp;list_uids=10664">CTCF</a> is 'CCCTC-binding factor').</p>
<p><strong>regulatory</strong></p>
<p>See INSDC's <a href="http://www.insdc.org/controlled-vocabulary-regulatoryclass">Controlled vocabulary for regulatory_class</a> for details on 'regulatory_class' types. Typically, the annotated region corresponds to an experimentally-defined fragment that was found to be sufficient for function, e.g., a fragment used in a reporter assay. Annotated sequences may therefore include extraneous sequences that are not necessary for function.</p>
<p>Specific notes:</p>
<blockquote>
<ul>
<li>
<p>Short feature motifs (i.e., CAAT_signal, TATA_box, GC_signal) that lack direct experimental validation may occasionally be annotated when described in a publication as a significant genomic landmark.</p>
</li>
<li>
<p>DNase_I_hypersensitive_site ranges are determined on a case-by-case basis based on examination of published experimental evidence. They typically include generous padding equivalent to at least one nucleosome plus linker span (~200 nt) in addition to the determined core site. Closely situated sites that are difficult to resolve may be combined into a single annotated feature, as indicated in the '/note' qualifier.</p>
</li>
</ul>
</blockquote>
<p>INSDC 'regulatory_class' types used for RefSeq Functional Elements:</p>
<blockquote>
<ul>
<li>CAAT_signal (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000172">SO:0000172</a>)</li>
<li>DNase_I_hypersensitive_site (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000685">SO:0000685</a>)</li>
<li>enhancer (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000165">SO:0000165</a>)</li>
<li>enhancer_blocking_element (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0002190">SO:0002190</a>)</li>
<li>GC_signal (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000173">SO:0000173</a>)</li>
<li>imprinting_control_region (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0002191">SO:0002191</a>)</li>
<li>insulator (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000627">SO:0000627</a>)</li>
<li>locus_control_region (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000037">SO:0000037</a>)</li>
<li>matrix_attachment_region (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000036">SO:0000036</a>)</li>
<li>promoter (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000167">SO:0000167</a>)</li>
<li>response_element (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0002205">SO:0002205</a>)</li>
<li>replication_regulatory_region (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0001682">SO:0001682</a>)</li>
<li>silencer (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000625">SO:0000625</a>)</li>
<li>TATA_box (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000174">SO:0000174</a>)</li>
<li>transcriptional_cis_regulatory_region (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0001055">SO:0001055</a>)</li>
<li>other</li>
</ul>
</blockquote>
<p>RefSeq controlled vocabularies for regulatory_class="other":</p>
<blockquote>
<ul>
<li>epigenetically_modified_region (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0001720">SO:0001720</a>)</li>
<li>micrococcal_nuclease_hypersensitive_site (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0005836">SO:0005836</a>)</li>
</ul>
</blockquote>
<p><strong>repeat_region</strong></p>
<p>Used for tandem repeats, microsatellites, and other low-complexity repeats that are described in the literature as being functionally significant and/or represent genomic landmarks for a region. Note that not all low-complexity regions located within the span of a RefSeq Functional Element NG_ will be annotated. An '/inference' qualifier may be provided with a reference to a publicly accessible algorithm, e.g., RepeatMasker:4.0.5.</p>
<p>INSDC repeat types used for RefSeq Functional Elements:</p>
<blockquote>
<ul>
<li>minisatellite (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000643">SO:0000643</a>)</li>
<li>microsatellite (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000289">SO:0000289</a>)</li>
<li>direct_repeat (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000314">SO:0000314</a>)</li>
<li>dispersed_repeat (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000658">SO:0000658</a>)</li>
<li>inverted_repeat (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000294">SO:0000294</a>)</li>
<li>tandem_repeat (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000705">SO:0000705</a>)</li>
</ul>
</blockquote>
<p><strong>rep_origin</strong> (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000296">SO:0000296</a>)</p>
<p>Used for DNA replication origins that are well-supported and reproducibly observed in the literature. A '/direction' qualifier may be included when it is known that the origin fires in one or both directions.</p>
<p><strong>stem_loop</strong> (<a href="http://www.sequenceontology.org/browser/current_svn/term/SO:0000313">SO:0000313</a>)</p>
<p>Used prior to August 2020 for stem-loop regions that are considered functionally important. Note that stem-loop structures are formed by complementary pairing on the same strand and frequently correspond to cruciform-like structures in double-stranded DNA, thus these are now represented as misc_structure features.</p>
<h3 id="cell_type_activity">Cell or Tissue Type Activity</h3>
<p>Most RefSeqFE features include formatted cell/tissue type activity derived from the experimental evidence for the feature. Cell/tissue types are generally named as described in supporting publications, and may be appended with specific conditions, developmental states or additives needed for feature activity. Cell/tissue type activities are indicated in '/function' qualifiers in GenBank flat files, or in equivalent 'function=' attributes in GFF3 or bigBed files for genome-annotated features.</p>
<p><strong>General '/function' qualifier format for most curated features:</strong> </p>
<p>/function="free-text describing feature function {active_cell/tissue: cell type 1 | cell type 2}"</p>
<p><a href="https://www.ncbi.nlm.nih.gov/nucleotide/NG_055003">NG_055003.1</a> examples: </p>
<ul>
<li>/function="up-regulates the human A-gamma-globin promoter in transiently transfected K562 cells and in K562 and MEL cell colony assays {active_cell/tissue: K562 | MEL}" </li>
<li>/function="necessary for Hbb-y promoter activity and binds BKLF and EKLF in SCFA-induced MEL cells {active_cell/tissue: MEL(+SCFA)}"</li>
<li>/function="necessary for expression of the Hbb-y gene in embryonic yolk sac {active_cell/tissue: yolk sac(E10.5)}"</li>
</ul>
<p><strong>Specific formatting for VISTA enhancer features:</strong> </p>
<p>/function="enhancer in: tissue 1[score] | tissue 2[score]"</p>
<p><a href="https://www.ncbi.nlm.nih.gov/nuccore/NG_053556">NG_053556.1</a> example: </p>
<ul>
<li>/function="enhancer in: neural tube[8/9] | forebrain[9/9]"</li>
</ul>
<p>Formatting includes:</p>
<ul>
<li>Cell/tissue types listed within curly brackets and preceded by the string 'active_cell/tissue: ' for most cases, or preceded by 'enhancer in: ' for VISTA enhancer features.</li>
<li>Space-flanked pipe delimitation for multiple cell/tissue types. </li>
<li>Optional parentheses following a cell/tissue type to indicate specific conditions or developmental states. A plus sign indicates an additive needed for activity, e.g., a chemical used for cellular induction or a co-transfected factor necessary for activity. If a feature is active with and without an additive or condition, or if specific conditions for activity are not readily apparent from descriptions in cited publications, only the root cell/tissue name is provided. </li>
<li>Optional square brackets following a cell/tissue type to indicate activity scores. These are currently specific to VISTA enhancer features given the general non-quantitative nature of RefSeqFE features, which are based on a diverse range of evidence types from different sources. </li>
</ul>
<p>Limitations and notes for use: </p>
<ul>
<li>Cell/tissue type specificity may not be limited to the listed cell/tissue type(s), which are derived from publications cited for experimental evidence. This does not exclude possible feature activity in other cell/tissue types that were not tested in the cited evidence. </li>
<li>Lack of activity in a tested cell/tissue type is not included. </li>
<li>Markup for cell/tissue activity conditions may not be fully comprehensive, thus users should refer to cited publications for full details. </li>
<li>Not all features are in scope for cell/tissue activity markup, e.g., repeat or recombination features. </li>
<li>A minority of in-scope RefSeqFE features currently lack formatted markup, which will be gradually backfilled. </li>
</ul>
<p>Cell or tissue type activity data can be accessed as follows: </p>
<ul>
<li>From '/function' qualifiers on either individual RefSeqFE NG_ accessions obtained from the Nucleotide database (see <a href="#Access_via_Nucleotide">below</a>) or from bulk download of weekly updated GenBank flat files obtained from the RefSeq FTP site, as described <a href="#RefSeq_FTP">below</a>. </li>
<li>From 'function=' attributes in column 9 of GFF3 files downloaded from the Genomes FTP site, as described <a href="#Genomes_FTP">below</a>. Examples of RefSeqFE cell/tissue type activity data extraction from GFF3 files are included <a href="#gff_metadata">below</a>. </li>
<li>From a dedicated custom metadata column (#14) in 'FEfeats_AR##.bb' bigBed files produced from March 2023 onwards, obtained from the RefSeq FTP site as described <a href="#RefSeq_FTP">below</a>. Relevant cell/tissue types are provided in a pipe-delimited list per feature. Examples of RefSeqFE cell/tissue type activity data extraction from bigBedToBed-converted files are included <a href="#bb_metadata">below</a>. </li>
<li>From metadata popup boxes that display '/function' qualifier texts for individual features in <a href="#Access_via_NCBI_Graphical_Displays">NCBI graphical displays</a> and the <a href="#Access_via_Track_Hub">RefSeqFE track hub</a>.</li>
</ul>
<h2 id="Interactions">Interaction Data</h2>
<p>Non-genic biological regions are linked to target genes and other biological regions when there is experimental evidence for interactions. Interaction data is provided for regulatory interactions, typically between a gene regulatory element and a target gene, and for recombination partner interactions between recombination-type biological regions. Biological region ranges are based on parental 'biological_region' features that may include several underlying gene regulatory or other non-genic functional features, while target gene ranges are based on NCBI 'gene' feature annotations. Each interaction is supported by experimental data in the literature, and publications are provided in our bigInteract files described <a href="#bigInteract">below</a>.</p>
<p>Interaction data can be accessed as follows:</p>
<ul>
<li>
<p id="bigInteract">By download from the <a href="https://ftp.ncbi.nlm.nih.gov/refseq/">RefSeq FTP</a> site in <a href="http://genome.ucsc.edu/goldenPath/help/interact.html">bigInteract</a> file format. Pairwise interactions with genomic coordinates are provided in the 'FEregintxns_AR##.inter.bb' and 'FErecombpartners_AR##.inter.bb' files, where '##' represents the NCBI annotation release identifier. These files are available at this <a href="https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/trackhub/data/human/">link</a> for human, or at this <a href="https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/trackhub/data/mouse/">link</a> for mouse. We recommend the use of data from the most recent annotation release directory. Separate bigInteract files are provided for regulatory interactions (both human and mouse) and recombination partners (human only). These files also include a custom metadata column (#19) containing a comma-delimited list of PubMed IDs with experimental support for each interaction.</p>
</li>
<li>
<p>By download from the <a href="https://ftp.ncbi.nlm.nih.gov/refseq/">RefSeq FTP</a> site in <a href="http://genome.ucsc.edu/goldenPath/help/bigBed.html">bigBed</a> file format. Interactions are provided for biological regions in a custom metadata column (#13) in the 'FEbiolregions_AR##.bb' files as pipe-delimited lists of loci known to interact with the given biological region. These files are available from the same FTP directories indicated for the bigInteract files <a href="#bigInteract">above</a>. Additional information about 'FEbiolregions_AR##.bb' files can be found <a href="#bigBed">below</a>.</p>
</li>
<li>
<p>By download from the <a href="https://ftp.ncbi.nlm.nih.gov/refseq/">RefSeq FTP</a> site in <a href="https://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html">GenBank flat file</a> format ('[human/mouse].biological_region.gbff.gz' files). RefSeq accessions for biological regions with validated functional interactions contain a '/function' qualifier listing loci that are known to interact with the biological region. The qualifier is present on the 'misc_feature' denoted as '/note="Region: biological region"'. For example, on <a href="https://www.ncbi.nlm.nih.gov/nuccore/NG_042043.1">NG_042043.1</a> the '/function' qualifier appears as: <br />
/function="regulatory_interactions: H19 | Igf2 | LOC105311846" <br />
The weekly-updated files are available at this <a href="https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/">link</a>. Alternatively, individual RefSeq accessions may be queried, viewed and downloaded in flat file format in the <a href="https://www.ncbi.nlm.nih.gov/nucleotide/">Nucleotide</a> database, as described <a href="#Access_via_Nucleotide">below</a>. </p>
</li>
<li>
<p>By download from the <a href="https://ftp.ncbi.nlm.nih.gov/genomes/">Genomes FTP</a> site in <a href="https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md">GFF3</a> file format. Relevant 'biological_region' features have a 'function' attribute in column 9, where interacting loci are indicated as described for GenBank flat files above. GFF3 file availability is described <a href="#Genomes_FTP">below</a>.</p>
</li>
<li>
<p>By graphical visualization from the RefSeq Functional Elements track hub (RefSeqFE Hub). Interaction data may be viewed in the regulatory interactions and recombination partner tracks, as described for our track hub <a href="#Access_via_Track_Hub">below</a> and shown in <a href="#Figure5">Figure 5</a>.</p>
</li>
</ul>
<h2 id="Data_Access">Data Access</h2>
<p>RefSeq Functional Element records can be accessed via the following NCBI resources:</p>
<ul>
<li><a href="#Access_via_Gene">Gene</a></li>
<li><a href="#Access_via_Nucleotide">Nucleotide</a></li>
<li><a href="#Access_via_BLAST">BLAST</a></li>
<li><a href="#Access_via_BioProject">BioProject</a></li>
<li><a href="#Access_via_NCBI_Graphical_Displays">NCBI Graphical Displays</a></li>
<li><a href="#Access_via_Track_Hub">RefSeq Functional Elements Track Hub</a></li>
<li><a href="#Access_via_FTP">FTP</a></li>
</ul>
<h3 id="Access_via_Gene">Access via Gene</h3>
<p>RefSeq Functional Element records can be found in Gene by searching in a variety of ways, including by record names or symbols, associated PubMed IDs, accession IDs, annotated chromosome and base locations, organism, text words, and properties (e.g., genetype biological region[prop]). Additional options can be found in the <a href="https://www.ncbi.nlm.nih.gov/gene/advanced">Gene Advanced Search Builder</a>. Results can be further filtered by selecting side facets in search results pages. See the <a href="https://www.ncbi.nlm.nih.gov/books/NBK3841/#EntrezGene">Gene Help</a> document for more information on querying Gene.</p>
<p><em>Example queries</em>:</p>
<p>To find all RefSeq Functional Element records in human:
<a href="https://www.ncbi.nlm.nih.gov/gene/?term=genetype+biological+region%5Bprop%5D+AND+homo+sapiens%5Borgn%5D">genetype biological region[prop] AND homo sapiens[orgn]</a></p>
<p>To find named recombination regions in human:
<a href="https://www.ncbi.nlm.nih.gov/gene/?term=genetype+biological+region%5Bprop%5D+AND+homo+sapiens%5Borgn%5D+AND+recombination+region%5Bgene%2Fprotein+name%5D">genetype biological region[prop] AND homo sapiens[orgn] AND recombination region[gene/protein name]</a></p>
<p>To find human records that include a locus control region feature:
<a href="https://www.ncbi.nlm.nih.gov/gene/?term=genetype+biological+region%5Bprop%5D+AND+homo+sapiens%5Borgn%5D+AND+feattype+locus+control+region%5Bprop%5D">genetype biological region[prop] AND homo sapiens[orgn] AND feattype locus control region[prop]</a></p>
<p>To find VISTA enhancer records in mouse:
<a href="https://www.ncbi.nlm.nih.gov/gene/?term=genetype+biological+region%5Bprop%5D+AND+mus+musculus%5Borgn%5D+AND+VISTA*%5Bgene%2Fprotein+name%5D">genetype biological region[prop] AND mus musculus[orgn] AND VISTA*[gene/protein name]</a></p>
<p>To find human Functional Element records associated with coronavirus biology:
<a href="https://www.ncbi.nlm.nih.gov/gene/?term=coronavirus+related%5Bfilter%5D+AND+genetype+biological+region%5Bproperties%5D">coronavirus related[filter] AND genetype biological region[properties]</a> <br />
Or find subsets of those records involved in particular processes with:</p>
<blockquote>
<ul>
<li><a href="https://www.ncbi.nlm.nih.gov/gene/?term=%22involved+in+host+gene+regulation%22%5Btext+word%5D">"involved in host gene regulation"[text word]</a></li>
<li><a href="https://www.ncbi.nlm.nih.gov/gene/?term=%22involved+in+host+gene+recombination%22%5Btext+word%5D">"involved in host gene recombination"[text word]</a></li>
</ul>
</blockquote>
<h3 id="Access_via_Nucleotide">Access via Nucleotide</h3>
<p>The <a href="https://www.ncbi.nlm.nih.gov/nuccore/">Nucleotide</a> database displays RefSeq Functional Element records in GenBank flat file format by default. See the <a href="https://www.ncbi.nlm.nih.gov/books/NBK44863/">Entrez Sequences Help</a> document for details on Nucleotide record display options, including instructions on how to retrieve FASTA sequences for specific features annotated on each RefSeq.</p>
<p>RefSeq Functional Element records can be queried in the Nucleotide database in several ways, including by names, symbols, accession ID, organism or associated publications. See the <a href="https://www.ncbi.nlm.nih.gov/nuccore/advanced">Nucleotide Advanced Search Builder</a> for further options. Queries by the 'RefSeqFE' keyword, by BioProject ID (see <a href="#Access_via_BioProject">Access via BioProject</a> below) and by Feature key are particularly useful for retrieving Functional Element RefSeqs.</p>
<p><em>Example queries</em>:</p>
<p>To retrieve all human Functional Element RefSeqs using the 'RefSeqFE' keyword:
<a href="https://www.ncbi.nlm.nih.gov/nuccore/?term=homo+sapiens%5Borgn%5D+AND+RefSeqFE%5Bkeyword%5D">homo sapiens[orgn] AND RefSeqFE[keyword]</a> <br />
Or alternatively by using the PRJNA343958 BioProject accession:
<a href="https://www.ncbi.nlm.nih.gov/nuccore/?term=homo+sapiens%5Borgn%5D+AND+PRJNA343958%5Bbioproject%5D">homo sapiens[orgn] AND PRJNA343958[bioproject]</a></p>
<p>To retrieve mouse Functional Element RefSeqs with an annotated enhancer feature:
<a href="https://www.ncbi.nlm.nih.gov/nuccore/?term=regulatory+enhancer%5Bfeature+key%5D+AND+mus+musculus%5Borgn%5D+AND+RefSeqFE%5Bkeyword%5D">regulatory enhancer[feature key] AND mus musculus[orgn] AND RefSeqFE[keyword]</a></p>
<p>To retrieve human Functional Element RefSeqs with an annotated rep_origin feature:
<a href="https://www.ncbi.nlm.nih.gov/nuccore/?term=rep+origin%5Bfeature+key%5D+AND+homo+sapiens%5Borgn%5D+AND+RefSeqFE%5Bkeyword%5D">rep origin[feature key] AND homo sapiens[orgn] AND RefSeqFE[keyword]</a></p>
<p>To retrieve human Functional Element RefSeqs associated with coronavirus biology:
<a href="https://www.ncbi.nlm.nih.gov/nuccore/?term=coronavirus+related%5Bprop%5D+AND+biomol+genomic%5Bprop%5D">coronavirus related[prop] AND biomol genomic[prop]</a> <br />
Or find subsets of those records involved in particular processes with:</p>
<blockquote>
<ul>
<li><a href="https://www.ncbi.nlm.nih.gov/nuccore/?term=%22involved+in+host+gene+regulation%22%5Btext+word%5D">"involved in host gene regulation"[text word]</a></li>
<li><a href="https://www.ncbi.nlm.nih.gov/nuccore/?term=%22involved+in+host+gene+recombination%22%5Btext+word%5D">"involved in host gene recombination"[text word]</a></li>
</ul>
</blockquote>
<h3 id="Access_via_BLAST">Access via BLAST</h3>
<p>RefSeq Functional Element sequences are in NCBI's Nucleotide database, thus matching RefSeqs can be retrieved through <a href="https://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastn&amp;PAGE_TYPE=BlastSearch&amp;LINK_LOC=blasthome">Nucleotide BLAST</a> sequence searches when the following options are selected in the 'Choose Search Set' area:</p>
<ul>
<li>Standard databases (nr etc.) -- radio button selection</li>
<li>Nucleotide collection (nr/nt) -- pull-down menu selection</li>
<li>Organism -- entering an organism name (human or mouse) is optional but will yield organism-specific results with faster searching</li>
</ul>
<h3 id="Access_via_BioProject">Access via BioProject</h3>
<p>All RefSeq Functional Elements are represented in <a href="https://www.ncbi.nlm.nih.gov/bioproject">BioProject</a> accession <a href="https://www.ncbi.nlm.nih.gov/bioproject/?term=PRJNA343958">PRJNA343958</a>. Sequence records can be retrieved from links within the 'Project Data' section. Nucleotide database queries can be appended with that BioProject accession for retrieval of RefSeq Functional Element records (see <a href="#Access_via_Nucleotide">Access via Nucleotide</a> example queries above).</p>
<h3 id="Access_via_NCBI_Graphical_Displays">Access via NCBI Graphical Displays</h3>
<p>All genome-annotated features from RefSeq Functional Elements can be viewed by turning on the 'Biological regions' track available in the 'Genes/Products' track group and 'NCBI Other Features' category in NCBI graphical displays, including the <a href="https://www.ncbi.nlm.nih.gov/genome/gdv/">Genome Data Viewer</a>, <a href="https://www.ncbi.nlm.nih.gov/projects/sviewer/">Sequence Viewer</a>, <a href="https://www.ncbi.nlm.nih.gov/variation/view/">Variation Viewer</a> and graphical images in Gene records. Note that the 'Biological regions' track may be viewable by default depending on the NCBI browser. For example, it will be on view in Gene records when RefSeq features are annotated on the reference genome, or in the Genome Data Viewer or Variation Viewer when the 'Genes' track set is selected under 'NCBI Recommended Track Sets', but it may be necessary to turn on the track if other track sets or the default tracks are selected. The track can be turned on under Tracks -&gt; Configure Tracks -&gt; Genes/Products -&gt; Category: NCBI Other Features, where selection of the most recent 'Biological regions, aggregate' track is recommended; see the <a href="#Figure4">Figure 4</a> track configuration interface. Note that the 'Biological regions' track does not include overlapping or nearby conventional gene annotations, which can be found in the 'Genes' track (<a href="#Figure3">Figure 3</a>). Similarly, users should refer to 'Variation' type tracks to see overlapping variation features, e.g., dbSNP, ClinVar or dbVar tracks. If the RefSeq has not yet been annotated on the genome, only the RefSeq (NG_ accession) sequence will be available for graphical viewing. For RefSeq graphical images, individual feature types, if not already viewable by default, may be viewed by turning on desired track types in the 'Features' track group (e.g., 'regulatory' Features, 'protein_bind' Features).</p>
<p><em>Graphical view example for HBB-LCR, GeneID:109580095:</em>
<a href="https://go.usa.gov/xf9ea">https://go.usa.gov/xf9ea</a></p>
<p><img src="/core/assets/refseq/images/Fig4ConfigPg_8-19.png" alt="Configure Page" id="Figure4" /></p>
<p><strong>Figure 4.</strong> NCBI genome browser track configuration dialog box. The most recent 'Biological regions' track can be selected in the 'Genes/Products' track group and 'NCBI Other Features' category.</p>
<h3 id="Access_via_Track_Hub">Access via the RefSeq Functional Elements Track Hub</h3>
<p>The RefSeq Functional Elements track hub (RefSeqFE Hub) includes tracks for biological regions, features, regulatory interactions and recombination partners. The hub is in <a href="http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html">UCSC track hub</a> format and can be viewed on a compatible genome browser, including the <a href="http://genome.ucsc.edu/cgi-bin/hgGateway">UCSC Genome Browser</a> (all tracks and metadata, see <a href="#Figure5">Figure 5</a> example), the NCBI <a href="https://www.ncbi.nlm.nih.gov/genome/gdv/">Genome Data Viewer</a> (biological region and feature tracks only; metadata is best viewed in the existing 'Biological regions' track described <a href="#Access_via_NCBI_Graphical_Displays">above</a>) or the <a href="http://www.ensembl.org/">Ensembl genome browser</a> (biological region and feature tracks only). </p>
<p>Use the following URL to connect to the RefSeqFE Hub: <span style="color:blue">https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/trackhub/hub.txt</span></p>
<p>Please see additional information in the <a href="https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/trackhub/RefSeqFE_Hub.html">track hub document</a>. Briefly, the following tracks are represented:</p>
<ul>
<li>
<p>RefSeq Functional Element biological regions (FE_biol_regions): Biological regions with metadata from the 'FEbiolregions_AR##.bb' bigBed file described <a href="#bigBed">below</a>. See the <a href="https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/trackhub/hg38/RefSeqFE_biolregions.html">biological regions track document</a> for more details.</p>
</li>
<li>
<p>RefSeq Functional Element features (FE_features): Functional features with metadata from the 'FEfeats_AR##.bb' bigBed file described <a href="#bigBed">below</a>. See the <a href="https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/trackhub/hg38/RefSeqFE_features.html">features track document</a> for more details.</p>
</li>
<li>
<p>RefSeq Functional Element recombination partners (Recomb_partners; currently human only): Recombination interactions from the 'FErecombpartners_AR##.inter.bb' bigInteract file described <a href="#bigInteract">above</a>. See the <a href="https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/trackhub/hg38/RefSeqFE_recombination_partners.html">recombination partners track document</a> for more details.</p>
</li>
<li>
<p>RefSeq Functional Element regulatory interactions (Reg_interactions): Regulatory interactions from the 'FEregintxns_AR##.inter.bb' bigInteract file described <a href="#bigInteract">above</a>. See the <a href="https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/trackhub/hg38/RefSeqFE_regulatory_interactions.html">regulatory interactions track document</a> for more details.</p>
</li>
</ul>
<p><strong>Note on track display in other genome browsers:</strong> While not all commonly used genome browsers are compatible with UCSC-formatted track hubs, many browsers, including those mentioned above, may offer options to view RefSeq Functional Element tracks by remote connection from file URLs, or as custom tracks based on downloaded files saved locally. These browsers will vary in their ability to accept bigBed or bigInteract files, or to display color-coding or metadata, and some may require the conversion of binary files to non-binary formats. Please check the relevant genome browser documentation for further information.</p>
<p><img src="/core/assets/refseq/images/Fig5FETrackHub_7-20.png" alt="RefSeqFE Hub tracks displayed in the UCSC Genome Browser at the human alpha-globin locus" id="Figure5" /></p>
<p><strong>Figure 5.</strong> UCSC Genome Browser image displaying tracks from the RefSeqFE Hub at the human alpha-globin locus.</p>
<h3 id="Access_via_FTP">Access via FTP</h3>
<p>RefSeq Functional Elements are available for download from the <a href="https://ftp.ncbi.nlm.nih.gov/">NCBI FTP site</a>, including the following specific subsites:</p>
<h4 id="RefSeq_FTP"><a href="https://ftp.ncbi.nlm.nih.gov/refseq/">RefSeq FTP</a></h4>
<p>All RefSeq records including those not yet annotated on a genome assembly. Includes a <a href="https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/">FunctionalElements directory</a> with the following content: </p>
<p>Weekly updated RefSeq accession files: </p>
<blockquote>
<ul>
<li>[human/mouse].biological_region.fna.gz -- RefSeq accessions for genomic biological regions (NG_ prefix) in FASTA format </li>
<li>[human/mouse].biological_region.gbff.gz -- RefSeq accessions for genomic biological regions (NG_ prefix) in GenBank flat file format </li>
</ul>
</blockquote>
<p>These files can also be found in species-specific 'biological_region' directories for <a href="https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/biological_region/">human</a> and <a href="https://ftp.ncbi.nlm.nih.gov/refseq/M_musculus/biological_region/">mouse</a>.</p>
<p><a href="https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/trackhub/">trackhub directory</a>: </p>
<p>Files and subdirectories necessary to support the RefSeq Functional Elements track hub described <a href="#Access_via_Track_Hub">above</a>. See <a href="http://genome.ucsc.edu/goldenPath/help/hgTrackHubHelp.html">UCSC track hub documentation</a> for an explanation of track hub structure and the necessary components. The following data files are provided within the <a href="https://ftp.ncbi.nlm.nih.gov/refseq/FunctionalElements/trackhub/data/">data subdirectory</a> and the species-specific annotation release (AR##) subdirectories therein: </p>
<ul>
<li>
<p id="bigBed">Genome-annotated biological region and feature files in <a href="http://genome.ucsc.edu/goldenPath/help/bigBed.html">bigBed</a> format: </p>
<ul>
<li>FEbiolregions_AR##.bb -- biological regions with metadata, provided as binary indexed BED 9+8<sup>1</sup> files where columns 1-9 are in standard BED format, with eight additional custom columns representing: <ul>
<li>Column 10: the <a href="https://www.ncbi.nlm.nih.gov/gene/">Gene</a> database identifier cross-reference </li>
<li>Column 11: the locus name </li>
<li>Column 12: the locus summary </li>
<li>Column 13: known interacting loci, regulatory interactions or recombination partners as described <a href="#Interactions">above</a> </li>
<li>Column 14: the RefSeq accession ID </li>
<li>Column 15: symbol aliases; empty if not applicable <sup>2</sup></li>
<li>Column 16: name aliases; empty if not applicable <sup>2</sup></li>
<li>Column 17: supporting publications from PubMed; includes all publications associated with the biological region <sup>2</sup></li>
</ul>
</li>
</ul>
<p><sup>1</sup> BED 9+5 format for biological region bigBed files predating February 2024 <br />
<sup>2</sup> only present in biological region bigBed files produced from February 2024 onwards</p>
<p>Note that the locus symbol is used for the name (column 4). These files use UCSC-style chromosome notation (column 1), except for human T2T-CHM13v2.0 assembly files, which use GenBank accession notation.</p>
<ul>
<li>FEfeats_AR##.bb -- functional features with metadata, provided as binary indexed BED 9+5<sup>1</sup> files where columns 1-9 are in standard BED format, with five additional custom columns representing: <ul>
<li>Column 10: database cross-references, including to the <a href="https://www.ncbi.nlm.nih.gov/gene/">Gene</a> record and other database records if applicable </li>
<li>Column 11: functional and descriptive information including experimental evidence and publication support </li>
<li>Column 12: supporting publications from PubMed; publications with evidence for the specific feature <sup>2</sup></li>
<li>Column 13: the RefSeq accession ID <sup>3</sup> </li>
<li>Column 14: cell or tissue type activity <sup>4</sup> </li>
<li>Column 15: <a href="https://www.ncbi.nlm.nih.gov/snp/">dbSNP</a> cross-reference for functionally validated genetic variants corresponding to the RefSeqFE feature, if applicable <sup>5</sup> </li>
</ul>
</li>
</ul>
<p><sup>1</sup> BED 9+3 format for the human <code>FEfeats_AR109.20200522.bb</code> and mouse <code>FEfeats_AR108.20200622.bb</code> files, BED 9+4 format for subsequent annotation release files predating March 2023 <br />
<sup>2</sup> not included in the human <code>FEfeats_AR109.20200522.bb</code> and mouse <code>FEfeats_AR108.20200622.bb</code> files; publications can be extracted from column 11 <br />
<sup>3</sup> this is column 12 in the human <code>FEfeats_AR109.20200522.bb</code> and mouse <code>FEfeats_AR108.20200622.bb</code> files <br />
<sup>4</sup> only present in feature bigBed files produced from March 2023 onwards <br />
<sup>5</sup> only present in feature bigBed files produced from October 2023 onwards, human only, including for T2T-CHM13v2.0 assembly features </p>
<p>Note that the name (column 4) is in the format <code>&lt;SO_type&gt;_&lt;GeneID&gt;</code> for most features, where the 'SO_type' is the 'GFF3 column 3 SO_type' in the <a href="#Feature_table">feature table</a> below. Protein binding site features are labeled in the format <code>protein_binding_site_&lt;bound_moiety&gt;_&lt;GeneID&gt;</code>, where the bound_moiety is the HGNC official full name for the encoding gene. These files use UCSC-style chromosome notation (column 1), except for human T2T-CHM13v2.0 assembly files, which use GenBank accession notation. </p>
</li>
<li>
<p>Pairwise interaction data files in <a href="http://genome.ucsc.edu/goldenPath/help/interact.html">bigInteract</a> format, provided as binary indexed BED 5+14 files where columns 1-18 are in standard bigInteract format, and column 19 is a custom column listing supporting publications: </p>
<ul>
<li>FErecombpartners_AR##.inter.bb -- recombination interactions as described <a href="#bigInteract">above</a> </li>
<li>FEregintxns_AR##.inter.bb -- regulatory interactions as described <a href="#bigInteract">above</a> </li>
</ul>
<p>These files use UCSC-style chromosome notation (columns 1, 9 and 14), except for human T2T-CHM13v2.0 assembly files, which use GenBank accession notation.</p>
</li>
</ul>
<p>All of the bigBed and bigInteract files can also be found in species-specific 'biological_region' directories and annotation release (AR##) subdirectories therein for <a href="https://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/biological_region/human_data/">human</a> and <a href="https://ftp.ncbi.nlm.nih.gov/refseq/M_musculus/biological_region/mouse_data/">mouse</a>.</p>
<h4 id="Gene_FTP"><a href="https://ftp.ncbi.nlm.nih.gov/gene/">Gene FTP</a></h4>
<p>All RefSeqs and associated Gene data, and genomic context if annotated on a genome assembly. See the Gene <a href="https://ftp.ncbi.nlm.nih.gov/gene/README">README file</a> for a description of the Gene FTP directory contents.</p>
<h4 id="Genomes_FTP"><a href="https://ftp.ncbi.nlm.nih.gov/genomes/">Genomes FTP</a></h4>
<p>Genome-annotated feature data for <a href="https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Homo_sapiens/latest_assembly_versions/">human</a> and <a href="https://ftp.ncbi.nlm.nih.gov/genomes/refseq/vertebrate_mammalian/Mus_musculus/latest_assembly_versions/">mouse</a> provided in <a href="https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md">GFF3</a> format. See the Feature Annotation Glossary <a href="#Feature_Annotation_Glossary">above</a> for descriptions of RefSeq Functional Element feature types. These GFF3 files use RefSeq accession chromosome notation (column 1). Effective as of human Updated Annotation Release 109.20200522 and mouse Updated Annotation Release 108.20200622, RefSeq Functional Element features are indicated by the 'RefSeqFE' source in column 2 of NCBI GFF3 files.</p>
<p>For extracting specific feature types from GFF3 files, please note that equivalent <a href="http://www.sequenceontology.org/">Sequence Ontology</a> (SO) terms are used as SO_types in column 3. Alternatively, specific feature types may be extracted from GFF3 files based on feature key, feature class or controlled vocabulary attributes in column 9. The following table shows how features are indicated in columns 3 and 9 of GFF3 files:</p>
<h4 id="Feature_table">Feature Table</h4>
<table>
<thead>
<tr>
<th align="left">INSDC feature</th>
<th align="left">Feature class or controlled vocabulary</th>
<th align="left">SO ID</th>
<th align="left">GFF3 column 3 SO_type</th>
<th align="left">GFF3 column 9 specific attribute(s)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">misc_feature</td>
<td align="left">biological_region</td>
<td align="left">SO:0001411</td>
<td align="left">biological_region</td>
<td align="left">gbkey=Region</td>
</tr>
<tr>
<td align="left">misc_feature</td>
<td align="left">CAGE_cluster</td>
<td align="left">SO:0001917</td>
<td align="left">CAGE_cluster</td>
<td align="left">feat_class=CAGE_cluster</td>
</tr>
<tr>
<td align="left">misc_feature</td>
<td align="left">conserved_region</td>
<td align="left">SO:0000330</td>
<td align="left">conserved_region</td>
<td align="left">feat_class=conserved_region</td>
</tr>
<tr>
<td align="left">misc_feature</td>
<td align="left">nucleotide_cleavage_site</td>
<td align="left">SO:0002204</td>
<td align="left">nucleotide_cleavage_site</td>
<td align="left">feat_class=nucleotide_cleavage_site</td>
</tr>
<tr>
<td align="left">misc_feature</td>
<td align="left">nucleotide_motif</td>
<td align="left">SO:0000714</td>
<td align="left">nucleotide_motif</td>
<td align="left">feat_class=nucleotide_motif</td>
</tr>
<tr>
<td align="left">misc_feature</td>
<td align="left">repeat_instability_region</td>
<td align="left">SO:0002202</td>
<td align="left">repeat_instability_region</td>
<td align="left">feat_class=repeat_instability_region</td>
</tr>
<tr>
<td align="left">misc_feature</td>
<td align="left">replication_start_site</td>
<td align="left">SO:0002203</td>
<td align="left">replication_start_site</td>
<td align="left">feat_class=replication_start_site</td>
</tr>
<tr>
<td align="left">misc_feature</td>
<td align="left">sequence_alteration</td>
<td align="left">SO:0001059</td>
<td align="left">sequence_alteration</td>
<td align="left">feat_class=sequence_alteration</td>
</tr>
<tr>
<td align="left">misc_feature</td>
<td align="left">sequence_comparison</td>
<td align="left">SO:0002072</td>
<td align="left">sequence_comparison</td>
<td align="left">feat_class=sequence_comparison</td>
</tr>
<tr>
<td align="left">misc_feature</td>
<td align="left">sequence_feature</td>
<td align="left">SO:0000110</td>
<td align="left">sequence_feature</td>
<td align="left">feat_class=sequence_feature</td>
</tr>
<tr>
<td align="left">misc_feature</td>
<td align="left">transcription_start_site</td>
<td align="left">SO:0000315</td>
<td align="left">TSS</td>
<td align="left">feat_class=transcription_start_site</td>
</tr>
<tr>
<td align="left">misc_recomb</td>
<td align="left">chromosome_breakpoint</td>
<td align="left">SO:0001021</td>
<td align="left">chromosome_breakpoint</td>
<td align="left">recombination_class=chromosome_breakpoint</td>
</tr>
<tr>
<td align="left">misc_recomb</td>
<td align="left">meiotic</td>
<td align="left">SO:0002155</td>
<td align="left">meiotic_recombination_region</td>
<td align="left">recombination_class=meiotic</td>
</tr>
<tr>
<td align="left">misc_recomb</td>
<td align="left">mitotic</td>
<td align="left">SO:0002154</td>
<td align="left">mitotic_recombination_region</td>
<td align="left">recombination_class=mitotic</td>
</tr>
<tr>
<td align="left">misc_recomb</td>
<td align="left">non_allelic_homologous</td>
<td align="left">SO:0002094</td>
<td align="left">non_allelic_homologous_recombination_region</td>
<td align="left">recombination_class=non_allelic_homologous</td>
</tr>
<tr>
<td align="left">misc_recomb</td>
<td align="left">recombination_hotspot</td>
<td align="left">SO:0000298</td>
<td align="left">recombination_feature</td>
<td align="left">recombination_class=recombination_hotspot</td>
</tr>
<tr>
<td align="left">misc_structure</td>
<td align="left">n/a</td>
<td align="left">SO:0000002</td>
<td align="left">sequence_secondary_structure</td>
<td align="left">gbkey=misc_structure</td>
</tr>
<tr>
<td align="left">mobile_element</td>
<td align="left">n/a</td>
<td align="left">SO:0001037</td>
<td align="left">mobile_genetic_element</td>
<td align="left">gbkey=mobile_element</td>
</tr>
<tr>
<td align="left">protein_bind</td>
<td align="left">n/a</td>
<td align="left">SO:0000410</td>
<td align="left">protein_binding_site</td>
<td align="left">gbkey=protein_bind or bound_moiety=</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">CAAT_signal</td>
<td align="left">SO:0000172</td>
<td align="left">CAAT_signal</td>
<td align="left">regulatory_class=CAAT_signal</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">DNase_I_hypersensitive_site</td>
<td align="left">SO:0000685</td>
<td align="left">DNaseI_hypersensitive_site<sup>1</sup></td>
<td align="left">regulatory_class=DNase_I_hypersensitive_site</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">enhancer</td>
<td align="left">SO:0000165</td>
<td align="left">enhancer</td>
<td align="left">regulatory_class=enhancer</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">enhancer_blocking_element</td>
<td align="left">SO:0002190</td>
<td align="left">enhancer_blocking_element</td>
<td align="left">regulatory_class=enhancer_blocking_element</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">GC_signal</td>
<td align="left">SO:0000173</td>
<td align="left">GC_rich_promoter_region</td>
<td align="left">regulatory_class=GC_signal</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">imprinting_control_region</td>
<td align="left">SO:0002191</td>
<td align="left">imprinting_control_region</td>
<td align="left">regulatory_class=imprinting_control_region</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">insulator</td>
<td align="left">SO:0000627</td>
<td align="left">insulator</td>
<td align="left">regulatory_class=insulator</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">locus_control_region</td>
<td align="left">SO:0000037</td>
<td align="left">locus_control_region</td>
<td align="left">regulatory_class=locus_control_region</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">matrix_attachment_region</td>
<td align="left">SO:0000036</td>
<td align="left">matrix_attachment_site</td>
<td align="left">regulatory_class=matrix_attachment_region</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">epigenetically_modified_region</td>
<td align="left">SO:0001720</td>
<td align="left">epigenetically_modified_region</td>
<td align="left">regulatory_class=epigenetically_modified_region</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">micrococcal_nuclease_hypersensitive_site</td>
<td align="left">SO:0005836</td>
<td align="left">regulatory_region</td>
<td align="left">regulatory_class=micrococcal_nuclease_hypersensitive_site</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">promoter</td>
<td align="left">SO:0000167</td>
<td align="left">promoter</td>
<td align="left">regulatory_class=promoter</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">response_element</td>
<td align="left">SO:0002205</td>
<td align="left">response_element</td>
<td align="left">regulatory_class=response_element</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">replication_regulatory_region</td>
<td align="left">SO:0001682</td>
<td align="left">replication_regulatory_region</td>
<td align="left">regulatory_class=replication_regulatory_region</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">silencer</td>
<td align="left">SO:0000625</td>
<td align="left">silencer</td>
<td align="left">regulatory_class=silencer</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">TATA_box</td>
<td align="left">SO:0000174</td>
<td align="left">TATA_box</td>
<td align="left">regulatory_class=TATA_box</td>
</tr>
<tr>
<td align="left">regulatory</td>
<td align="left">transcriptional_cis_regulatory_region</td>
<td align="left">SO:0001055</td>
<td align="left">transcriptional_cis_regulatory_region</td>
<td align="left">regulatory_class=transcriptional_cis_regulatory_region</td>
</tr>
<tr>
<td align="left">repeat_region</td>
<td align="left">minisatellite</td>
<td align="left">SO:0000643</td>
<td align="left">minisatellite</td>
<td align="left">satellite=minisatellite</td>
</tr>
<tr>
<td align="left">repeat_region</td>
<td align="left">microsatellite</td>
<td align="left">SO:0000289</td>
<td align="left">microsatellite</td>
<td align="left">satellite=microsatellite</td>
</tr>
<tr>
<td align="left">repeat_region</td>
<td align="left">direct_repeat</td>
<td align="left">SO:0000314</td>
<td align="left">direct_repeat</td>
<td align="left">rpt_type=direct</td>
</tr>
<tr>
<td align="left">repeat_region</td>
<td align="left">dispersed_repeat</td>
<td align="left">SO:0000658</td>
<td align="left">dispersed_repeat</td>
<td align="left">rpt_type=dispersed</td>
</tr>
<tr>
<td align="left">repeat_region</td>
<td align="left">inverted_repeat</td>
<td align="left">SO:0000294</td>
<td align="left">inverted_repeat</td>
<td align="left">rpt_type=inverted</td>
</tr>
<tr>
<td align="left">repeat_region</td>
<td align="left">n/a</td>
<td align="left">SO:0000001</td>
<td align="left">region</td>
<td align="left">gbkey=repeat_region</td>
</tr>
<tr>
<td align="left">repeat_region</td>
<td align="left">n/a</td>
<td align="left">SO:0000657</td>
<td align="left">repeat_region</td>
<td align="left">gbkey=repeat_region</td>
</tr>
<tr>
<td align="left">repeat_region</td>
<td align="left">tandem_repeat</td>
<td align="left">SO:0000705</td>
<td align="left">tandem_repeat</td>
<td align="left">rpt_type=tandem</td>
</tr>
<tr>
<td align="left">rep_origin</td>
<td align="left">n/a</td>
<td align="left">SO:0000296</td>
<td align="left">origin_of_replication</td>
<td align="left">gbkey=rep_origin</td>
</tr>
<tr>
<td align="left">stem_loop<sup>2</sup></td>
<td align="left">n/a</td>
<td align="left">SO:0000313</td>
<td align="left">stem_loop<sup>2</sup></td>
<td align="left">gbkey=stem_loop<sup>2</sup></td>
</tr>
</tbody>
</table>
<p><sup>1</sup> Denoted as 'DNAseI_hypersensitive_site' in older annotation releases (ARs) up to human AR 109.20201120 and mouse AR 109 <br />
<sup>2</sup> No longer used for Functional Element features as of August 2020, replaced with misc_structure</p>
<h4 id="Feat_extraction">Feature and Metadata Extraction Examples</h4>
<p>Annotated RefSeq Functional Element features can be extracted from NCBI-provided GFF3 files or from the 'FEfeats_AR##.bb' bigBed files following their conversion to a non-binary format using the <code>bigBedToBed</code> application provided in the <a href="http://hgdownload.soe.ucsc.edu/admin/exe/">UCSC binary utilities directory</a>. The following data extraction examples are based on the Unix command line.</p>
<p><strong>Feature extraction from GFF3 files:</strong></p>
<p>The GFF3 file from NCBI's latest human or mouse annotation release can be downloaded to a local directory (see the human and mouse Genomes FTP links <a href="#Genomes_FTP">above</a>), with substitution of the GFF3 file names (if applicable) in the following commands. Please refer to the feature table <a href="#Feature_table">above</a> to determine appropriate strings for extraction of desired feature types.</p>
<ul>
<li>
<p>Extraction of all RefSeq Functional Element features using <code>awk</code>, excluding parental 'biological_region' features, for GFF3 files produced from human AR 109.20200522 and mouse AR 108.20200622 onwards. This command uses the 'RefSeqFE' source to extract all relevant features:</p>
<p><code>zgrep -v "^#" GCF_000001405.40_GRCh38.p14_genomic.gff.gz | awk 'BEGIN{FS="\t";OFS="\t"}$2=="RefSeqFE"&amp;&amp;$3!="biological_region"'</code> </p>
</li>
<li>
<p>Extraction of all RefSeq Functional Element features using <code>awk</code>, excluding parental 'biological_region' features, for GFF3 files prior to human AR 109.20200522 and mouse AR 108.20200622. This command takes advantage of the fact that RefSeq Functional Element features are not stranded (except for 'stem_loop' features), while all other NCBI-annotated features are stranded, including 'gene', 'exon' and 'CDS' features:</p>
<p><code>zgrep -v "^#" GCF_000001405.40_GRCh38.p14_genomic.gff.gz | awk 'BEGIN{FS="\t";OFS="\t"}$7=="."||$3=="stem_loop"'</code> </p>
</li>
<li>
<p>Extraction of all regulatory_class features using <code>awk</code>:</p>
<p><code>zgrep -v "^#" GCF_000001405.40_GRCh38.p14_genomic.gff.gz | awk 'BEGIN{FS="\t";OFS="\t"}$9~/regulatory_class=/'</code> </p>
<p>Alternatively, <code>$9~/regulatory_class=/</code> can be substituted with <code>$9~/gbkey=regulatory/</code></p>
</li>
<li>
<p>Extraction of enhancer features based on the column 3 SO_type using <code>awk</code>: </p>
<p><code>zgrep -v "^#" GCF_000001405.40_GRCh38.p14_genomic.gff.gz | awk 'BEGIN{FS="\t";OFS="\t"}$3=="enhancer"'</code> </p>
</li>
<li>
<p>Extraction of enhancer features based on the column 9 specific attribute using <code>awk</code>: </p>
<p><code>zgrep -v "^#" GCF_000001405.40_GRCh38.p14_genomic.gff.gz | awk 'BEGIN{FS="\t";OFS="\t"}$9~/regulatory_class=enhancer/&amp;&amp;$9!~/regulatory_class=enhancer_blocking_element/'</code> </p>
</li>
<li>
<p>Extraction of enhancer features based on the column 9 specific attribute using <code>grep</code>: </p>
<p><code>zgrep -v "^#" GCF_000001405.40_GRCh38.p14_genomic.gff.gz | grep regulatory_class=enhancer | grep regulatory_class=enhancer_blocking_element -v</code> </p>
</li>
<li>
<p>Extraction of features from human records associated with coronavirus biology, here selecting relevant GeneIDs based on flat file 'coronavirus related' attributes found in the <code>human.biological_region.gbff.gz</code> file described <a href="#RefSeq_FTP">above</a>: </p>
<p><code>zgrep -v "^#" GCF_000001405.40_GRCh38.p14_genomic.gff.gz | awk -F '\t' '$2=="RefSeqFE"&amp;&amp;$3!="biological_region"' | fgrep -f &lt;(zgrep -A30 "coronavirus related" human.biological_region.gbff.gz | grep -o "GeneID:[0-9]*" | sort -u) -w</code> </p>
</li>
<li>
<p>Extraction of features with <a href="https://www.ncbi.nlm.nih.gov/snp/">dbSNP</a> cross-references for functionally validated genetic variants corresponding to the RefSeqFE feature: </p>
<p><code>zgrep -v "^#" GCF_000001405.40_GRCh38.p14_genomic.gff.gz | awk -F '\t' '$2=="RefSeqFE"&amp;&amp;$9~/Dbxref=dbSNP:/'</code> </p>
</li>
</ul>
<p id="gff_metadata"><strong>Metadata extraction from GFF3 files:</strong> <br /></p>
<ul>
<li>
<p>To extract feature types, feature IDs and cell/tissue types for all features containing cell/tissue activity data: </p>
<p><code>zgrep -v "^#" GCF_000001405.40_GRCh38.p14_genomic.gff.gz | awk -F '\t' '$2=="RefSeqFE"&amp;&amp;$3!="biological_region"&amp;&amp;$9~/active_cell\/tissue|VISTA/' | cut -f3,9 | sed 's/ID=id-\([^;]*\).*active_cell\/tissue: \(.*\)}.*/\1\t\2/; s/ID=id-\([^;]*\).*function=enhancer in: \([^;]*\);.*/\1\t\2/'</code></p>
</li>
<li>
<p>To extract features that are active in a cell/tissue type of interest, HepG2 cells in this example: </p>
<p><code>zgrep -v "^#" GCF_000001405.40_GRCh38.p14_genomic.gff.gz | awk -F '\t' '$2=="RefSeqFE"&amp;&amp;$3!="biological_region"&amp;&amp;$9~/active_cell\/tissue|VISTA/&amp;&amp;$9~/HepG2/'</code> </p>
</li>
</ul>
<p><strong>Feature extraction from bigBed files:</strong></p>
<p>The 'FEfeats_AR##.bb' file from NCBI's latest human or mouse annotation release can be downloaded to a local directory (see file availability from the RefSeq FTP site <a href="#bigBed">above</a>). </p>
<ul>
<li>
<p>File conversion to non-binary format using the <code>bigBedToBed</code> application provided in the <a href="http://hgdownload.soe.ucsc.edu/admin/exe/">UCSC binary utilities directory</a>: </p>
<p><code>bigBedToBed FEfeats_RS_2023_03_GRCh38.p14.bb FEfeats_RS_2023_03_GRCh38.p14.bed</code> </p>
</li>
<li>
<p>Extraction of enhancer features from the bigBedToBed-converted file based on the column 4 name using <code>awk</code>: </p>
<p><code>awk -F '\t' '$4~/enhancer_GeneID/' FEfeats_RS_2023_03_GRCh38.p14.bed</code> </p>
</li>
<li>
<p>Extraction of features from the bigBedToBed-converted file for human functional elements associated with coronavirus biology, here selecting relevant GeneIDs based on flat file 'coronavirus related' attributes found in the 'human.biological_region.gbff.gz' file described <a href="#RefSeq_FTP">above</a>: </p>
<p><code>cat FEfeats_RS_2023_03_GRCh38.p14.bed | fgrep -f &lt;(zgrep -A30 "coronavirus related" human.biological_region.gbff.gz | grep -o "GeneID:[0-9]*" | sort -u) -w</code></p>
</li>
</ul>
<p><strong>Caution for extraction of specific features:</strong> </p>
<p>Some feature strings may also be present in names or other free-text attributes in GFF3 and bigBedToBed-converted files. The use of explicit terms with associated strings is therefore recommended to avoid the extraction of non-specific features. Explicit features can be extracted by using <code>awk</code> based on the SO_type in column 3 of GFF3 files in the format <code>$3=="&lt;feature_name&gt;"</code>, while full attribute strings may be necessary for extractions based on GFF3 column 9 attributes. For example, <code>regulatory_class=enhancer</code> can be used with <code>awk</code> or <code>grep</code> as in the GFF3 examples above, where it is additionally necessary to exclude 'enhancer_blocking_element' features given that the 'enhancer' string is also found within <code>regulatory_class=enhancer_blocking_element</code>. In bigBedToBed-converted files, 'enhancer' features can be distinguished from 'enhancer_blocking_element' features by specifying <code>$4~/enhancer_GeneID/</code> in an <code>awk</code> command given that an <code>&lt;SO_type&gt;_&lt;GeneID&gt;</code> naming format is used in column 4.</p>
<p id="bb_metadata"><strong>Metadata extraction from bigBedToBed-converted files:</strong><br /></p>
<p>Specific types of metadata may also be extracted from either the 'FEbiolregions_AR##.bb' or 'FEfeats_AR##.bb' files following their conversion to non-binary format. See <a href="#bigBed">above</a> for a description of the custom metadata columns within these files.</p>
<ul>
<li>
<p>Extraction of basic enhancer features (columns 1-4) together with just functional and descriptive information (column 11) from a converted non-binary <code>FEfeats_RS_2023_10_GRCh38.p14.bed</code> file: </p>
<p><code>awk -F '\t' '$4~/enhancer_GeneID/' FEfeats_RS_2023_10_GRCh38.p14.bed | cut -f1-4,11</code> </p>
</li>
<li>
<p>To extract the basic enhancer features (columns 1-4) along with just publications from column 12: </p>
<p><code>awk -F '\t' '$4~/enhancer_GeneID/' FEfeats_RS_2023_10_GRCh38.p14.bed | cut -f1-4,12</code></p>
</li>
<li>
<p>Using <code>sed</code> to extract the basic enhancer features (columns 1-4) along with just publications from column 11 for the human <code>FEfeats_AR109.20200522.bb</code> and mouse <code>FEfeats_AR108.20200622.bb</code> files, which lack a standalone supporting publications column: </p>
<p><code>awk -F '\t' '$4~/enhancer_GeneID/' FEfeats_AR109.20200522.bed | cut -f1-4,11 | sed 's/\(_GeneID:[0-9]*\t\).*\[PMID/\1PMID/; s/\(PMID:[0-9]*\)\].*/\1/'</code></p>
</li>
<li>
<p>To extract feature types, feature IDs and cell/tissue types for all features containing cell/tissue activity data (applies to feature files produced from March 2023 onwards): </p>
<p><code>awk -F '\t' '$14!=""' FEfeats_RS_2023_10_GRCh38.p14.bed | cut -f4,14</code></p>
</li>
<li>
<p>To extract features that are active in a cell/tissue type of interest, K562 cells in this example: </p>
<p><code>awk -F '\t' '$14~/K562/' FEfeats_RS_2023_10_GRCh38.p14.bed</code></p>
</li>
<li>
<p>To retrieve the full list of cell/tissue types represented in a given bigBedToBed-converted feature file, here removing appended conditions and scores to display root names only: </p>
<p><code>awk -F '\t' '$14!=""' FEfeats_RS_2023_10_GRCh38.p14.bed | cut -f14 | sed 's/ | /\n/g; s/(.*//; s/\[.*//; s/}.*//; s/ $//' | sort | uniq</code> </p>
</li>
<li>
<p>Extraction of GeneIDs, locus summaries and RefSeq accessions from a similarly converted non-binary 'FEbiolregions_AR##.bed' file: </p>
<p><code>cut -f10,12,14 FEbiolregions_RS_2023_10_GRCh38.p14.bed</code> </p>
</li>
<li>
<p>Extraction of locus symbols, locus names and interacting loci for biological regions with regulatory interaction data: </p>
<p><code>awk -F '\t' '$13~/regulatory_interactions/' FEbiolregions_RS_2023_10_GRCh38.p14.bed | cut -f4,11,13</code></p>
</li>
<li>
<p>Extraction of target genes with regulatory interactions from a bigBedToBed-converted 'FEregintxns_AR##.inter.bb' file: </p>
<p><code>awk -F '\t' '$4~/target_gene/' FEregintxns_RS_2023_10_GRCh38.p14.inter.bed | cut -f4 | sed 's/:/\t/g; s/_t/\tt/; s/_r/\tr/' | awk '{if ($3~/target/) {print $1} else {print $2}}' | sort | uniq</code></p>
</li>
<li>
<p>Extraction of features with <a href="https://www.ncbi.nlm.nih.gov/snp/">dbSNP</a> cross-references for functionally validated genetic variants corresponding to the RefSeqFE feature, where the SNPs can be independently extracted from column 15 (human files produced from October 2023 onwards): </p>
<p><code>awk -F '\t' '$15~/rs/' FEfeats_RS_2023_10_GRCh38.p14.bed</code> </p>
</li>
</ul>
<p>Similar command line strategies may also be used to extract feature-specific metadata from column 9 of GFF3 files, where several semi-colon-delimited functional and descriptive attributes are present.</p>
<p>All of the above examples are just a sampling. The extraction of numerous other combinations of features or metadata is also possible, including by using other computer programming languages, tools or data parsers. We encourage users to explore our data based on individual data needs.</p>
<h2 id="References">References</h2>
<p>Please use the following citation for RefSeq Functional Elements:</p>
<ul>
<li>Farrell CM, Goldfarb T, Rangwala SH, Astashyn A, Ermolaeva OD, Hem V, Katz KS, Kodali VK, Ludwig F, Wallin CL, Pruitt KD, Murphy TD. RefSeq Functional Elements as experimentally assayed nongenic reference standards and functional interactions in human and mouse. <em>Genome Res.</em> 2022 Jan;32(1):175-188. doi: <a href="https://doi.org/10.1101/gr.275819.121">10.1101/gr.275819.121</a>. Epub 2021 Dec 7. PMID: <a href="https://pubmed.ncbi.nlm.nih.gov/34876495/">34876495</a>; PMCID: <a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC8744684/">PMC8744684</a>.</li>
</ul>
<h2 id="Feedback">Feedback</h2>
<p>We welcome user comments and suggestions. Please use the yellow vertical Feedback tab on the bottom right of the page to enter your comments, or contact us through the <a href="https://support.nlm.nih.gov/support/create-case/">NLM Support Center interface</a>, or by e-mailing refseq-support@nlm.nih.gov.</p>
<p><span id="shared-content-1"></span></p>
</div>
<!--/.col1-->
<div class="col2">
</div>
<!--/.col2-->
<div class="col3">
</div>
<!--/.col3-->
<div class="col4">
</div>
<!--/.col4-->
<div class="col5">
</div>
<div class="col6">
</div>
<div class="col7">
</div>
<div class="col8">
</div>
<div class="col9">
</div>
</div><!--/.content-->
</div><!--/.container-->
<div id="NCBIFooter_dynamic">
<!--<component id="NCBIBreadcrumbs"/>
<component id="NCBIHelpDesk"/>-->
<noscript><img alt="" src="/stat?jsdisabled=true&amp;ncbi_app=refseq&amp;ncbi_db=&amp;ncbi_pdid=static&amp;ncbi_phid=CE8B0C0C7C8015F10000000000CD0097" /></noscript>
</div>
<div xmlns:xi="http://www.w3.org/2001/XInclude">
<div xmlns="http://www.w3.org/1999/xhtml" class="footer" id="footer" xml:base="http://127.0.0.1/sites/static/header_footer">
<section class="icon-section">
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
<div class="grid-container container">
<div class="icon-section_container">
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter">
<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40" fill="none">
<title>Twitter</title>
<g id="twitterx1008">
<path id="path1008" d="M6.06736 7L16.8778 20.8991L6.00001 32.2H10.2L18.6 23.1L25.668 32.2H34L22.8 17.5L31.9 7H28.4L20.7 15.4L14.401 7H6.06898H6.06736ZM9.66753 8.73423H12.9327L29.7327 30.4658H26.5697L9.66753 8.73423Z" fill="#5B616B"></path>
</g>
</svg>
</a>
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>Facebook</title>
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>LinkedIn</title>
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<defs>
<style>
.cls-11,
.cls-12 {
fill: #737373;
}
.cls-11 {
fill-rule: evenodd;
}
</style>
</defs>
<title>GitHub</title>
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
</path>
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
</path>
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
</path>
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
</path>
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
</path>
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
</path>
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
</svg></a>
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
<defs><style>.cls-1{fill:#737373;}</style></defs>
<title>NCBI Insights Blog</title>
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
</svg>
</a>
</div>
</div>
</section>
<section class="container-fluid bg-primary">
<div class="container pt-5">
<div class="row mt-3">
<div class="col-lg-3 col-12">
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
<ul class="list-inline social_media">
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
<title>Twitter</title>
<g id="twitterx1009" clip-path="url(#clip0_65276_3946)">
<path id="Vector_Twitter" d="M17.5006 34.6565C26.9761 34.6565 34.6575 26.9751 34.6575 17.4996C34.6575 8.02416 26.9761 0.342773 17.5006 0.342773C8.02514 0.342773 0.34375 8.02416 0.34375 17.4996C0.34375 26.9751 8.02514 34.6565 17.5006 34.6565Z" fill="#205493" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
<path id="path1009" d="M8.54811 8.5L16.2698 18.4279L8.50001 26.5H11.5L17.5 20L22.5486 26.5H28.5L20.5 16L27 8.5H24.5L19 14.5L14.5007 8.5H8.54927H8.54811ZM11.1197 9.73873H13.4519L25.4519 25.2613H23.1926L11.1197 9.73873Z" fill="white"></path>
</g>
<defs>
<clipPath id="clip0_65276_3946">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
<!-- Social icon: NLM Facebook page. The link's aria-label supplies its accessible name; the inline SVG draws a white glyph inside a white-stroked circle. -->
<li class="list-inline-item"><a
  href="https://www.facebook.com/nationallibraryofmedicine"
  target="_blank"
  rel="noopener noreferrer"
  aria-label="Facebook">
<svg
  xmlns="http://www.w3.org/2000/svg"
  viewBox="0 0 36 35"
  width="35"
  height="35"
  fill="none">
<title>Facebook</title>
<g id="Facebook" clip-path="url(#clip0_1717_1086)">
<!-- Glyph -->
<path
  id="Vector_Facebook"
  fill="white"
  d="M15.1147 29.1371C15.1147 29.0822 15.1147 29.0296 15.1147 28.9747V18.9414H11.8183C11.6719 18.9414 11.6719 18.9414 11.6719 18.8018C11.6719 17.5642 11.6719 16.3289 11.6719 15.0937C11.6719 14.9793 11.7062 14.9518 11.816 14.9518C12.8683 14.9518 13.9206 14.9518 14.9751 14.9518H15.1215V14.8329C15.1215 13.8057 15.1215 12.774 15.1215 11.7492C15.1274 10.9262 15.3148 10.1146 15.6706 9.37241C16.1301 8.38271 16.9475 7.60378 17.9582 7.19235C18.6492 6.90525 19.3923 6.76428 20.1405 6.7783C21.0029 6.79202 21.8653 6.83091 22.7278 6.86065C22.8879 6.86065 23.048 6.89496 23.2082 6.90182C23.2974 6.90182 23.3271 6.94071 23.3271 7.02993C23.3271 7.54235 23.3271 8.05477 23.3271 8.5649C23.3271 9.16882 23.3271 9.77274 23.3271 10.3767C23.3271 10.4819 23.2974 10.5139 23.1921 10.5116C22.5379 10.5116 21.8814 10.5116 21.2271 10.5116C20.9287 10.5184 20.6316 10.5528 20.3395 10.6146C20.0822 10.6619 19.8463 10.7891 19.6653 10.9779C19.4842 11.1668 19.3672 11.4078 19.3307 11.6669C19.2857 11.893 19.2612 12.1226 19.2575 12.3531C19.2575 13.1904 19.2575 14.0299 19.2575 14.8695C19.2575 14.8946 19.2575 14.9198 19.2575 14.9564H23.0229C23.1807 14.9564 23.183 14.9564 23.1624 15.1074C23.0778 15.7662 22.9885 16.425 22.9039 17.0816C22.8322 17.6321 22.7636 18.1827 22.698 18.7332C22.6729 18.9437 22.6797 18.9437 22.4693 18.9437H19.2644V28.8992C19.2644 28.9793 19.2644 29.0593 19.2644 29.1394L15.1147 29.1371Z"></path>
<!-- Circular outline -->
<path
  id="Vector_2_Facebook"
  stroke="white"
  stroke-width="1.0"
  stroke-miterlimit="10"
  d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z"></path>
</g>
<defs>
<clipPath id="clip0_1717_1086">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
<!-- Social icon: NLM YouTube channel. The aria-label and SVG title use the brand's own capitalisation ("YouTube"), matching the group id below. Element ids are left untouched. -->
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="YouTube" target="_blank" rel="noopener noreferrer">
<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
<title>YouTube</title>
<g id="YouTube" clip-path="url(#clip0_1717_1101)">
<!-- Glyph -->
<path id="Vector_Youtube" d="M26.2571 11.4791C25.9025 11.1589 25.5709 10.9576 24.228 10.834C22.5512 10.6785 20.2797 10.6556 18.564 10.6533H16.4365C14.7208 10.6533 12.4493 10.6785 10.7725 10.834C9.43196 10.9576 9.09798 11.1589 8.7434 11.4791C7.81464 12.321 7.6202 14.6268 7.59961 16.8938C7.59961 17.3178 7.59961 17.741 7.59961 18.1635C7.62706 20.4121 7.82837 22.686 8.7434 23.521C9.09798 23.8412 9.42967 24.0425 10.7725 24.1661C12.4493 24.3216 14.7208 24.3445 16.4365 24.3468H18.564C20.2797 24.3468 22.5512 24.3216 24.228 24.1661C25.5686 24.0425 25.9025 23.8412 26.2571 23.521C27.1722 22.6929 27.3735 20.451 27.4009 18.2206C27.4009 17.7402 27.4009 17.2599 27.4009 16.7795C27.3735 14.5491 27.1699 12.3072 26.2571 11.4791ZM15.5604 20.5311V14.652L20.561 17.5001L15.5604 20.5311Z" fill="white"></path>
<!-- Circular outline -->
<path id="Vector_2_Youtube" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
</g>
<defs>
<clipPath id="clip0_1717_1101">
<rect width="35" height="35" fill="white"></rect>
</clipPath>
</defs>
</svg>
</a></li>
</ul>
</div>
<div class="col-lg-3 col-12">
<!-- Postal address; the street and city lines link to a Google Maps place URL that opens in a new tab. -->
<p class="address_footer text-white">National Library of Medicine<br />
<a class="text-white" href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
Bethesda, MD 20894</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<!-- Policy links: web policies, FOIA, and the HHS vulnerability disclosure policy. -->
<p><a class="text-white" href="https://www.nlm.nih.gov/web_policies.html">Web Policies</a><br />
<a class="text-white" href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office">FOIA</a><br />
<a class="text-white" id="vdp" href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html">HHS Vulnerability Disclosure</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<!-- Support links: help desk, accessibility statement, and careers. -->
<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
<a class="text-white" href="https://www.nlm.nih.gov/accessibility.html">Accessibility</a><br />
<a class="text-white" href="https://www.nlm.nih.gov/careers/careers.html">Careers</a></p>
</div>
</div>
<div class="row">
<div class="col-lg-12 centered-lg">
<!-- Bottom row of agency links. Every href uses an explicit https scheme so the NLM link matches its siblings and never inherits a non-secure scheme from the embedding page. -->
<nav class="bottom-links">
<ul class="mt-3">
<li>
<a class="text-white" href="https://www.nlm.nih.gov/">NLM</a>
</li>
<li>
<a class="text-white" href="https://www.nih.gov/">NIH</a>
</li>
<li>
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
</li>
<li>
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
</section>
<!-- Footer scripts. Neither tag carries async or defer, so they load and run in document order; keep this order. NOTE(review): the path segments of the first URL (InstrumentOmnitureBaseJS, InstrumentPageStarterJS, ...) suggest analytics instrumentation, and hfjs2.js is presumably the header/footer behaviour script. Confirm before changing either. -->
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
</div>
</div>
<!--/.footer-->
<!-- Page modification timestamp; the time element exposes the ISO 8601 value in machine-readable form while the visible text stays the same. -->
<p class="last-updated small">Last updated: <time datetime="2024-04-05T14:48:24Z">2024-04-05T14:48:24Z</time></p>
</div>
<!--/.page-->
</div>
<!--/.wrap-->
<span class="PAFAppResources"></span>
</div><!-- /.twelve_col -->
</div>
<!-- /.grid -->
<!-- usually for JS scripts at page bottom -->
<span class="pagefixtures"></span>
<!-- CE8B5AF87C7FFCB1_0191SID /projects/refseq/refseq@1.21 portal104 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<!-- Hidden element carrying a token in its data-token attribute. NOTE(review): the id suggests a CSRF token read by the portal scripts below. Confirm against portal.js. -->
<span id="portal-csrf-token" data-token="CE8B5AF87C7FFCB1_0191SID" style="display:none"></span>
<!-- Page-bottom scripts, kept in their original load order. -->
<script type="text/javascript" src="/portal/js/portal.js"></script>
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4218658/js/3879255/4121861/4218656/4087685.js" snapshot="refseq"></script>
</body>
</html>