793 lines
No EOL
78 KiB
HTML
793 lines
No EOL
78 KiB
HTML
<?xml version="1.0" encoding="utf-8"?>
|
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
|
|
|
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
|
<!-- AppResources meta begin -->
|
|
<meta name="paf-app-resources" content="" />
|
|
<script type="text/javascript">var ncbi_startTime = new Date();</script>
|
|
|
|
<!-- AppResources meta end -->
|
|
|
|
<!-- TemplateResources meta begin -->
|
|
<meta name="paf_template" content="" />
|
|
|
|
<!-- TemplateResources meta end -->
|
|
|
|
<!-- Logger begin -->
|
|
<meta name="ncbi_db" content="books" /><meta name="ncbi_pdid" content="book-part" /><meta name="ncbi_acc" content="NBK1058" /><meta name="ncbi_domain" content="coursework" /><meta name="ncbi_report" content="record" /><meta name="ncbi_type" content="fulltext" /><meta name="ncbi_objectid" content="" /><meta name="ncbi_pcid" content="/NBK1058/" /><meta name="ncbi_pagename" content="Building Customized Data Pipelines Using the Entrez Programming Utilities (eUtils) - NCBI Short Courses - NCBI Bookshelf" /><meta name="ncbi_bookparttype" content="chapter" /><meta name="ncbi_app" content="bookshelf" />
|
|
<!-- Logger end -->
|
|
|
|
<title>Building Customized Data Pipelines Using the Entrez Programming Utilities (eUtils) - NCBI Short Courses - NCBI Bookshelf</title>
|
|
|
|
<!-- AppResources external_resources begin -->
|
|
<link rel="stylesheet" href="/core/jig/1.15.2/css/jig.min.css" /><script type="text/javascript" src="/core/jig/1.15.2/js/jig.min.js"></script>
|
|
|
|
<!-- AppResources external_resources end -->
|
|
|
|
<!-- Page meta begin -->
|
|
<meta name="robots" content="INDEX,FOLLOW,NOARCHIVE" /><meta name="citation_inbook_title" content="NCBI Short Courses [Internet]" /><meta name="citation_title" content="Building Customized Data Pipelines Using the Entrez Programming Utilities (eUtils)" /><meta name="citation_publisher" content="National Center for Biotechnology Information (US)" /><meta name="citation_date" content="2004" /><meta name="citation_author" content="Eric Sayers" /><meta name="citation_author" content="David Wheeler" /><meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK1058/" /><link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" /><meta name="DC.Title" content="Building Customized Data Pipelines Using the Entrez Programming Utilities (eUtils)" /><meta name="DC.Type" content="Text" /><meta name="DC.Publisher" content="National Center for Biotechnology Information (US)" /><meta name="DC.Contributor" content="Eric Sayers" /><meta name="DC.Contributor" content="David Wheeler" /><meta name="DC.Date" content="2004" /><meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK1058/" /><meta name="description" content="The Entrez Programming Utilities (eUtils) are a set of seven server-side programs that provide a stable interface into the Entrez query and database system at the National Center for Biotechnology Information (NCBI). The eUtils use a fixed URL syntax that translates a standard set of input parameters into the values necessary for various NCBI software components to search for and retrieve the requested data. The eUtils are therefore the structured interface to the Entrez system, which currently includes 23 databases covering a variety of biomedical data, including nucleotide and protein sequences, gene records, three-dimensional molecular structures, and the biomedical literature." 
/><meta name="og:title" content="Building Customized Data Pipelines Using the Entrez Programming Utilities (eUtils)" /><meta name="og:type" content="book" /><meta name="og:description" content="The Entrez Programming Utilities (eUtils) are a set of seven server-side programs that provide a stable interface into the Entrez query and database system at the National Center for Biotechnology Information (NCBI). The eUtils use a fixed URL syntax that translates a standard set of input parameters into the values necessary for various NCBI software components to search for and retrieve the requested data. The eUtils are therefore the structured interface to the Entrez system, which currently includes 23 databases covering a variety of biomedical data, including nucleotide and protein sequences, gene records, three-dimensional molecular structures, and the biomedical literature." /><meta name="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK1058/" /><meta name="og:site_name" content="NCBI Bookshelf" /><meta name="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-coursework-lrg.png" /><meta name="twitter:card" content="summary" /><meta name="twitter:site" content="@ncbibooks" /><meta name="bk-non-canon-loc" content="/books/n/coursework/eutils/" /><link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK1058/" /><link rel="stylesheet" href="/corehtml/pmc/css/figpopup.css" type="text/css" media="screen" /><link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css" type="text/css" /><link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_print.min.css" type="text/css" media="print" /><style type="text/css">p a.figpopup{display:inline !important} .bk_tt {font-family: monospace} .first-line-outdent .bk_ref {display: inline} .body-content h2, .body-content .h2 {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a 
{text-decoration:none;border:0 !important} .temp-labeled-list .graphic {display:inline-block !important} .temp-labeled-list img{width:100%}</style><script type="text/javascript" src="/corehtml/pmc/js/jquery.hoverIntent.min.js"> </script><script type="text/javascript" src="/corehtml/pmc/js/common.min.js?_=3.18"> </script><script type="text/javascript" src="/corehtml/pmc/js/large-obj-scrollbars.min.js"> </script><script type="text/javascript">window.name="mainwindow";</script><script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/book-toc.min.js"> </script><script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/books.min.js"> </script><meta name="book-collection" content="NONE" />
|
|
|
|
<!-- Page meta end -->
|
|
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8C86D47C8236F1000000000037002A.m_12" />
|
|
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3985586/3808861/4121862/3974050/3917732/251717/4216701/14534/45193/4113719/3849091/3984811/3751656/4033350/3840896/3577051/3852958/4008682/4207974/4206132/4062871/12930/3964959/3854974/36029/4128070/9685/3549676/3609192/3609193/3609213/3395586.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3411343/3882866.css" media="print" /></head>
|
|
<body class="book-part">
|
|
<div class="grid">
|
|
<div class="col twelve_col nomargin shadow">
|
|
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
|
|
<div class="sysmessages">
|
|
<noscript>
|
|
<p class="nojs">
|
|
<strong>Warning:</strong>
|
|
The NCBI web site requires JavaScript to function.
|
|
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">more...</a>
|
|
</p>
|
|
</noscript>
|
|
</div>
|
|
<!--/.sysmessage-->
|
|
<div class="wrap">
|
|
<div class="page">
|
|
<div class="top">
|
|
<div id="universal_header">
|
|
<section class="usa-banner">
|
|
<div class="usa-accordion">
|
|
<header class="usa-banner-header">
|
|
<div class="usa-grid usa-banner-inner">
|
|
<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
|
|
<p>An official website of the United States government</p>
|
|
<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
|
|
<span class="usa-banner-button-text">Here's how you know</span>
|
|
</button>
|
|
</div>
|
|
</header>
|
|
<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
|
|
<div class="usa-banner-guidance-gov usa-width-one-half">
|
|
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
|
|
<div class="usa-media_block-body">
|
|
<p>
|
|
<strong>The .gov means it's official.</strong>
|
|
<br />
|
|
Federal government websites often end in .gov or .mil. Before
|
|
sharing sensitive information, make sure you're on a federal
|
|
government site.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
<div class="usa-banner-guidance-ssl usa-width-one-half">
|
|
<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
|
|
<div class="usa-media_block-body">
|
|
<p>
|
|
<strong>The site is secure.</strong>
|
|
<br />
|
|
The <strong>https://</strong> ensures that you are connecting to the
|
|
official website and that any information you provide is encrypted
|
|
and transmitted securely.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
<div class="usa-overlay"></div>
|
|
<header class="ncbi-header" role="banner" data-section="Header">
|
|
|
|
<div class="usa-grid">
|
|
<div class="usa-width-one-whole">
|
|
|
|
<div class="ncbi-header__logo">
|
|
<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
|
|
<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
|
|
</a>
|
|
</div>
|
|
|
|
<div class="ncbi-header__account">
|
|
<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
|
|
<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
|
|
<span class="fa fa-user" aria-hidden="true">
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
|
|
<g style="fill: #fff">
|
|
<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
|
|
<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
|
|
</g>
|
|
</svg>
|
|
</span>
|
|
<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
|
|
<span class="sr-only">Show account info</span>
|
|
</button>
|
|
</div>
|
|
|
|
<div class="ncbi-popup-anchor">
|
|
<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
|
|
<div class="ncbi-popup-head">
|
|
<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
|
|
<span class="fa fa-times">
|
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
|
|
<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
|
|
</svg>
|
|
</span>
|
|
<span class="usa-sr-only">Close</span></button>
|
|
<h4>Account</h4>
|
|
</div>
|
|
<div class="account-user-info">
|
|
Logged in as:<br />
|
|
<b><span class="username" id="uname_long">username</span></b>
|
|
</div>
|
|
<div class="account-links">
|
|
<ul class="usa-unstyled-list">
|
|
<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
|
|
<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
|
|
<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
|
|
<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
</div>
|
|
</div>
|
|
</header>
|
|
<div role="navigation" aria-label="access keys">
|
|
<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
|
|
<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
|
|
<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
|
|
<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
|
|
<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
|
|
</div>
|
|
<section data-section="Alerts">
|
|
<div class="ncbi-alerts-placeholder"></div>
|
|
</section>
|
|
</div>
|
|
<div class="header">
|
|
<div class="res_logo"><h1 class="res_name"><a href="/books/" title="Bookshelf home">Bookshelf</a></h1><h2 class="res_tagline"></h2></div>
|
|
<div class="search"><form method="get" action="/books/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="books" selected="selected" data-ac_dict="bookshelf-search">Books</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="pmc" class="last">PMC</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books" data-ac_dict="bookshelf-search">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option 
value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search Books. Use up and down arrows to choose an item from the autocomplete." value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="dictionary:'bookshelf-search',disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'no'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form><ul class="searchlinks inline_list"><li>
|
|
<a href="/books/browse/">Browse Titles</a>
|
|
</li><li>
|
|
<a href="/books/advanced/">Advanced</a>
|
|
</li><li class="help">
|
|
<a href="/books/NBK3833/">Help</a>
|
|
</li><li class="disclaimer">
|
|
<a target="_blank" data-ga-category="literature_resources" data-ga-action="link_click" data-ga-label="disclaimer_link" href="https://www.ncbi.nlm.nih.gov/books/about/disclaimer/">Disclaimer</a>
|
|
</li></ul></div>
|
|
</div>
|
|
|
|
|
|
|
|
<!--<component id="Page" label="headcontent"/>-->
|
|
|
|
</div>
|
|
<div class="content">
|
|
<!-- site messages -->
|
|
<!-- Custom content 1 -->
|
|
<div class="col1">
|
|
|
|
</div>
|
|
|
|
<div class="container">
|
|
<div id="maincontent" class="content eight_col col">
|
|
<!-- Custom content in the left column above book nav -->
|
|
<div class="col2">
|
|
|
|
</div>
|
|
|
|
<!-- Book content -->
|
|
|
|
|
|
<!-- Custom content between navigation and content -->
|
|
<div class="col3">
|
|
|
|
</div>
|
|
|
|
<div class="document">
|
|
<div class="pre-content"><div><div class="bk_prnt"><p class="small">NCBI Bookshelf. A service of the National Library of Medicine, National Institutes of Health.</p><p>NCBI Short Courses [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2004-. </p></div><div class="iconblock clearfix whole_rhythm no_top_margin bk_noprnt"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/coursework/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-coursework-lrg.png" alt="Cover of NCBI Short Courses" height="100px" width="80px" /></a><div class="icnblk_cntnt eight_col"><h2>NCBI Short Courses [Internet].</h2><a data-jig="ncbitoggler" href="#__NBK1058_dtls__">Show details</a><div style="display:none" class="ui-widget" id="__NBK1058_dtls__"><div>Bethesda (MD): <a href="https://www.ncbi.nlm.nih.gov/" ref="pagearea=page-banner&targetsite=external&targetcat=link&targettype=publisher">National Center for Biotechnology Information (US)</a>; 2004-.</div></div><div class="half_rhythm"><ul class="inline_list"><li style="margin-right:1em"><a class="bk_cntns" href="/books/n/coursework/">Contents</a></li></ul></div><div class="bk_noprnt"><form method="get" action="/books/n/coursework/" id="bk_srch"><div class="bk_search"><label for="bk_term" class="offscreen_noflow">Search term</label><input type="text" title="Search this book" id="bk_term" name="term" value="" data-jig="ncbiclearbutton" /> <input type="submit" class="jig-ncbibutton" value="Search this book" submit="false" style="padding: 0.1em 0.4em;" /></div></form></div></div><div class="icnblk_cntnt two_col"><div class="pagination bk_noprnt"></div></div></div></div></div>
|
|
<div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><h1 id="_NBK1058_"><span class="title" itemprop="name">Building Customized Data Pipelines Using the Entrez Programming Utilities (eUtils)</span></h1><p class="contrib-group"><span itemprop="author">Eric Sayers</span> and <span itemprop="author">David Wheeler</span>.</p></div><div class="jig-ncbiinpagenav body-content whole_rhythm" data-jigconfig="allHeadingLevels: ['h2'],smoothScroll: false" itemprop="text"><div id="_abs_rndgid_" itemprop="description"><h2 id="__abs_rndgid__">Introduction</h2><p>The Entrez Programming Utilities (eUtils) are a set of seven server-side programs that provide a stable interface into the <a href="/Entrez/index.html" ref="pagearea=abstract&targetsite=external&targetcat=link&targettype=uri">Entrez query and database system</a> at the National Center for Biotechnology Information (<a href="http://www.ncbi.nlm.nih.gov/" ref="pagearea=abstract&targetsite=external&targetcat=link&targettype=uri">NCBI</a>). The eUtils use a fixed URL syntax that translates a standard set of input parameters into the values necessary for various NCBI software components to search for and retrieve the requested data. The eUtils are therefore the structured interface to the Entrez system, which currently includes 23 databases covering a variety of biomedical data, including nucleotide and protein sequences, gene records, three-dimensional molecular structures, and the biomedical literature. </p><p>To access these data, a piece of software first posts an eUtils URL to NCBI, then retrieves the results of this posting, after which it processes the data as required. The software can thus use any computer language that can send a URL to the eUtils server and interpret the XML response; examples of such languages are Perl, Python, Java, and C++. 
Combining eUtils components to form customized data pipelines within these applications is a powerful approach to data manipulation. </p><p>This guide first describes the general function and use of the eUtils and then outlines strategies for creating customized data pipelines with examples in Perl.</p></div><div id="remember"><h2 id="_remember_">Two Things to Remember Before Using the eUtils</h2><div id="eutils_esayers-1-1"><h3>The eUtils Access Entrez Databases</h3><p>The eUtils access the core search and retrieval engine of the Entrez system and, therefore, are only capable of retrieving data that are already in Entrez. Although the majority of data at NCBI is in Entrez, there are several datasets that exist outside of the Entrez system. Before beginning a project with the eUtils, check that the desired data can be found within an Entrez database.</p></div><div id="eutils_esayers-1-2"><h3>The Entrez System Identifies Database Records Using UIDs</h3><p>Each Entrez database refers to the data records within it by an integer ID called a UID. Examples of UIDs are GI numbers for Nucleotide and Protein, PMIDs for PubMed, or MMDB-IDs for Structure. The eUtils use UIDs for both data input and output, and thus it is often critical, especially for advanced data pipelines, to know how to find the UIDs associated with the desired data before beginning a project with the eUtils.</p></div></div><div id="understanding"><h2 id="_understanding_">Understanding Entrez</h2><div id="eutils_esayers-2-1"><h3>The Entrez Engine: EGQuery, ESearch, and ESummary</h3><p>The core of Entrez is an engine that performs two basic tasks for any Entrez database: 1) assemble a list of UIDs that match a text query, and 2) retrieve a brief summary record called a Document Summary (DocSum) for each UID. 
In Entrez, UIDs are always integers, and each refers to a unique record in a given Entrez database; the <a href="http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html#PrimaryIDs" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">common names of the UIDs</a> are listed. Document Summaries are a familiar sight in any Entrez Web search and are shown in the results display seen immediately after a search is executed. </p><p>These two basic tasks of the Entrez engine are performed by ESearch and ESummary. ESearch returns a list of UIDs that match a text query in a given Entrez database, and ESummary returns DocSums that match a list of input UIDs. EGQuery is a global version of ESearch that searches all Entrez databases simultaneously. Because these three eUtils perform the two core Entrez functions, they function well for all Entrez databases.</p></div><div id="eutils_esayers-2-2"><h3>Entrez Databases: EInfo, EFetch*, and ELink</h3><p>A growing <a href="http://www.ncbi.nih.gov/Database/datamodel/index.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">number of databases</a>, such as PubMed, Nucleotide, Protein, and Structure, use the core Entrez search and retrieval engine. EInfo provides detailed information about a given database, including lists of the indexing fields in the database and the available links to other Entrez databases. Each Entrez database includes two primary enhancements to the raw data records: 1) software for producing a variety of display formats appropriate to the given database, and 2) links from each record to records in other Entrez databases via a list of associated UIDs. </p><p>The display format function is performed by EFetch, which generates formatted output for a list of input UIDs. For example, EFetch can produce abstracts from Entrez PubMed or FASTA format from Entrez Protein. 
The linked-records function is performed by ELink, which generates a list of UIDs in a specified Entrez database that are linked to a set of input UIDs. For example, ELink can find Entrez SNP records linked to records in Entrez Nucleotide, or Entrez Domain records linked to records in Entrez Protein.</p><p>
|
|
<b>
|
|
<i>*Note:</i>
|
|
</b> EFetch is currently supported only in the following databases: PubMed, PubMed Central, Journals, Nucleotide, Protein, Genome, Gene, SNP, PopSet, and Taxonomy.
|
|
</p></div><div id="eutils_esayers-2-3"><h3>Using the Entrez History Server: EPost <i>et al.</i></h3><p>A powerful feature of the Entrez system is that it can store retrieved sets of UIDs temporarily on the servers so that they can be combined subsequently or otherwise manipulated. The Entrez History server provides this service and is accessed on the Web using either the Preview/Index or History tabs on Entrez search pages. Each of the eUtils can also use the History server, which assigns each set of UIDs an integer label called a query key (&query_key) and an encoded server address called a Web environment (&WebEnv). EPost allows any list of Primary IDs (UIDs) to be uploaded to the History Server and returns the query key and Web environment. ESearch can also post its output set of UIDs to the History Server. The resulting query key and Web environment from either EPost or ESearch can then be used in place of a UID list in ESummary, EFetch, and ELink, which is very convenient when dealing with large datasets.</p></div></div><div id="constructing-urls"><h2 id="_constructing-urls_">Guidelines for Constructing URLs</h2><div id="eutils_esayers-3-1"><h3>Special Characters</h3><p>When constructing URLs for the eUtils, please use lowercase characters for all parameters except <tt>&WebEnv</tt>. There is no required order for the URL parameters in an eUtils URL, and null values or inappropriate parameters are ignored. Avoid placing spaces in the URLs, particularly in queries. If a space is required, use a plus sign (+) instead of a space:</p>
|
|
<ul><li class="half_rhythm"><div> Incorrect: <tt>&id=352, 25125, 234,</tt> ...</div></li><li class="half_rhythm"><div> Correct: <tt>&id=352,25125,234,</tt>...</div></li><li class="half_rhythm"><div> Incorrect: <tt>&term=biomol mrna[properties] AND mouse[organism]</tt></div></li><li class="half_rhythm"><div> Correct: <tt>&term=biomol+mrna[properties]+AND+mouse[organism]</tt></div></li></ul>
|
|
<p>Other special characters, such as the # symbol used in referring to a query key on the History server, should be represented by their URL encodings (%23 for #).</p></div><div id="eutils_esayers-3-2"><h3>Identifying Your Queries</h3><p>NCBI recommends that you use the <tt>&tool</tt> and <tt>&email</tt> parameters to identify all of your eUtils URLs. For <tt>&tool</tt>, choose a value that uniquely identifies your software. If your name is John Smith, use, for example, <tt>&tool=johnsmithsoft</tt>. If your email address is jsmith@hotmail.com, use <tt>&email=jsmith@hotmail.com</tt>. This email address is used only to inform the creator of the software of any problems. The NCBI does not use these addresses for mailing lists, although you can join the <a href="/mailman/listinfo/utilities-announce" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">eUtils-announce</a> mailing list if you wish.</p></div></div><div id="brief"><h2 id="_brief_">The Seven eUtils in Brief</h2>
|
|
<ul><li class="half_rhythm"><div>
|
|
<b>EInfo:</b> provides the number of records indexed in each field of a given database, the date of the last update of the database, and the available links from the database to other Entrez databases. [<a href="http://eutils.ncbi.nlm.nih.gov/entrez/query/static/einfo_help.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">reference documentation</a>]
|
|
</div></li><li class="half_rhythm"><div>
|
|
<b>EGQuery:</b> responds to a text query with the number of records matching the query in each Entrez database. [<a href="http://eutils.ncbi.nlm.nih.gov/entrez/query/static/egquery_help.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">reference documentation</a>]
|
|
</div></li><li class="half_rhythm"><div>
|
|
<b>ESearch:</b> responds to a text query with the list of UIDs matching the query in a given database, along with the term translations of the query. [<a href="http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esearch_help.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">reference documentation</a>]
|
|
</div></li><li class="half_rhythm"><div>
|
|
<b>ESummary:</b> responds to a list of UIDs with the corresponding document summaries. [<a href="http://eutils.ncbi.nlm.nih.gov/entrez/query/static/esummary_help.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">reference documentation</a>]
|
|
</div></li><li class="half_rhythm"><div>
|
|
<b>EPost:</b> accepts a list of UIDs, stores the set on the History Server, and responds with the corresponding query key and Web environment. [<a href="http://eutils.ncbi.nlm.nih.gov/entrez/query/static/epost_help.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">reference documentation</a>]
|
|
</div></li><li class="half_rhythm"><div>
|
|
<b>EFetch:</b> responds to a list of UIDs with the corresponding data records. [<a href="http://eutils.ncbi.nlm.nih.gov/entrez/query/static/efetch_help.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">reference documentation</a>]
|
|
</div></li><li class="half_rhythm"><div>
|
|
<b>ELink:</b> responds to a list of UIDs in a given database with either a list of related IDs in the same database or a list of linked IDs in another Entrez database. [<a href="http://eutils.ncbi.nlm.nih.gov/entrez/query/static/elink_help.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">reference documentation</a>]
|
|
</div></li></ul>
|
|
</div><div id="syntax"><h2 id="_syntax_">Syntax and Initial Parsing of Entrez Queries</h2><p>Text search strings entered into the Entrez system are converted into Entrez queries with the following format:</p><p>
|
|
<i>term1[field1]</i>
|
|
<b>Op</b>
|
|
<i>term2[field2]</i>
|
|
<b>Op</b>
|
|
<i>term3[field3]</i>
|
|
<b>Op</b> ...
|
|
</p><p>where the <tt>terms</tt> are search terms each limited to a particular Entrez <tt>field</tt> in square brackets, all combined using one of three Boolean operators: <tt>Op = AND, OR, or NOT</tt>. These Boolean operators must be typed in all capital letters.</p><p>Example: <tt>human[organism] AND topoisomerase[protein name]</tt></p><p>Entrez initially splits the query into a series of items that were originally separated by spaces in the query; therefore it is critical that spaces separate each term and Boolean operator. If the query consists <i>only</i> of a list of UID numbers (unique identifiers) or accession numbers, the Entrez system simply returns the corresponding records and no further parsing is performed. If the query contains any Boolean operators (<tt>AND</tt>, <tt>OR</tt>, or <tt>NOT</tt>), the query is split into the terms separated by these operators, and then each term is parsed independently. The results of these searches are then combined according to the Boolean operators. Further details about the parsing of Entrez queries are given in the <a href="#eutils_esayers-6-1">Appendix</a>.</p><p>A full account of how to search Entrez can be found in the <a href="/entrez/query/static/help/helpdoc.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Entrez Help Document</a>. Additional information is available from <a href="/books/n/helpentrez/">Entrez Help</a>.</p></div><div id="large-datasets"><h2 id="_large-datasets_">Handling Large Datasets</h2><div id="eutils_esayers-5-1-1"><h3>Uploading Large UID Lists</h3><p>When uploading a large list of UIDs using ESearch or EPost, or when using such a list as input to ESummary, EFetch, or ELink, it is a good idea to split the list into smaller batches of around 500 records. 
A series of URLs can then be posted to upload the entire set.</p></div><div id="eutils_esayers-5-1-2"><h3>Downloading Large Datasets</h3><p>When using ESummary or EFetch to download large datasets, it can be more efficient to use a series of URLs governed by the <tt>retstart</tt> and <tt>retmax</tt> parameters to download smaller batches of records. An example of doing this is given in <a href="#eutils_esayers-5-4-3">Application 3: Retrieving Large Datasets.</a></p></div></div><div id="elink-considerations"><h2 id="_elink-considerations_">Special Considerations When Using ELink</h2><div id="eutils_esayers-5-2-1"><h3>Preserving Record-to-Record Correspondence in Links</h3><p>ELink can find links to not only one set of UIDs but also to multiple sets of UIDs simultaneously. This is very useful for preserving specific record-to-record links after the ELink call. To do this, use a separate <tt>&id</tt> parameter for each group of UIDs that requires a separate list of linked UIDs. Consider the following URLs:</p><p>
|
|
<b>URL 1:</b>
|
|
<tt> elink.fcgi?dbfrom=nucleotide&db=protein&id=41282244,41282247,40789264</tt>
|
|
</p><p>
|
|
<b>URL 2:</b>
|
|
<tt> elink.fcgi?dbfrom=nucleotide&db=protein&id=41282244&id=41282247&id=40789264</tt>
|
|
</p><p>
|
|
<b>URL 3:</b>
|
|
<tt> elink.fcgi?dbfrom=nucleotide&db=protein&WebEnv=Webenv&query_key=key</tt>
|
|
</p><p>Both URLs 1 and 2 return the same protein GI numbers (41282245, 4759258, 40789265), but URL 1 returns them as a group without information about which nucleotide record is linked to which protein record. URL 2, on the other hand, returns three groups of links, one for each <tt>&id</tt> parameter, preserving the nucleotide-to-protein links. URL 3 is functionally equivalent to URL 1 (assuming that the three GIs in the example are stored in that Web Environment).</p></div><div id="eutils_esayers-5-2-2"><h3>ELink and the History Server</h3><p>Although ELink can accept a stored set of UIDs from the History server as input, this eUtil cannot load its output onto the History server. The consequence of this is that the linked UIDs found by ELink must be parsed out of the XML output and then provided as input to another eUtil, either directly using the <tt>&id</tt> parameter or by using EPost to store them explicitly on the History server. Then they can be passed to other eUtils.</p></div></div><div id="combining-utility-calls"><h2 id="_combining-utility-calls_">Combining eUtils Calls to Create Entrez Applications</h2><p>The eUtils are useful when used by themselves in single URLs; however, their full potential is reached when successive eUtil URLs are combined to create a data pipeline. When used within such pipelines, the Entrez History server simplifies complex retrieval tasks by allowing easy data transfer between successive eUtil calls. Listed below are several examples of pipelines produced by combining eUtils, with the arrows representing the passing of <tt>WebEnv</tt> and <tt>query_key</tt> values from one eUtil to another. These pipelines are discussed in detail below.</p><div id="eutils_esayers-5-3-1"><h3>
|
|
<a href="#basic-pipelines">Basic Pipelines</a>
|
|
</h3>
|
|
<ul><li class="half_rhythm"><div> Retrieving data records matching an Entrez query<ul><li class="half_rhythm"><div>ESearch → ESummary</div></li><li class="half_rhythm"><div>ESearch → EFetch</div></li></ul>
|
|
</div></li><li class="half_rhythm"><div>
|
|
  Retrieving data records matching a list of UIDs<ul><li class="half_rhythm"><div>EPost → ESummary</div></li><li class="half_rhythm"><div>EPost → EFetch</div></li></ul>
|
|
</div></li><li class="half_rhythm"><div>
|
|
  Finding IDs linked to records matching an Entrez query<ul><li class="half_rhythm"><div>ESearch → ELink</div></li></ul>
|
|
</div></li><li class="half_rhythm"><div>
|
|
  Finding IDs linked to other UIDs<ul><li class="half_rhythm"><div>EPost → ELink</div></li></ul>
|
|
</div></li></ul>
|
|
</div><div id="eutils_esayers-5-3-2"><h3>
|
|
<a href="#advanced-pipelines">Advanced Pipelines</a>
|
|
</h3>
|
|
<ul><li class="half_rhythm"><div> Retrieving data records in database B linked to records in database A matching an Entrez query<ul><li class="half_rhythm"><div>ESearch → ELink → ESummary</div></li><li class="half_rhythm"><div>ESearch → ELink → EFetch</div></li></ul>
|
|
</div></li><li class="half_rhythm"><div>
|
|
  Retrieving data records from a subset of an ID list defined by an Entrez query<ul><li class="half_rhythm"><div>EPost → ESearch → ESummary</div></li><li class="half_rhythm"><div>EPost → ESearch → EFetch</div></li></ul>
|
|
</div></li><li class="half_rhythm"><div>
|
|
  Retrieving a subset of data records, defined by an Entrez query, from a set of records in database B linked to a list of UIDs in database A<ul><li class="half_rhythm"><div>ELink → EPost → ESearch → ESummary</div></li><li class="half_rhythm"><div>ELink → EPost → ESearch → EFetch</div></li></ul>
|
|
</div></li></ul>
|
|
</div><div id="basic-pipelines"><h3>Basic Pipelines</h3><div id="eutils_esayers-5-3-3-1"><h4>ESearch → ESummary/EFetch</h4><p>
|
|
<i>Input:</i> Entrez query
|
|
</p><p>
|
|
<i>Output:</i> DocSums (ESummary) or formatted data (EFetch) that match the Entrez query
|
|
</p><p>
|
|
<i>Step 1.</i> Use ESearch to find IDs that match an Entrez query and store them on the History server.
|
|
</p><p>
|
|
<pre>esearch.fcgi?db=database&term=query&usehistory=y</pre>
|
|
</p><p>
|
|
<i>Step 2.</i> Parse the Web Environment (<tt>Webenv</tt>) and query key (<tt>key</tt>) parameters from the XML output.
|
|
</p><p>
|
|
<i>Step 3.</i> Use ESummary or EFetch to retrieve records for the stored dataset.
|
|
</p><p>
|
|
<pre>esummary.fcgi?db=database&WebEnv=Webenv&query_key=key</pre>
|
|
</p><p>
|
|
<pre>efetch.fcgi?db=database&WebEnv=Webenv&query_key=key</pre>
|
|
</p></div><div id="eutils_esayers-5-3-3-2"><h4>EPost → ESummary/EFetch</h4><p>
|
|
<i>Input:</i> List of UIDs
|
|
</p><p>
|
|
<i>Output:</i> DocSums (ESummary) or formatted data (EFetch) that correspond to the input list of UIDs
|
|
</p><p>
|
|
<i>Step 1.</i> Use EPost to store the IDs on the History server.
|
|
</p><p>
|
|
<pre>epost.fcgi?db=database&id=id_list</pre>
|
|
</p><p>
|
|
<i>Step 2.</i> Parse the Web Environment (<tt>Webenv</tt>) and query key (<tt>key</tt>) parameters from the XML output.
|
|
</p><p>
|
|
<i>Step 3.</i> Use ESummary or EFetch to retrieve records for the stored dataset.
|
|
</p><p>
|
|
<pre>esummary.fcgi?db=database&WebEnv=Webenv&query_key=key</pre>
|
|
</p><p>
|
|
<pre>efetch.fcgi?db=database&WebEnv=Webenv&query_key=key</pre>
|
|
</p></div><div id="eutils_esayers-5-3-3-3"><h4>ESearch → ELink</h4><p>
|
|
<i>Input:</i> Entrez query
|
|
</p><p>
|
|
<i>Output:</i> Primary IDs in database B that are linked to records in database A matching the Entrez query
|
|
</p><p>
|
|
<i>Step 1.</i> Use ESearch to find IDs that match an Entrez query and store them on the History server.
|
|
</p><p>
|
|
<pre>esearch.fcgi?db=databaseA&term=query&usehistory=y</pre>
|
|
</p><p>
|
|
<i>Step 2.</i> Parse the Web Environment (<tt>Webenv</tt>) and query key (<tt>key</tt>) parameters from the XML output.
|
|
</p><p>
|
|
<i>Step 3.</i> Use ELink to retrieve linked IDs for the stored dataset.
|
|
</p><p>
|
|
<pre>elink.fcgi?dbfrom=databaseA&db=databaseB&WebEnv=Webenv&query_key=key</pre>
|
|
</p></div><div id="eutils_esayers-5-3-3-4"><h4>EPost → ELink</h4><p>
|
|
<i>Input:</i> List of UIDs in database A
|
|
</p><p>
|
|
<i>Output:</i> List of UIDs in database B linked to the IDs in database A
|
|
</p><p>
|
|
<i>Step 1.</i> Use EPost to store the IDs on the History server.
|
|
</p><p>
|
|
<pre>epost.fcgi?db=databaseA&id=id_list</pre>
|
|
</p><p>
|
|
<i>Step 2.</i> Parse the Web Environment (<tt>Webenv</tt>) and query key (<tt>key</tt>) parameters from the XML output.
|
|
</p><p>
|
|
<i>Step 3.</i> Use ELink to retrieve linked IDs for the stored dataset.
|
|
</p><p>
|
|
<pre>elink.fcgi?dbfrom=databaseA&db=databaseB&WebEnv=Webenv&query_key=key</pre>
|
|
</p></div></div><div id="advanced-pipelines"><h3>Advanced Pipelines</h3><div id="eutils_esayers-5-3-4-1"><h4>ESearch → ELink → ESummary/EFetch</h4><p>
|
|
<i>Input:</i> Entrez query
|
|
</p><p>
|
|
<i>Output:</i> DocSums (ESummary) or formatted data records (EFetch) in database B that are linked to records in database A matching the Entrez query
|
|
</p><p>
|
|
<i>Step 1.</i> Use ESearch to find IDs that match an Entrez query and store them on the History server.
|
|
</p><p>
|
|
<pre>esearch.fcgi?db=databaseA&term=query&usehistory=y</pre>
|
|
</p><p>
|
|
<i>Step 2.</i> Parse the Web Environment (<tt>Webenv</tt>) and query key (<tt>key</tt>) parameters from the XML output.
|
|
</p><p>
|
|
<i>Step 3.</i> Use ELink to retrieve linked IDs for the stored dataset.
|
|
</p><p>
|
|
<pre>elink.fcgi?dbfrom=databaseA&db=databaseB&WebEnv=Webenv&query_key=key</pre>
|
|
</p><p>
|
|
<i>Step 4.</i> Parse the UIDs from the ELink XML output and assemble as a comma-delimited list.
|
|
</p><p>
|
|
<i>Step 5.</i> Use ESummary or EFetch to retrieve data records corresponding to the ID list
|
|
</p><p>
|
|
<pre>esummary.fcgi?db=databaseB&id=id_list</pre>
|
|
</p><p>
|
|
<pre>efetch.fcgi?db=databaseB&id=id_list</pre>
|
|
</p></div><div id="eutils_esayers-5-3-4-2"><h4>EPost → ESearch → ESummary/EFetch</h4><p>
|
|
<i>Input:</i> List of UIDs
|
|
</p><p>
|
|
<i>Output:</i> DocSums (ESummary) or formatted data (EFetch) that correspond to the input list of IDs limited by an Entrez query
|
|
</p><p>
|
|
<i>Step 1.</i> Use EPost to store the IDs on the History server.
|
|
</p><p>
|
|
<pre>epost.fcgi?db=database&id=id_list</pre>
|
|
</p><p>
|
|
<i>Step 2.</i> Parse the Web Environment (<tt>Webenv</tt>) and query key (<tt>key</tt>) parameters from the XML output.
|
|
</p><p>
|
|
<i>Step 3.</i> Use ESearch to limit the stored dataset by an Entrez query.
|
|
</p><p>
|
|
<pre>esearch.fcgi?db=database&term=query+AND+%23key&WebEnv=Webenv&usehistory=y</pre>
|
|
</p><p>
|
|
<i>Step 4.</i> Parse the new Web Environment (<tt>Webenv2</tt>) and query key (<tt>key2</tt>) parameters from the XML output.
|
|
</p><p>
|
|
<i>Step 5.</i> Use ESummary or EFetch to retrieve records for the stored dataset.
|
|
</p><p>
|
|
<pre>esummary.fcgi?db=database&WebEnv=Webenv2&query_key=key2</pre>
|
|
</p><p>
|
|
<pre>efetch.fcgi?db=database&WebEnv=Webenv2&query_key=key2</pre>
|
|
</p></div><div id="eutils_esayers-5-3-4-3"><h4>ELink → EPost → ESearch → ESummary/EFetch</h4><p>
|
|
<i>Input:</i> List of UIDs
|
|
</p><p>
|
|
<i>Output:</i> DocSums (ESummary) or formatted data (EFetch) in database B that are both linked to the input list of IDs in database A and match the Entrez query
|
|
</p><p>
|
|
<i>Step 1.</i> Use ELink to retrieve IDs in database B linked to IDs in database A.
|
|
</p><p>
|
|
<pre>elink.fcgi?dbfrom=databaseA&db=databaseB&id=id_list</pre>
|
|
</p><p>
|
|
<i>Step 2.</i> Parse the linked UIDs from the ELink XML output and assemble as a comma-delimited list (<tt>id_list2</tt>) for posting onto the History server.
|
|
</p><p>
|
|
<i>Step 3.</i> Use EPost to store the IDs on the History server.
|
|
</p><p>
|
|
<pre>epost.fcgi?db=databaseB&id=id_list2</pre>
|
|
</p><p>
|
|
<i>Step 4.</i> Parse the Web Environment (<tt>Webenv</tt>) and query key (<tt>key</tt>) parameters from the XML output.
|
|
</p><p>
|
|
<i>Step 5.</i> Use ESearch to limit the stored dataset by an Entrez query.
|
|
</p><p>
|
|
<pre>esearch.fcgi?db=databaseB&term=query+AND+%23key&WebEnv=Webenv&usehistory=y</pre>
|
|
</p><p>
|
|
<i>Step 6.</i> Parse the new Web Environment (<tt>Webenv2</tt>) and query key (<tt>key2</tt>) parameters from the XML output.
|
|
</p><p>
|
|
<i>Step 7.</i> Use ESummary or EFetch to retrieve records for the stored dataset.
|
|
</p><p>
|
|
<pre>esummary.fcgi?db=databaseB&WebEnv=Webenv2&query_key=key2</pre>
|
|
</p><p>
|
|
<pre>efetch.fcgi?db=databaseB&WebEnv=Webenv2&query_key=key2</pre>
|
|
</p></div></div><div id="sample-apps"><h3>Sample Applications of the eUtils</h3><p>In the applications below, it is assumed that Perl is being used to create eUtils pipelines. In Perl, scalar variable names are preceded by a “$” symbol, and array names are preceded by a “@”. In several instances, results will be stored in such variables for use in subsequent URLs.</p><div id="eutils_esayers-5-4-1"><h4>Application 1: Converting GI Numbers to Accession Numbers</h4><p>I have a list of nucleotide GI numbers and I want the corresponding accession numbers.</p><p>
|
|
<b>Solution:</b> Use EFetch with <tt>&rettype=acc</tt>
|
|
</p><p>
|
|
<b>URL:</b>
|
|
<tt> efetch.fcgi?db=nucleotide&id=$gi_list&rettype=acc</tt>
|
|
</p></div><div id="eutils_esayers-5-4-2"><h4>Application 2: Converting Accession Numbers to Data</h4><p>I have a list of genome Accession numbers (<tt>$acc_list</tt>) and I want the sequences in FASTA format.</p><p>
|
|
<b>Solution:</b> Use EFetch with <tt>&rettype=fasta</tt>
|
|
</p><p>
|
|
<b>URL:</b>
|
|
<tt> efetch.fcgi?db=genome&id=$acc_list&rettype=fasta</tt>
|
|
</p></div><div id="eutils_esayers-5-4-3"><h4>Application 3: Retrieving Large Datasets</h4><p>I want to retrieve an arbitrary number of formatted records that match an Entrez query.</p><p>
|
|
<b>Solution:</b> First, run ESearch in Web Environment mode to retrieve the total number of UIDs that match the Entrez query (<tt>&lt;Count&gt;</tt> tag in the ESearch output). Then store this number into <tt>$count</tt>, and store the values of <tt>WebEnv</tt> and <tt>query_key</tt> into <tt>$Webenv</tt> and <tt>$key</tt>. Next, run EFetch multiple times, each time retrieving a batch of size <tt>$retmax</tt> (for example, <tt>$retmax = 500</tt>). Accomplish this by incrementing <tt>$retstart</tt> iteratively in a “for” loop to retrieve successive batches of records of size <tt>$retmax</tt>:
|
|
</p><p>
|
|
<pre>use LWP::Simple;</pre>
|
|
</p><p>
|
|
<b>URL 1:</b>
|
|
<tt> esearch.fcgi?db=database&term=$query&usehistory=y</tt>
|
|
</p><p>
|
|
<b>URL 2+:</b> produced by the following loop:
|
|
</p><p>
|
|
<b>Perl:</b>
|
|
</p><p>
|
|
<pre>for ($retstart = 0; $retstart &lt; $count; $retstart += $retmax) {</pre>
|
|
</p><p>
|
|
<pre> $efetch_url = $base ."db=$db&WebEnv=$Webenv&query_key=$key";</pre>
|
|
</p><p>
|
|
<pre> $efetch_url .= "&retstart=$retstart&retmax=$retmax";</pre>
|
|
</p><p>
|
|
<pre> $efetch_out = get($efetch_url);</pre>
|
|
</p><p>
|
|
<pre> print "$efetch_out";</pre>
|
|
</p><p>
|
|
<pre>}</pre>
|
|
</p><p>where <tt>$base</tt> = <tt>http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?</tt>, <tt>$db</tt> is the database, and <tt>$efetch_url</tt> is a string containing the EFetch URL. This Perl code assumes that the <tt>LWP::Simple</tt> module is installed. This module allows the use of the get command for retrieving data from a URL.</p></div><div id="eutils_esayers-5-4-4"><h4>Application 4: Downloading Contigs</h4><p>I want to download a flatfile with the full sequence of an assembly (e.g., a contig).</p><p>
|
|
<b>Solution:</b> Use EFetch with <tt>&rettype=gbwithparts</tt>
|
|
</p><p>
|
|
<b>URL:</b>
|
|
<tt> efetch.fcgi?db=nucleotide&id=27479347&rettype=gbwithparts</tt>
|
|
</p></div><div id="eutils_esayers-5-4-5"><h4>Application 5: Limiting and Converting GI Lists</h4><p>I have a list of protein GI numbers from a BLAST search and I want to download the document summaries of only those protein records that are mammalian sequences with annotated SNPs.</p><p>
|
|
<b>Solution:</b> Use EPost to upload the GI list, then use ESearch to limit the list, followed by ESummary to download the document summaries.
|
|
</p><p>
|
|
<b>URL 1:</b>
|
|
<tt> epost.fcgi?db=protein&id=$gi_list</tt>
|
|
</p><p>
|
|
<b>Result:</b> In Perl, store WebEnv as <tt>$Webenv1</tt>, query_key as <tt>$key1</tt>
|
|
</p><p>
|
|
<b>URL 2:</b>
|
|
<tt> esearch.fcgi?db=protein&term=%23$key1+AND+mammalia[organism]+AND+protein+snp[filter]&usehistory=y&WebEnv=$Webenv1</tt>
|
|
</p><p>
|
|
<b>Result:</b> In Perl, store WebEnv as <tt>$Webenv2</tt>, query_key as <tt>$key2</tt>
|
|
</p><p>Note: The <tt>%23</tt> resolves to the # symbol, so that <tt>%23$key1</tt> → <tt>#2</tt>.</p><p>
|
|
<b>URL 3:</b>
|
|
<tt> esummary.fcgi?db=protein&WebEnv=$Webenv2&query_key=$key2</tt>
|
|
</p></div><div id="eutils_esayers-5-4-6"><h4>Application 6: Finding Related Records in Other Entrez Databases</h4><p>I want to find all available 3D structure records similar to protein <a href="/protein/2208903/?report=GenPept" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=genpept">BAA20519</a>.</p><p>
|
|
<b>Solution:</b> Use ESearch to find the GI number, then ELink to find related sequences to that protein. Then use ELink again to find linked MMDB-IDs, and finally ESummary to download the document summaries of the structure records.
|
|
</p><p>
|
|
<b>URL 1:</b>
|
|
<tt> esearch.fcgi?db=protein&term=<a href="/protein/2208903/?report=GenPept" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=genpept">BAA20519</a></tt>
|
|
</p><p>
|
|
<b>Result:</b> Find GI 2208903.
|
|
</p><p>
|
|
<b>URL 2:</b>
|
|
<tt> elink.fcgi?dbfrom=protein&db=protein&id=2208903</tt>
|
|
</p><p>
|
|
<b>Result:</b> Find 1084 related sequences, extract into <tt>$gi_list1</tt>
|
|
</p><p>
|
|
<b>URL 3:</b>
|
|
<tt> elink.fcgi?dbfrom=protein&db=structure&id=$gi_list1</tt>
|
|
</p><p>
|
|
<b>Result:</b> Find 9 related structures, extract into <tt>$gi_list2</tt>
|
|
</p><p>
|
|
<b>URL 4:</b>
|
|
<tt> esummary.fcgi?db=structure&id=$gi_list2</tt>
|
|
</p></div><div id="eutils_esayers-5-4-7"><h4>Application 7: Entrez TBLASTX</h4><p>I want to download all mRNAs from green plants that are related <i>at the protein level</i> to human <a href="/nuccore/1519245088" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">NM_001126</a>, in flatfile format.</p><p>
|
|
<b>Motivation:</b> For finding distant homologs, protein BLAST searches are generally more sensitive than nucleotide BLAST searches. In this specific case, a nucleotide BLAST search finds no significant matches to <a href="/nuccore/1519245088" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">NM_001126</a> from green plants, whereas TBLASTX will find several homologous sequences. However, TBLASTX is the most time-consuming version of BLAST, and therefore using the pre-computed results in Entrez saves significant computing time.
|
|
</p><p>
|
|
<b>Solution:</b> Use ESearch to retrieve the record for <a href="/nuccore/1519245088" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">NM_001126</a>, and then use ELink to find the linked protein sequence. Then use ELink again to find all related sequences to that protein, and then use ELink a third time to find all nucleotide records linked to those related proteins and then limit them to mRNAs from green plants. Finally, download the formatted data with EFetch.
|
|
</p><p>
|
|
<b>URL 1:</b>
|
|
<tt> esearch.fcgi?db=nucleotide&term=<a href="/nuccore/1519245088" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">NM_001126</a></tt>
|
|
</p><p>
|
|
<b>Result:</b> Find GI = 4557270.
|
|
</p><p>
|
|
<b>URL 2:</b>
|
|
<tt> elink.fcgi?dbfrom=nucleotide&db=protein&id=4557270</tt>
|
|
</p><p>
|
|
<b>Result:</b> Find GI = 4557271.
|
|
</p><p>
|
|
<b>URL 3:</b>
|
|
<tt> elink.fcgi?dbfrom=protein&db=protein&id=4557271</tt>
|
|
</p><p>
|
|
<b>Result:</b> Extract the 507 GI numbers into <tt>$gi_list1</tt>, and if desired, the raw BLAST scores reported by ELink into <tt>@scores</tt>
|
|
</p><p>
|
|
<b>URL 4:</b>
|
|
<tt> elink.fcgi?dbfrom=protein&db=nucleotide&id=$gi_list1&term=biomol+mrna[properties]+AND+viridiplantae[organism]</tt>
|
|
</p><p>
|
|
<b>Result:</b> Extract the 7 GI numbers into <tt>$gi_list2</tt>
|
|
</p><p>
|
|
<b>URL 5:</b>
|
|
<tt> efetch.fcgi?db=nucleotide&id=$gi_list2&rettype=gb</tt>
|
|
</p><p>
|
|
<b>Result:</b> Download the 7 plant mRNAs, none of which are found using Related Sequences to <a href="/nuccore/1519245088" class="bk_tag" ref="pagearea=body&targetsite=entrez&targetcat=link&targettype=nuccore">NM_001126</a>
|
|
</p></div></div><div id="course"><h3>eUtils Course</h3><p>The National Center for Biotechnology Information (NCBI) presents <i>NCBI PowerScripting</i>, a 3-day course that includes both lectures and computer workshops on using the NCBI eUtils effectively within scripts to automate search-and-retrieval operations across the entire suite of Entrez databases.</p><p>
|
|
<b>
|
|
<a href="/Class/PowerTools/eutils/course.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Full details of the course</a>.
|
|
</b>
|
|
</p></div><div id="appendix"><h3>Appendix</h3><div id="eutils_esayers-6-1"><h4>Automatic Term Mapping in Entrez Queries</h4><p>After the initial parsing, each resulting <tt>term</tt> in the Entrez query is then searched against three lists in the following order, and if a match is found, the indicated search is performed:</p>
|
|
<dl class="temp-labeled-list"><dt>1.</dt><dd><p class="no_top_margin">Taxonomic nodes → <tt>taxonomic node[organism] OR term[All Fields]</tt></p></dd><dt>2.</dt><dd><p class="no_top_margin">Journal names → <tt>term[Journal]</tt></p></dd><dt>3.</dt><dd><p class="no_top_margin">Author names → <tt>term[Author]</tt></p></dd></dl>
|
|
<p>A valid author name is any word followed by a space and then one or two letters.</p></div><div id="eutils_esayers-6-2"><h4>Subsequent Parsing</h4><p>If no matches are found after automatic term mapping, the rightmost word of the term is removed, and automatic term mapping is repeated. This continues until either a match is found or the term is exhausted. If there is still no match, each word of the term is limited to <i>All Fields</i> and all terms are combined with <tt>AND</tt>.</p><div id="eutils_esayers-6-2-1"><h5>Examples</h5>
|
|
<ul><li class="half_rhythm"><div> cancer cell receptor → <tt>"Cancer Cell"[Journal] AND receptor[All Fields]</tt></div></li><li class="half_rhythm"><div> cell receptor cancer → <tt>(cell[All Fields] AND receptor[All Fields]) AND ("Cancer"[Organism] OR cancer[All Fields])</tt>
|
|
</div></li><li class="half_rhythm"><div> human c-src kinase → <tt>(("Homo sapiens"[Organism] OR human[All Fields]) AND c-src[All Fields]) AND kinase[All Fields]</tt></div></li><li class="half_rhythm"><div> wheat nuclear protein → <tt>(("Triticum aestivum"[Organism] OR wheat[All Fields]) AND nuclear[All Fields]) AND protein[All Fields]</tt></div></li><li class="half_rhythm"><div> wheat w nuclear protein → <tt>(wheat w[Author] AND nuclear[All Fields]) AND protein[All Fields]</tt></div></li></ul>
|
|
</div></div></div></div><div id="bk_toc_contnr"></div></div></div>
|
|
<div class="post-content"><div><div class="half_rhythm"><a href="/books/about/copyright/">Copyright Notice</a></div><div class="small"><span class="label">Bookshelf ID: NBK1058</span></div><div style="margin-top:2em" class="bk_noprnt"><a class="bk_cntns" href="/books/n/coursework/">Contents</a><div class="pagination bk_noprnt"></div></div></div></div>
|
|
|
|
</div>
|
|
|
|
<!-- Custom content below content -->
|
|
<div class="col4">
|
|
|
|
</div>
|
|
|
|
|
|
<!-- Book content -->
|
|
|
|
<!-- Custom content below bottom nav -->
|
|
<div class="col5">
|
|
|
|
</div>
|
|
</div>
|
|
|
|
<div id="rightcolumn" class="four_col col last">
|
|
<!-- Custom content above discovery portlets -->
|
|
<div class="col6">
|
|
<div id="ncbi_share_book"><a href="#" class="ncbi_share" data-ncbi_share_config="popup:false,shorten:true" ref="id=NBK1058&db=books">Share</a></div>
|
|
|
|
</div>
|
|
<div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Views</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="PDF_download" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="/books/NBK1058/?report=reader">PubReader</a></li><li><a href="/books/NBK1058/?report=printable">Print View</a></li><li><a data-jig="ncbidialog" href="#_ncbi_dlg_citbx_NBK1058" data-jigconfig="width:400,modal:true">Cite this Page</a><div id="_ncbi_dlg_citbx_NBK1058" style="display:none" title="Cite this Page"><div class="bk_tt">Sayers E, Wheeler D. Building Customized Data Pipelines Using the Entrez Programming Utilities (eUtils) In: NCBI Short Courses [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2004-. 
<span class="bk_cite_avail"></span></div></div></li><li><a href="/books/NBK1058/pdf/Bookshelf_NBK1058.pdf">PDF version of this page</a> (373K)</li><li><a href="/books/n/coursework/pdf/">PDF version of this title</a> (1.2M)</li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>In this Page</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="page-toc" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="#_abs_rndgid_" ref="log$=inpage&link_id=inpage">Introduction</a></li><li><a href="#remember" ref="log$=inpage&link_id=inpage">Two Things to Remember Before Using the eUtils</a></li><li><a href="#understanding" ref="log$=inpage&link_id=inpage">Understanding Entrez</a></li><li><a href="#constructing-urls" ref="log$=inpage&link_id=inpage">Guidelines for Constructing URLs</a></li><li><a href="#brief" ref="log$=inpage&link_id=inpage">The Seven eUtils in Brief</a></li><li><a href="#syntax" ref="log$=inpage&link_id=inpage">Syntax and Initial Parsing of Entrez Queries</a></li><li><a href="#large-datasets" ref="log$=inpage&link_id=inpage">Handling Large Datasets</a></li><li><a href="#elink-considerations" ref="log$=inpage&link_id=inpage">Special Considerations When Using ELink</a></li><li><a href="#combining-utility-calls" ref="log$=inpage&link_id=inpage">Combining eUtils Calls to Create Entrez Applications</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Related Items in Bookshelf</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="source-links" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="https://www.ncbi.nlm.nih.gov/books?term=%22textbooks%22%5BResource%20Type%5D" ref="pagearea=source-links&targetsite=external&targetcat=link&targettype=uri">All Textbooks</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Links</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="document-links" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li>
|
|
<a href="/Class/PowerTools/eutils/course.html" ref="pagearea=document-links&targetsite=external&targetcat=link&targettype=uri">eUtils course</a>
|
|
</li><li>
|
|
<a href="/mailman/listinfo/utilities-announce" ref="pagearea=document-links&targetsite=external&targetcat=link&targettype=uri">eUtils-announce mailing list</a>
|
|
</li><li>
|
|
<a href="https://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html" ref="pagearea=document-links&targetsite=external&targetcat=link&targettype=uri">eUtils help documents</a>
|
|
</li><li>
|
|
<a href="http://www.ncbi.nih.gov/Entrez/index.html" ref="pagearea=document-links&targetsite=external&targetcat=link&targettype=uri">Entrez search</a>
|
|
</li><li>
|
|
<a href="http://www.ncbi.nih.gov/Database/datamodel/index.html" ref="pagearea=document-links&targetsite=external&targetcat=link&targettype=uri">Entrez database model</a>
|
|
</li><li>
|
|
<a href="/entrez/query/static/help/helpdoc.html" ref="pagearea=document-links&targetsite=external&targetcat=link&targettype=uri">Entrez help</a>
|
|
</li><li>
|
|
<a href="https://eutils.ncbi.nlm.nih.gov/entrez/query/static/advancedentrez.html" ref="pagearea=document-links&targetsite=external&targetcat=link&targettype=uri">Entrez tools</a>
|
|
</li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Recent Activity</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="recent_activity" id="Shutter"></a></div><div class="portlet_content"><div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="HTDisplay" class=""><div class="action"><a href="javascript:historyDisplayState('ClearHT')">Clear</a><a href="javascript:historyDisplayState('HTOff')" class="HTOn">Turn Off</a><a href="javascript:historyDisplayState('HTOn')" class="HTOff">Turn On</a></div><ul id="activity"><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&linkpos=1" href="/portal/utils/pageresolver.fcgi?recordid=67c826646d1ec11b6f63a5d5">Building Customized Data Pipelines Using the Entrez Programming Utilities (eUtil...</a><div class="ralinkpop offscreen_noflow">Building Customized Data Pipelines Using the Entrez Programming Utilities (eUtils) - NCBI Short Courses<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_qry two_line"><a class="htb" ref="log$=activity&linkpos=2" href="/portal/utils/pageresolver.fcgi?recordid=67c82660d5edb449bf47ab9b">GSE258773[ACCN] AND gsm[ETYP] <span class="number">(2)</span></a><div class="tertiary">GEO DataSets</div></li><li class="ra_qry two_line"><a class="htb" ref="log$=activity&linkpos=3" href="/portal/utils/pageresolver.fcgi?recordid=67c826606d1ec11b6f638d81">GSE258764[ACCN] AND gsm[ETYP] <span class="number">(2)</span></a><div class="tertiary">GEO DataSets</div></li><li class="ra_qry two_line"><a class="htb" ref="log$=activity&linkpos=4" href="/portal/utils/pageresolver.fcgi?recordid=67c8265fb70fbb19600a4cfa">GSE258796[ACCN] AND gsm[ETYP] <span class="number">(2)</span></a><div class="tertiary">GEO DataSets</div></li><li class="ra_qry two_line"><a 
class="htb" ref="log$=activity&linkpos=5" href="/portal/utils/pageresolver.fcgi?recordid=67c8265efeee5b00acfff55c">GSE258809[ACCN] AND gsm[ETYP] <span class="number">(2)</span></a><div class="tertiary">GEO DataSets</div></li></ul><p class="HTOn">Your browsing activity is empty.</p><p class="HTOff">Activity recording is turned off.</p><p id="turnOn" class="HTOff"><a href="javascript:historyDisplayState('HTOn')">Turn recording back on</a></p><a class="seemore" href="/sites/myncbi/recentactivity">See more...</a></div></div></div>
|
|
|
|
<!-- Custom content below discovery portlets -->
|
|
<div class="col7">
|
|
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Custom content after all -->
|
|
<div class="col8">
|
|
|
|
</div>
|
|
<div class="col9">
|
|
|
|
</div>
|
|
|
|
<script type="text/javascript" src="/corehtml/pmc/js/jquery.scrollTo-1.4.2.js"></script>
|
|
<script type="text/javascript">
|
|
(function (jq) {
    // Wire up every ".skiplink" anchor so that activating it scrolls to its
    // target and then moves keyboard focus there.
    jq('.skiplink').each(function (index, link) {
        var skipLink = jq(link);
        // The link's href value is passed to jQuery as the selector for the target.
        var target = jq(skipLink.attr('href'));

        // tabindex="-1" ensures the target can receive focus.
        target.attr('tabindex', '-1');
        target.addClass('skiptarget');

        function focusTarget() {
            target.focus();
        }

        skipLink.on('click', function (evt) {
            // Suppress the default navigation; scrolling is delegated to $.scrollTo.
            evt.preventDefault();
            jq.scrollTo(target, 0, { onAfter: focusTarget });
        });
    });
})(jQuery);
|
|
</script>
|
|
</div>
|
|
<div class="bottom">
|
|
|
|
<div id="NCBIFooter_dynamic">
|
|
<!--<component id="Breadcrumbs" label="breadcrumbs"/>
|
|
<component id="Breadcrumbs" label="helpdesk"/>-->
|
|
|
|
</div>
|
|
|
|
<div class="footer" id="footer">
|
|
<section class="icon-section">
|
|
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
|
|
<div class="grid-container container">
|
|
<div class="icon-section_container">
|
|
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
|
<defs>
|
|
<style>
|
|
.cls-11 {
|
|
fill: #737373;
|
|
}
|
|
</style>
|
|
</defs>
|
|
<title>Twitter</title>
|
|
<path class="cls-11" d="M250.11,105.48c-7,3.14-13,3.25-19.27.14,8.12-4.86,8.49-8.27,11.43-17.46a78.8,78.8,0,0,1-25,9.55,39.35,39.35,0,0,0-67,35.85,111.6,111.6,0,0,1-81-41.08A39.37,39.37,0,0,0,81.47,145a39.08,39.08,0,0,1-17.8-4.92c0,.17,0,.33,0,.5a39.32,39.32,0,0,0,31.53,38.54,39.26,39.26,0,0,1-17.75.68,39.37,39.37,0,0,0,36.72,27.3A79.07,79.07,0,0,1,56,223.34,111.31,111.31,0,0,0,116.22,241c72.3,0,111.83-59.9,111.83-111.84,0-1.71,0-3.4-.1-5.09C235.62,118.54,244.84,113.37,250.11,105.48Z">
|
|
</path>
|
|
</svg></a>
|
|
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
|
<title>Facebook</title>
|
|
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
|
|
</path>
|
|
</svg></a>
|
|
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
|
<title>LinkedIn</title>
|
|
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
|
|
</path>
|
|
</svg></a>
|
|
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
|
|
<defs>
|
|
<style>
|
|
.cls-11,
|
|
.cls-12 {
|
|
fill: #737373;
|
|
}
|
|
|
|
.cls-11 {
|
|
fill-rule: evenodd;
|
|
}
|
|
</style>
|
|
</defs>
|
|
<title>GitHub</title>
|
|
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
|
|
</path>
|
|
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
|
|
</path>
|
|
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
|
|
</path>
|
|
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
|
|
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
|
|
</path>
|
|
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
|
|
</path>
|
|
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
|
|
</path>
|
|
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
|
|
</svg></a>
|
|
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
|
|
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
|
|
<defs><style>.cls-1{fill:#737373;}</style></defs>
|
|
<title>NCBI Insights Blog</title>
|
|
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
|
|
</svg>
|
|
</a>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
|
|
<section class="container-fluid bg-primary">
|
|
<div class="container pt-5">
|
|
<div class="row mt-3">
|
|
<div class="col-lg-3 col-12">
|
|
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
|
|
<ul class="list-inline social_media">
|
|
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
|
<style type="text/css">
|
|
.st20 {
|
|
fill: #FFFFFF;
|
|
}
|
|
|
|
.st30 {
|
|
fill: none;
|
|
stroke: #FFFFFF;
|
|
stroke-width: 8;
|
|
stroke-miterlimit: 10;
|
|
}
|
|
</style>
|
|
<title>Twitter</title>
|
|
<g>
|
|
<g>
|
|
<g>
|
|
<path class="st20" d="M192.9,88.1c-5,2.2-9.2,2.3-13.6,0.1c5.7-3.4,6-5.8,8.1-12.3c-5.4,3.2-11.4,5.5-17.6,6.7 c-10.5-11.2-28.1-11.7-39.2-1.2c-7.2,6.8-10.2,16.9-8,26.5c-22.3-1.1-43.1-11.7-57.2-29C58,91.6,61.8,107.9,74,116 c-4.4-0.1-8.7-1.3-12.6-3.4c0,0.1,0,0.2,0,0.4c0,13.2,9.3,24.6,22.3,27.2c-4.1,1.1-8.4,1.3-12.5,0.5c3.6,11.3,14,19,25.9,19.3 c-11.6,9.1-26.4,13.2-41.1,11.5c12.7,8.1,27.4,12.5,42.5,12.5c51,0,78.9-42.2,78.9-78.9c0-1.2,0-2.4-0.1-3.6 C182.7,97.4,189.2,93.7,192.9,88.1z"></path>
|
|
</g>
|
|
</g>
|
|
<circle class="st30" cx="124.4" cy="128.8" r="108.2"></circle>
|
|
</g>
|
|
</svg></a></li>
|
|
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
|
|
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
|
<style type="text/css">
|
|
.st10 {
|
|
fill: #FFFFFF;
|
|
}
|
|
|
|
.st110 {
|
|
fill: none;
|
|
stroke: #FFFFFF;
|
|
stroke-width: 8;
|
|
stroke-miterlimit: 10;
|
|
}
|
|
</style>
|
|
<title>Facebook</title>
|
|
<g>
|
|
<g>
|
|
<path class="st10" d="M159,99.1h-24V88.4c0-5,3.3-6.2,5.7-6.2h16.8V60l-24.4-0.1c-22.1,0-26.2,16.5-26.2,27.1v12.1H90v22.5h16.9 v67.5H135v-67.5h21.7L159,99.1z"></path>
|
|
</g>
|
|
</g>
|
|
<circle class="st110" cx="123.6" cy="123.2" r="108.2"></circle>
|
|
</svg>
|
|
</a></li>
|
|
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="Youtube" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
|
|
<title>Youtube</title>
|
|
<style type="text/css">
|
|
.st4 {
|
|
fill: none;
|
|
stroke: #FFFFFF;
|
|
stroke-width: 8;
|
|
stroke-miterlimit: 10;
|
|
}
|
|
|
|
.st5 {
|
|
fill: #FFFFFF;
|
|
}
|
|
</style>
|
|
<circle class="st4" cx="124.2" cy="123.4" r="108.2"></circle>
|
|
<g transform="translate(0,-952.36218)">
|
|
<path class="st5" d="M88.4,1037.4c-10.4,0-18.7,8.3-18.7,18.7v40.1c0,10.4,8.3,18.7,18.7,18.7h72.1c10.4,0,18.7-8.3,18.7-18.7 v-40.1c0-10.4-8.3-18.7-18.7-18.7H88.4z M115.2,1058.8l29.4,17.4l-29.4,17.4V1058.8z"></path>
|
|
</g>
|
|
</svg></a></li>
|
|
</ul>
|
|
</div>
|
|
<div class="col-lg-3 col-12">
|
|
<p class="address_footer text-white">National Library of Medicine<br />
|
|
<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
|
|
Bethesda, MD 20894</a></p>
|
|
</div>
|
|
<div class="col-lg-3 col-12 centered-lg">
|
|
<p><a href="https://www.nlm.nih.gov/web_policies.html" class="text-white">Web Policies</a><br />
|
|
<a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white">FOIA</a><br />
|
|
<a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white" id="vdp">HHS Vulnerability Disclosure</a></p>
|
|
</div>
|
|
<div class="col-lg-3 col-12 centered-lg">
|
|
<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
|
|
<a href="https://www.nlm.nih.gov/accessibility.html" class="text-white">Accessibility</a><br />
|
|
<a href="https://www.nlm.nih.gov/careers/careers.html" class="text-white">Careers</a></p>
|
|
</div>
|
|
</div>
|
|
<div class="row">
|
|
<div class="col-lg-12 centered-lg">
|
|
<nav class="bottom-links">
|
|
<ul class="mt-3">
|
|
<li>
|
|
<a class="text-white" href="https://www.nlm.nih.gov/">NLM</a>
|
|
</li>
|
|
<li>
|
|
<a class="text-white" href="https://www.nih.gov/">NIH</a>
|
|
</li>
|
|
<li>
|
|
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
|
|
</li>
|
|
<li>
|
|
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
|
|
</li>
|
|
</ul>
|
|
</nav>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</section>
|
|
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
|
|
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<!--/.page-->
|
|
</div>
|
|
<!--/.wrap-->
|
|
</div><!-- /.twelve_col -->
|
|
</div>
|
|
<!-- /.grid -->
|
|
|
|
<span class="PAFAppResources"></span>
|
|
|
|
<!-- BESelector tab -->
|
|
|
|
|
|
|
|
<!-- No-JavaScript fallback: statistics pixel; ampersands and spaces in the URL are escaped so the XHTML stays well-formed -->
<noscript><img alt="" src="/stat?jsdisabled=true&amp;ncbi_db=books&amp;ncbi_pdid=book-part&amp;ncbi_acc=NBK1058&amp;ncbi_domain=coursework&amp;ncbi_report=record&amp;ncbi_type=fulltext&amp;ncbi_objectid=&amp;ncbi_pcid=/NBK1058/&amp;ncbi_pagename=Building%20Customized%20Data%20Pipelines%20Using%20the%20Entrez%20Programming%20Utilities%20(eUtils)%20-%20NCBI%20Short%20Courses%20-%20NCBI%20Bookshelf&amp;ncbi_bookparttype=chapter&amp;ncbi_app=bookshelf" /></noscript>
|
|
|
|
|
|
<!-- usually for JS scripts at page bottom -->
|
|
<!--<component id="PageFixtures" label="styles"></component>-->
|
|
|
|
|
|
<!-- CE8B5AF87C7FFCB1_0191SID /projects/books/PBooks@9.11 portal105 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
|
|
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
|
|
|
|
<script type="text/javascript" src="https://static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3879255/4121861/3501987/4008961/3893018/3821238/4062932/4209313/4212053/4076480/3921943/3400083/3426610.js" snapshot="books"></script></body>
|
|
</html> |