115 lines
49 KiB
Text
115 lines
49 KiB
Text
<!DOCTYPE html>
|
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en" class="no-js no-jr">
|
|
<head>
|
|
<!-- For pinger, set start time and add meta elements. -->
|
|
<script type="text/javascript">var ncbi_startTime = new Date();</script>
|
|
|
|
<!-- Logger begin -->
|
|
<meta name="ncbi_db" content="books">
|
|
<meta name="ncbi_pdid" content="book-part">
|
|
<meta name="ncbi_acc" content="NBK25498">
|
|
<meta name="ncbi_domain" content="helpeutils">
|
|
<meta name="ncbi_report" content="reader">
|
|
<meta name="ncbi_type" content="fulltext">
|
|
<meta name="ncbi_objectid" content="">
|
|
<meta name="ncbi_pcid" content="/NBK25498/?report=reader">
|
|
<meta name="ncbi_pagename" content="Sample Applications of the E-utilities - Entrez Programming Utilities Help - NCBI Bookshelf">
|
|
<meta name="ncbi_bookparttype" content="chapter">
|
|
<meta name="ncbi_app" content="bookshelf">
|
|
<!-- Logger end -->
|
|
|
|
<!--component id="Page" label="meta"/-->
|
|
<script type="text/javascript" src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/jr.boots.min.js"> </script><title>Sample Applications of the E-utilities - Entrez Programming Utilities Help - NCBI Bookshelf</title>
|
|
<meta charset="utf-8">
|
|
<meta name="apple-mobile-web-app-capable" content="no">
|
|
<!-- Viewport: no maximum-scale / user-scalable=no, so users can still pinch-zoom the text. -->
<meta name="viewport" content="initial-scale=1,minimum-scale=1">
|
|
<meta name="jr-col-layout" content="auto">
|
|
<meta name="jr-prev-unit" content="/books/n/helpeutils/chapter2/?report=reader">
|
|
<meta name="jr-next-unit" content="/books/n/helpeutils/chapter4/?report=reader">
|
|
<meta name="bk-toc-url" content="/books/n/helpeutils/?report=toc">
|
|
<meta name="robots" content="INDEX,FOLLOW,NOARCHIVE">
|
|
<meta name="citation_inbook_title" content="Entrez Programming Utilities Help [Internet]">
|
|
<meta name="citation_title" content="Sample Applications of the E-utilities">
|
|
<meta name="citation_publisher" content="National Center for Biotechnology Information (US)">
|
|
<meta name="citation_date" content="2017/11/01">
|
|
<meta name="citation_author" content="Eric Sayers">
|
|
<meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK25498/">
|
|
<link rel="schema.DC" href="http://purl.org/DC/elements/1.0/">
|
|
<meta name="DC.Title" content="Sample Applications of the E-utilities">
|
|
<meta name="DC.Type" content="Text">
|
|
<meta name="DC.Publisher" content="National Center for Biotechnology Information (US)">
|
|
<meta name="DC.Contributor" content="Eric Sayers">
|
|
<meta name="DC.Date" content="2017/11/01">
|
|
<meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK25498/">
|
|
<meta name="description" content='This chapter presents several examples of how the E-utilities can be used to build useful applications. These examples use Perl to create the E-utility pipelines, and assume that the LWP::Simple module is installed. This module includes the get function that supports HTTP GET requests. One example (Application 4) uses an HTTP POST request, and requires the LWP::UserAgent module. In Perl, scalar variable names are preceded by a "$" symbol, and array names are preceded by a "@". In several instances, results will be stored in such variables for use in subsequent E-utility calls. The code examples here are working programs that can be copied to a text editor and executed directly. Equivalent HTTP requests can be constructed in many modern programming languages; all that is required is the ability to create and post an HTTP request.'>
|
|
<!-- Open Graph metadata: "property" is the attribute the protocol defines; "name" is kept for existing readers. -->
<meta name="og:title" property="og:title" content="Sample Applications of the E-utilities">
|
|
<meta name="og:type" property="og:type" content="book">
|
|
<meta name="og:description" property="og:description" content='This chapter presents several examples of how the E-utilities can be used to build useful applications. These examples use Perl to create the E-utility pipelines, and assume that the LWP::Simple module is installed. This module includes the get function that supports HTTP GET requests. One example (Application 4) uses an HTTP POST request, and requires the LWP::UserAgent module. In Perl, scalar variable names are preceded by a "$" symbol, and array names are preceded by a "@". In several instances, results will be stored in such variables for use in subsequent E-utility calls. The code examples here are working programs that can be copied to a text editor and executed directly. Equivalent HTTP requests can be constructed in many modern programming languages; all that is required is the ability to create and post an HTTP request.'>
|
|
<meta name="og:url" property="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK25498/">
|
|
<meta name="og:site_name" property="og:site_name" content="NCBI Bookshelf">
|
|
<meta name="og:image" property="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-helpeutils-lrg.png">
|
|
<meta name="twitter:card" content="summary">
|
|
<meta name="twitter:site" content="@ncbibooks">
|
|
<meta name="bk-non-canon-loc" content="/books/n/helpeutils/chapter3/?report=reader">
|
|
<link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK25498/">
|
|
<link href="https://fonts.googleapis.com/css?family=Archivo+Narrow:400,700,400italic,700italic&subset=latin" rel="stylesheet" type="text/css">
|
|
<link rel="stylesheet" href="/corehtml/pmc/jatsreader/ptpmc_3.22/css/libs.min.css">
|
|
<link rel="stylesheet" href="/corehtml/pmc/jatsreader/ptpmc_3.22/css/jr.min.css">
|
|
<meta name="format-detection" content="telephone=no">
|
|
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css" type="text/css">
|
|
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css//books_print.min.css" type="text/css" media="print">
|
|
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_reader.min.css" type="text/css">
|
|
<style type="text/css">p a.figpopup{display:inline !important} .bk_tt {font-family: monospace} .first-line-outdent .bk_ref {display: inline} .body-content h2, .body-content .h2 {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a {text-decoration:none;border:0 !important} .temp-labeled-list .graphic {display:inline-block !important} .temp-labeled-list img{width:100%}</style>
|
|
|
|
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico">
|
|
<meta name="ncbi_phid" content="CE8B44AF7C8056C10000000000F300B5.m_5">
|
|
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3849091.css"></head>
|
|
<body>
|
|
<!-- Book content! -->
|
|
|
|
|
|
<div id="jr" data-jr-path="/corehtml/pmc/jatsreader/ptpmc_3.22/"><div class="jr-unsupported"><table class="modal"><tr><td><span class="attn inline-block"></span><br />Your browser does not support the NLM PubReader view.<br />Go to <a href="/pmc/about/pr-browsers/">this page</a> to see a list of supported browsers<br />or return to the <br /><a href="/books/NBK25498/?report=classic">regular view</a>.</td></tr></table></div><div id="jr-ui" class="hidden"><nav id="jr-head"><div class="flexh tb"><div id="jr-tb1"><a id="jr-links-sw" class="hidden" title="Links"><svg xmlns="http://www.w3.org/2000/svg" version="1.1" x="0px" y="0px" viewBox="0 0 70.6 85.3" style="enable-background:new 0 0 70.6 85.3;vertical-align:middle" xml:space="preserve" width="24" height="24">
|
|
<style type="text/css">.st0{fill:#939598;}</style>
|
|
<g>
|
|
<path class="st0" d="M36,0C12.8,2.2-22.4,14.6,19.6,32.5C40.7,41.4-30.6,14,35.9,9.8"></path>
|
|
<path class="st0" d="M34.5,85.3c23.2-2.2,58.4-14.6,16.4-32.5c-21.1-8.9,50.2,18.5-16.3,22.7"></path>
|
|
<path class="st0" d="M34.7,37.1c66.5-4.2-4.8-31.6,16.3-22.7c42.1,17.9,6.9,30.3-16.4,32.5h1.7c-66.2,4.4,4.8,31.6-16.3,22.7 c-42.1-17.9-6.9-30.3,16.4-32.5"></path>
|
|
</g>
|
|
</svg> Books</a></div><div class="jr-rhead f1 flexh"><div class="head"><a href="/books/n/helpeutils/chapter2/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a></div><div class="body"><div class="t">Sample Applications of the E-utilities</div><div class="j">Entrez Programming Utilities Help [Internet]</div></div><div class="tail"><a href="/books/n/helpeutils/chapter4/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M25,30c80,60 80,0 0,60 c30,-60 30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Next</text></svg></a></div></div><div id="jr-tb2"><a id="jr-bkhelp-sw" class="btn wsprkl hidden" title="Help with NLM PubReader">?</a><a id="jr-help-sw" class="btn wsprkl hidden" title="Settings and typography in NLM PubReader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" preserveAspectRatio="none"><path d="M462,283.742v-55.485l-29.981-10.662c-11.431-4.065-20.628-12.794-25.274-24.001 c-0.002-0.004-0.004-0.009-0.006-0.013c-4.659-11.235-4.333-23.918,0.889-34.903l13.653-28.724l-39.234-39.234l-28.72,13.652 c-10.979,5.219-23.68,5.546-34.908,0.889c-0.005-0.002-0.01-0.003-0.014-0.005c-11.215-4.65-19.933-13.834-24-25.273L283.741,50 h-55.484l-10.662,29.981c-4.065,11.431-12.794,20.627-24.001,25.274c-0.005,0.002-0.009,0.004-0.014,0.005 c-11.235,4.66-23.919,4.333-34.905-0.889l-28.723-13.653l-39.234,39.234l13.653,28.721c5.219,10.979,5.545,23.681,0.889,34.91 c-0.002,0.004-0.004,0.009-0.006,0.013c-4.649,11.214-13.834,19.931-25.271,23.998L50,228.257v55.485l29.98,10.661 c11.431,4.065,20.627,12.794,25.274,24c0.002,0.005,0.003,0.01,0.005,0.014c4.66,11.236,4.334,23.921-0.888,34.906l-13.654,28.723 
l39.234,39.234l28.721-13.652c10.979-5.219,23.681-5.546,34.909-0.889c0.005,0.002,0.01,0.004,0.014,0.006 c11.214,4.649,19.93,13.833,23.998,25.271L228.257,462h55.484l10.595-29.79c4.103-11.538,12.908-20.824,24.216-25.525 c0.005-0.002,0.009-0.004,0.014-0.006c11.127-4.628,23.694-4.311,34.578,0.863l28.902,13.738l39.234-39.234l-13.66-28.737 c-5.214-10.969-5.539-23.659-0.886-34.877c0.002-0.005,0.004-0.009,0.006-0.014c4.654-11.225,13.848-19.949,25.297-24.021 L462,283.742z M256,331.546c-41.724,0-75.548-33.823-75.548-75.546s33.824-75.547,75.548-75.547 c41.723,0,75.546,33.824,75.546,75.547S297.723,331.546,256,331.546z"></path></svg></a><a id="jr-fip-sw" class="btn wsprkl hidden" title="Find"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 550 600" preserveAspectRatio="none"><path fill="none" stroke="#000" stroke-width="36" stroke-linecap="round" style="fill:#FFF" d="m320,350a153,153 0 1,0-2,2l170,170m-91-117 110,110-26,26-110-110"></path></svg></a><a id="jr-rtoc-sw" class="btn wsprkl hidden" title="Table of Contents"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M20,20h10v8H20V20zM36,20h44v8H36V20zM20,37.33h10v8H20V37.33zM36,37.33h44v8H36V37.33zM20,54.66h10v8H20V54.66zM36,54.66h44v8H36V54.66zM20,72h10v8 H20V72zM36,72h44v8H36V72z"></path></svg></a></div></div></nav><nav id="jr-dash" class="noselect"><nav id="jr-dash" class="noselect"><div id="jr-pi" class="hidden"><a id="jr-pi-prev" class="hidden" title="Previous page"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a><div class="pginfo">Page <i class="jr-pg-pn">0</i> of <i class="jr-pg-lp">0</i></div><a id="jr-pi-next" class="hidden" title="Next page"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M25,30c80,60 80,0 0,60 c30,-60 30,0 
0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Next</text></svg></a></div><div id="jr-is-tb"><a id="jr-is-sw" class="btn wsprkl hidden" title="Switch between Figures/Tables strip and Progress bar"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><rect x="10" y="40" width="20" height="20"></rect><rect x="40" y="40" width="20" height="20"></rect><rect x="70" y="40" width="20" height="20"></rect></svg></a></div><nav id="jr-istrip" class="istrip hidden"><a id="jr-is-prev" href="#" class="hidden" title="Previous"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M80,40 60,65 80,90 70,90 50,65 70,40z M50,40 30,65 50,90 40,90 20,65 40,40z"></path><text x="35" y="25" textLength="60" style="font-size:25px">Prev</text></svg></a><a id="jr-is-next" href="#" class="hidden" title="Next"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M20,40 40,65 20,90 30,90 50,65 30,40z M50,40 70,65 50,90 60,90 80,65 60,40z"></path><text x="15" y="25" textLength="60" style="font-size:25px">Next</text></svg></a></nav><nav id="jr-progress"></nav></nav></nav><aside id="jr-links-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">NCBI Bookshelf</div></div><div class="cnt lol f1"><a href="/books/">Home</a><a href="/books/browse/">Browse All Titles</a><a class="btn share" target="_blank" rel="noopener noreferrer" href="https://www.facebook.com/sharer/sharer.php?u=https://www.ncbi.nlm.nih.gov/books/NBK25498/"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 33 33" style="vertical-align:middle" width="24" height="24" preserveAspectRatio="none"><g><path d="M 17.996,32L 12,32 L 12,16 l-4,0 l0-5.514 l 4-0.002l-0.006-3.248C 11.993,2.737, 13.213,0, 18.512,0l 4.412,0 l0,5.515 l-2.757,0 c-2.063,0-2.163,0.77-2.163,2.209l-0.008,2.76l 4.959,0 l-0.585,5.514L 18,16L 
17.996,32z"></path></g></svg> Share on Facebook</a><a class="btn share" target="_blank" rel="noopener noreferrer" href="https://twitter.com/intent/tweet?url=https://www.ncbi.nlm.nih.gov/books/NBK25498/&text=Sample%20Applications%20of%20the%20E-utilities"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 33 33" style="vertical-align:middle" width="24" height="24"><g><path d="M 32,6.076c-1.177,0.522-2.443,0.875-3.771,1.034c 1.355-0.813, 2.396-2.099, 2.887-3.632 c-1.269,0.752-2.674,1.299-4.169,1.593c-1.198-1.276-2.904-2.073-4.792-2.073c-3.626,0-6.565,2.939-6.565,6.565 c0,0.515, 0.058,1.016, 0.17,1.496c-5.456-0.274-10.294-2.888-13.532-6.86c-0.565,0.97-0.889,2.097-0.889,3.301 c0,2.278, 1.159,4.287, 2.921,5.465c-1.076-0.034-2.088-0.329-2.974-0.821c-0.001,0.027-0.001,0.055-0.001,0.083 c0,3.181, 2.263,5.834, 5.266,6.438c-0.551,0.15-1.131,0.23-1.73,0.23c-0.423,0-0.834-0.041-1.235-0.118 c 0.836,2.608, 3.26,4.506, 6.133,4.559c-2.247,1.761-5.078,2.81-8.154,2.81c-0.53,0-1.052-0.031-1.566-0.092 c 2.905,1.863, 6.356,2.95, 10.064,2.95c 12.076,0, 18.679-10.004, 18.679-18.68c0-0.285-0.006-0.568-0.019-0.849 C 30.007,8.548, 31.12,7.392, 32,6.076z"></path></g></svg> Share on Twitter</a></div></aside><aside id="jr-rtoc-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Table of Content</div></div><div class="cnt lol f1"><a href="/books/n/helpeutils/?report=reader">Title Information</a><a href="/books/n/helpeutils/toc/?report=reader">Table of Contents Page</a></div></aside><aside id="jr-help-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Settings</div></div><div class="cnt f1"><div id="jr-typo-p" class="typo"><div><a class="sf btn wsprkl">A-</a><a class="lf btn wsprkl">A+</a></div><div><a class="bcol-auto btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 200 100" preserveAspectRatio="none"><text x="10" 
y="70" style="font-size:60px;font-family: Trebuchet MS, ArialMT, Arial, sans-serif" textLength="180">AUTO</text></svg></a><a class="bcol-1 btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M15,25 85,25zM15,40 85,40zM15,55 85,55zM15,70 85,70z"></path></svg></a><a class="bcol-2 btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M5,25 45,25z M55,25 95,25zM5,40 45,40z M55,40 95,40zM5,55 45,55z M55,55 95,55zM5,70 45,70z M55,70 95,70z"></path></svg></a></div></div><div class="lol"><a class="" href="/books/NBK25498/?report=classic">Switch to classic view</a><a href="/books/NBK25498/pdf/Bookshelf_NBK25498.pdf">PDF (106K)</a><a href="/books/n/helpeutils/pdf/">PDF (2.4M)</a><a href="/books/NBK25498/?report=printable">Print View</a></div></div></aside><aside id="jr-bkhelp-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Help</div></div><div class="cnt f1 lol"><a id="jr-helpobj-sw" data-path="/corehtml/pmc/jatsreader/ptpmc_3.22/" data-href="/corehtml/pmc/jatsreader/ptpmc_3.22/img/bookshelf/help.xml" href="">Help</a><a href="mailto:info@ncbi.nlm.nih.gov?subject=PubReader%20feedback%20%2F%20NBK25498%20%2F%20sid%3ACE8B5AF87C7FFCB1_0191SID%20%2F%20phid%3ACE8B44AF7C8056C10000000000F300B5.4">Send us feedback</a><a id="jr-about-sw" data-path="/corehtml/pmc/jatsreader/ptpmc_3.22/" data-href="/corehtml/pmc/jatsreader/ptpmc_3.22/img/bookshelf/about.xml" href="">About PubReader</a></div></aside><aside id="jr-objectbox" class="thidden hidden"><div class="jr-objectbox-close wsprkl">✘</div><div class="jr-objectbox-inner cnt"><div class="jr-objectbox-drawer"></div></div></aside><nav id="jr-pm-left" class="hidden"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 800" preserveAspectRatio="none"><text font-stretch="ultra-condensed" x="800" y="-15" text-anchor="end" transform="rotate(90)" 
font-size="18" letter-spacing=".1em">Previous Page</text></svg></nav><nav id="jr-pm-right" class="hidden"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 800" preserveAspectRatio="none"><text font-stretch="ultra-condensed" x="800" y="-15" text-anchor="end" transform="rotate(90)" font-size="18" letter-spacing=".1em">Next Page</text></svg></nav><nav id="jr-fip" class="hidden"><nav id="jr-fip-term-p"><input type="search" placeholder="search this page" id="jr-fip-term" autocorrect="off" autocomplete="off" /><a id="jr-fip-mg" class="wsprkl btn" title="Find"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 550 600" preserveAspectRatio="none"><path fill="none" stroke="#000" stroke-width="36" stroke-linecap="round" style="fill:#FFF" d="m320,350a153,153 0 1,0-2,2l170,170m-91-117 110,110-26,26-110-110"></path></svg></a><a id="jr-fip-done" class="wsprkl btn" title="Dismiss find">✘</a></nav><nav id="jr-fip-info-p"><a id="jr-fip-prev" class="wsprkl btn" title="Jump to previous match">◀</a><button id="jr-fip-matches">no matches yet</button><a id="jr-fip-next" class="wsprkl btn" title="Jump to next match">▶</a></nav></nav></div><div id="jr-epub-interstitial" class="hidden"></div><div id="jr-content"><article data-type="main"><div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><div class="fm-sec"><h1 id="_NBK25498_"><span class="title" itemprop="name">Sample Applications of the E-utilities</span></h1><p class="contribs">Sayers E.</p><p class="fm-aai"><a href="#_NBK25498_pubdet_">Publication Details</a></p><p><em>Estimated reading time: 9 minutes</em></p></div></div><div class="jig-ncbiinpagenav body-content whole_rhythm" data-jigconfig="allHeadingLevels: ['h2'],smoothScroll: false" itemprop="text"><div id="chapter3.Introduction"><h2 id="_chapter3_Introduction_">Introduction</h2><p>This chapter presents several examples of how the E-utilities can be used to build useful applications. 
These examples use Perl to create the E-utility pipelines, and assume that the LWP::Simple module is installed. This module includes the <i>get</i> function that supports HTTP GET requests. One example (Application 4) uses an HTTP POST request, and requires the LWP::UserAgent module. In Perl, scalar variable names are preceded by a "$" symbol, and array names are preceded by a "@". In several instances, results will be stored in such variables for use in subsequent E-utility calls. The code examples here are working programs that can be copied to a text editor and executed directly. Equivalent HTTP requests can be constructed in many modern programming languages; all that is required is the ability to create and post an HTTP request.</p></div><div id="chapter3.Basic_Pipelines"><h2 id="_chapter3_Basic_Pipelines_">Basic Pipelines</h2><p>All E-utility applications consist of a series of calls that we will refer to as a pipeline. The simplest E-utility pipelines consist of two calls, and any arbitrary pipeline can be assembled from these basic building blocks. Many of these pipelines conclude with either ESummary (to retrieve DocSums) or EFetch (to retrieve full records). The comments indicate those portions of the code that are required for either call.</p></div><div id="chapter3.ESearch__ESummaryEFetch"><h2 id="_chapter3_ESearch__ESummaryEFetch_">ESearch – ESummary/EFetch</h2><p><b>Input:</b> Entrez text query</p><p><b>ESummary Output:</b> XML Document Summaries</p><p><b>EFetch Output:</b> Formatted data records (e.g. abstracts, FASTA)</p><!-- Perl sample: ESearch with usehistory=y, then ESummary/EFetch; regex angle brackets are entity-escaped so they render as text --><pre>use LWP::Simple;<br /><br /># Download PubMed records that are indexed in MeSH for both asthma and <br /># leukotrienes and were also published in 2009.<br /><br />$db = 'pubmed';<br />$query = 'asthma[mesh]+AND+leukotrienes[mesh]+AND+2009[pdat]';<br /><br />#assemble the esearch URL<br />$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';<br />$url = $base . 
"esearch.fcgi?db=$db&term=$query&usehistory=y";<br /><br />#post the esearch URL<br />$output = get($url);<br /><br />#parse WebEnv and QueryKey<br />$web = $1 if ($output =~ /&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/);<br />$key = $1 if ($output =~ /&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;/);<br /><br />### include this code for ESearch-ESummary<br />#assemble the esummary URL<br />$url = $base . "esummary.fcgi?db=$db&query_key=$key&WebEnv=$web";<br /><br />#post the esummary URL<br />$docsums = get($url);<br />print "$docsums";<br /><br />### include this code for ESearch-EFetch<br />#assemble the efetch URL<br />$url = $base . "efetch.fcgi?db=$db&query_key=$key&WebEnv=$web";<br />$url .= "&rettype=abstract&retmode=text";<br /><br />#post the efetch URL<br />$data = get($url);<br />print "$data";</pre></div><div id="chapter3.EPost__ESummaryEFetch"><h2 id="_chapter3_EPost__ESummaryEFetch_">EPost – ESummary/EFetch</h2><p><b>Input:</b> List of Entrez UIDs (integer identifiers, e.g. PMID, GI, Gene ID)</p><p><b>ESummary Output:</b> XML Document Summaries</p><p><b>EFetch Output:</b> Formatted data records (e.g. abstracts, FASTA)</p><!-- Perl sample: EPost a UID list, then ESummary/EFetch; regex angle brackets are entity-escaped so they render as text --><pre>use LWP::Simple;<br /><br /># Download protein records corresponding to a list of GI numbers.<br /><br />$db = 'protein';<br />$id_list = '194680922,50978626,28558982,9507199,6678417';<br /><br />#assemble the epost URL<br />$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';<br />$url = $base . "epost.fcgi?db=$db&id=$id_list";<br /><br />#post the epost URL<br />$output = get($url);<br /><br />#parse WebEnv and QueryKey<br />$web = $1 if ($output =~ /&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/);<br />$key = $1 if ($output =~ /&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;/);<br /><br />### include this code for EPost-ESummary<br />#assemble the esummary URL<br />$url = $base . 
"esummary.fcgi?db=$db&query_key=$key&WebEnv=$web";<br /><br />#post the esummary URL<br />$docsums = get($url);<br />print "$docsums";<br /><br />### include this code for EPost-EFetch<br />#assemble the efetch URL<br />$url = $base . "efetch.fcgi?db=$db&query_key=$key&WebEnv=$web";<br />$url .= "&rettype=fasta&retmode=text";<br /><br />#post the efetch URL<br />$data = get($url);<br />print "$data";</pre><p><b><i>Note:</i></b>
|
|
<i>To post a large number (more than a few hundred) of UIDs in a single URL, please use the HTTP POST method for the EPost call (see</i>
|
|
<a href="#chapter3.Application_4_Finding_unique_se"><i>Application 4</i></a><i>).</i></p></div><div id="chapter3.ELink__ESummaryEfetch"><h2 id="_chapter3_ELink__ESummaryEfetch_">ELink – ESummary/EFetch</h2><p><b>Input:</b> List of Entrez UIDs in database A (integer identifiers, e.g. PMID, GI, Gene ID)</p><p><b>ESummary Output:</b> Linked XML Document Summaries from database B</p><p><b>EFetch Output:</b> Formatted data records (e.g. abstracts, FASTA) from database B</p><!-- Perl sample: ELink with cmd=neighbor_history, then ESummary/EFetch on the linked set in $db2; regex angle brackets are entity-escaped so they render as text --><pre>use LWP::Simple;<br /><br /># Download gene records linked to a set of proteins corresponding to a list<br /># of GI numbers.<br /><br />$db1 = 'protein'; # &dbfrom<br />$db2 = 'gene'; # &db<br />$linkname = 'protein_gene'; # desired link &linkname<br />#input UIDs in $db1 (protein GIs)<br />$id_list = '194680922,50978626,28558982,9507199,6678417';<br /><br />#assemble the elink URL<br />$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';<br />$url = $base . "elink.fcgi?dbfrom=$db1&db=$db2&id=$id_list";<br />$url .= "&linkname=$linkname&cmd=neighbor_history";<br /><br />#post the elink URL<br />$output = get($url);<br /><br />#parse WebEnv and QueryKey<br />$web = $1 if ($output =~ /&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/);<br />$key = $1 if ($output =~ /&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;/);<br /><br />### include this code for ELink-ESummary<br />#assemble the esummary URL<br />$url = $base . "esummary.fcgi?db=$db2&query_key=$key&WebEnv=$web";<br /><br />#post the esummary URL<br />$docsums = get($url);<br />print "$docsums";<br /><br />### include this code for ELink-EFetch<br />#assemble the efetch URL<br />$url = $base . "efetch.fcgi?db=$db2&query_key=$key&WebEnv=$web";<br />$url .= "&rettype=xml&retmode=xml";<br /><br />#post the efetch URL<br />$data = get($url);<br />print "$data";</pre><p><b><i>Notes:</i></b>
|
|
<i>To submit a large number (more than a few hundred) of UIDs to ELink in one URL, please use the HTTP POST method for the ELink call (see Application 4). The &linkname parameter is used to force ELink to return only one set of links (one &query_key) to simplify parsing. If more than one link is desired, the above code must be altered to parse the multiple &query_key values from the ELink XML output. This code uses ELink in "batch" mode, in that only one set of gene IDs is returned and the one-to-one correspondence between protein GIs and gene IDs is lost. To preserve this one-to-one correspondence, please see</i>
|
|
<a href="#chapter3.Application_4_Finding_unique_se"><i>Application 4</i></a>
|
|
<i>below.</i></p></div><div id="chapter3.ESearch__ELink__ESummaryEFetch"><h2 id="_chapter3_ESearch__ELink__ESummaryEFetch_">ESearch – ELink – ESummary/EFetch</h2><p><b>Input:</b> Entrez text query in database A</p><p><b>ESummary Output:</b> Linked XML Document Summaries from database B</p><p><b>EFetch Output:</b> Formatted data records (e.g. abstracts, FASTA) from database B</p><!-- Perl sample: ESearch in $db1, ELink to $db2, then ESummary/EFetch; regex angle brackets are entity-escaped so they render as text --><pre>use LWP::Simple;<br /># Download protein FASTA records linked to abstracts published <br /># in 2009 that are indexed in MeSH for both asthma and <br /># leukotrienes.<br /><br />$db1 = 'pubmed';<br />$db2 = 'protein';<br />$linkname = 'pubmed_protein';<br />$query = 'asthma[mesh]+AND+leukotrienes[mesh]+AND+2009[pdat]';<br /><br />#assemble the esearch URL<br />$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';<br />$url = $base . "esearch.fcgi?db=$db1&term=$query&usehistory=y";<br />#post the esearch URL<br />$output = get($url);<br /><br />#parse WebEnv and QueryKey<br />$web1 = $1 if ($output =~ /&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/);<br />$key1 = $1 if ($output =~ /&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;/);<br /><br />#assemble the elink URL<br />$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';<br />$url = $base . "elink.fcgi?dbfrom=$db1&db=$db2";<br />$url .= "&query_key=$key1&WebEnv=$web1";<br />$url .= "&linkname=$linkname&cmd=neighbor_history";<br />print "$url\n";<br /><br />#post the elink URL<br />$output = get($url);<br />print "$output\n";<br /><br />#parse WebEnv and QueryKey<br />$web2 = $1 if ($output =~ /&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/);<br />$key2 = $1 if ($output =~ /&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;/);<br /><br />### include this code for ESearch-ELink-ESummary<br />#assemble the esummary URL<br />$url = $base . "esummary.fcgi?db=$db2&query_key=$key2&WebEnv=$web2";<br />#post the esummary URL<br />$docsums = get($url);<br />print "$docsums";<br /><br />### include this code for ESearch-ELink-EFetch<br />#assemble the efetch URL<br />$url = $base . 
"efetch.fcgi?db=$db2&query_key=$key2&WebEnv=$web2";<br />$url .= "&rettype=fasta&retmode=text";<br />#post the efetch URL<br />$data = get($url);<br />print "$data";</pre><p><b><i>Notes:</i></b>
|
|
<i>The &linkname parameter is used to force ELink to return only one set of links (one &query_key) to simplify parsing. If more than one link is desired, the above code must be altered to parse the multiple &query_key values from the ELink XML output. This code uses ELink in "batch" mode, in that only one set of protein GIs is returned and the one-to-one correspondence between PubMed IDs and their linked protein GIs is lost. To preserve this one-to-one correspondence, please see</i>
|
|
<a href="#chapter3.Application_4_Finding_unique_se"><i>Application 4</i></a>
|
|
<i>below.</i></p></div><div id="chapter3.EPost__ELink__ESummaryEFetch"><h2 id="_chapter3_EPost__ELink__ESummaryEFetch_">EPost – ELink – ESummary/EFetch</h2><p><b>Input:</b> List of Entrez UIDs (integer identifiers, e.g. PMID, GI, Gene ID) in database A</p><p><b>ESummary Output:</b> Linked XML Document Summaries from database B</p><p><b>EFetch Output:</b> Formatted data records (e.g. abstracts, FASTA) from database B</p><!-- Perl sample: EPost UIDs to $db1, ELink to $db2, then ESummary/EFetch; regex angle brackets are entity-escaped so they render as text --><pre>use LWP::Simple;<br /><br /># Downloads gene records linked to a set of proteins corresponding<br /># to a list of protein GI numbers.<br /><br />$db1 = 'protein'; # &dbfrom<br />$db2 = 'gene'; # &db<br />$linkname = 'protein_gene';<br />#input UIDs in $db1 (protein GIs)<br />$id_list = '194680922,50978626,28558982,9507199,6678417';<br /><br />#assemble the epost URL<br />$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';<br />$url = $base . "epost.fcgi?db=$db1&id=$id_list";<br /><br />#post the epost URL<br />$output = get($url);<br /><br />#parse WebEnv and QueryKey<br />$web1 = $1 if ($output =~ /&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/);<br />$key1 = $1 if ($output =~ /&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;/);<br /><br />#assemble the elink URL<br />$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';<br />$url = $base . "elink.fcgi?dbfrom=$db1&db=$db2&query_key=$key1";<br />$url .= "&WebEnv=$web1&linkname=$linkname&cmd=neighbor_history";<br /><br />#post the elink URL<br />$output = get($url);<br /><br />#parse WebEnv and QueryKey<br />$web2 = $1 if ($output =~ /&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/);<br />$key2 = $1 if ($output =~ /&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;/);<br /><br />### include this code for EPost-ELink-ESummary<br />#assemble the esummary URL<br />$url = $base . "esummary.fcgi?db=$db2&query_key=$key2&WebEnv=$web2";<br /><br />#post the esummary URL<br />$docsums = get($url);<br />print "$docsums";<br /><br />### include this code for EPost-ELink-EFetch<br />#assemble the efetch URL<br />$url = $base . 
"efetch.fcgi?db=$db2&query_key=$key2&WebEnv=$web2";<br />$url .= "&rettype=xml&retmode=xml";<br /><br />#post the efetch URL<br />$data = get($url);<br />print "$data";</pre><p><b><i>Notes:</i></b>
|
|
<i>To post a large number (more than a few hundred) of UIDs in a single URL, please use the HTTP POST method for the EPost call (see Application 4 below). The &linkname parameter is used to force ELink to return only one set of links (one &query_key) to simplify parsing. If more than one link is desired, the above code must be altered to parse the multiple &query_key values from the ELink XML output. This code uses ELink in "batch" mode, in that only one set of gene IDs is returned and the one-to-one correspondence between protein GIs and gene IDs is lost. To preserve this one-to-one correspondence, please see</i>
|
|
<a href="#chapter3.Application_4_Finding_unique_se"><i>Application 4</i></a>
|
|
<i>below.</i></p></div>
<div id="chapter3.EPost__ESearch">
  <!-- Perl sample: posts protein GIs with EPost, then uses ESearch to keep only the posted
       records that match a text query. Less-than, greater-than and ampersand characters inside
       the listing are written as character references so the code is displayed verbatim. -->
  <h2 id="_chapter3_EPost__ESearch_">EPost – ESearch</h2>
  <p><b>Input:</b> List of Entrez UIDs (integer identifiers, e.g. PMID, GI, Gene ID)</p>
  <p><b>Output:</b> History set consisting of the subset of posted UIDs that match an Entrez text query</p>
  <pre>use LWP::Simple;

# Given an input set of protein GI numbers, this script creates
# a history set containing the members of the input set that
# correspond to human proteins.
#(Which of these proteins are from human?)

$db = 'protein';
$query = 'human[orgn]';
$id_list = '194680922,50978626,28558982,9507199,6678417';

#assemble the epost URL
$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
$url = $base . "epost.fcgi?db=$db&amp;id=$id_list";

#post the epost URL
$output = get($url);

#parse WebEnv and QueryKey
$web = $1 if ($output =~ /&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/);
$key = $1 if ($output =~ /&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;/);

#assemble the esearch URL
$term = "%23$key+AND+$query";
# %23 places a '#' before the query key
$url = $base . "esearch.fcgi?db=$db&amp;term=$term";
$url .= "&amp;WebEnv=$web&amp;usehistory=y";

#post esearch URL
$limited = get($url);

print "$limited\n";

# Output remains on the history server (&amp;query_key, &amp;WebEnv)
# Use ESummary or EFetch as above to retrieve them</pre>
  <p><b><i>Note:</i></b>
|
|
<i>To post a large number (more than a few hundred) of UIDs in a single URL, please use the HTTP POST method for the EPost call (see</i>
|
|
<a href="#chapter3.Application_4_Finding_unique_se"><i>Application 4</i></a><i>).</i></p></div>
<div id="chapter3.ELink__ESearch">
  <!-- Perl sample: links protein GIs to Gene with ELink (neighbor_history), then uses ESearch to
       keep only the linked records that match a text query. Less-than, greater-than and
       ampersand characters inside the listing are written as character references so the code
       is displayed verbatim. -->
  <h2 id="_chapter3_ELink__ESearch_">ELink – ESearch</h2>
  <p><b>Input:</b> List of Entrez UIDs (integer identifiers, e.g. PMID, GI, Gene ID) in database A</p>
  <p><b>Output:</b> History set consisting of the subset of linked UIDs in database B that match an Entrez text query</p>
  <pre>use LWP::Simple;

# Given an input set of protein GI numbers, this script creates a
# history set containing the gene IDs linked to members of the input
# set that also are on human chromosome X.
#(Which of the input proteins are encoded by a gene on human
# chromosome X?)

$db1 = 'protein'; # &amp;dbfrom
$db2 = 'gene'; # &amp;db
$linkname = 'protein_gene'; # desired link &amp;linkname
$query = 'human[orgn]+AND+x[chr]';
#input UIDs in $db1 (protein GIs)
$id_list = '148596974,42544182,187937179,4557377,6678417';

#assemble the elink URL
$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
$url = $base . "elink.fcgi?dbfrom=$db1&amp;db=$db2&amp;id=$id_list";
$url .= "&amp;linkname=$linkname&amp;cmd=neighbor_history";

#post the elink URL
$output = get($url);

#parse WebEnv and QueryKey
$web = $1 if ($output =~ /&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/);
$key = $1 if ($output =~ /&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;/);

#assemble the esearch URL
$term = "%23$key+AND+$query"; # %23 places a '#' before the query key
$url = $base . "esearch.fcgi?db=$db2&amp;term=$term&amp;WebEnv=$web&amp;usehistory=y";

#post esearch URL
$limited = get($url);

print "$limited\n";

# Output remains on the history server (&amp;query_key, &amp;WebEnv)
# Use ESummary or EFetch as in previous examples to retrieve them</pre>
  <p><b><i>Note:</i></b>
|
|
<i>To submit a large number (more than a few hundred) of UIDs to ELink in one URL, please use the HTTP POST method for the ELink call (see</i>
|
|
<a href="#chapter3.Application_4_Finding_unique_se"><i>Application 4</i></a><i>). The &amp;linkname parameter is used to force ELink to return only one set of links (one &amp;query_key) to simplify parsing. If more than one link is desired, the above code must be altered to parse the multiple &amp;query_key values from the ELink XML output. This code uses ELink in "batch" mode, in that only one set of gene IDs is returned and the one-to-one correspondence between protein GIs and Gene IDs is lost. To preserve this one-to-one correspondence, please see</i>
|
|
<a href="#chapter3.Application_4_Finding_unique_se"><i>Application 4</i></a>
|
|
<i>below.</i></p></div>
<div id="chapter3.Application_1_Converting_GI_num">
  <!-- Perl sample: a single EFetch call with rettype=acc that prints the accession for each
       nucleotide GI in $gi_list. Ampersands in the prose and the listing are written as
       character references so they are displayed verbatim. -->
  <h2 id="_chapter3_Application_1_Converting_GI_num_">Application 1: Converting GI numbers to accession numbers</h2>
  <p><b>Goal:</b> Starting with a list of nucleotide GI numbers, prepare a set of corresponding accession numbers.</p>
  <p><b>Solution:</b> Use EFetch with &amp;rettype=acc</p>
  <p><b>Input:</b> $gi_list – comma-delimited list of GI numbers</p>
  <p><b>Output:</b> List of accession numbers.</p>
  <pre>use LWP::Simple;
$gi_list = '24475906,224465210,50978625,9507198';

#assemble the URL
$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
$url = $base . "efetch.fcgi?db=nucleotide&amp;id=$gi_list&amp;rettype=acc";

#post the URL
$output = get($url);
print "$output";</pre>
  <p><b><i>Notes:</i></b>
|
|
<i>The order of the accessions in the output will be the same order as the GI numbers in $gi_list.</i></p></div>
<div id="chapter3.Application_2_Converting_access">
  <!-- Perl sample: builds an "accn1[accn]+OR+accn2[accn]..." ESearch term from a comma-delimited
       accession list, stores the result on the History server, and downloads the records as
       FASTA with EFetch. The sample accessions are nucleotide records (db=nuccore). Less-than,
       greater-than and ampersand characters are written as character references so the code is
       displayed verbatim. -->
  <h2 id="_chapter3_Application_2_Converting_access_">Application 2: Converting accession numbers to data</h2>
  <p><b>Goal:</b> Starting with a list of nucleotide accession numbers, return the sequences in FASTA format.</p>
  <p><b>Solution:</b> Create a string consisting of items separated by 'OR', where each item is an accession number followed by '[accn]'.</p>
  <p>Example: accn1[accn]+OR+accn2[accn]+OR+accn3[accn]+OR+…</p>
  <p>Submit this string as a &amp;term in ESearch, then use EFetch to retrieve the FASTA data.</p>
  <p><b>Input:</b> $acc_list – comma-delimited list of accessions</p>
  <p><b>Output:</b> FASTA data</p>
  <pre>use LWP::Simple;
$acc_list = '<a href="/nuccore/927442695" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">NM_009417</a>,<a href="/nuccore/1890295337" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">NM_000547</a>,<a href="/nuccore/350529408" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">NM_001003009</a>,<a href="/nuccore/402766536" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">NM_019353</a>';
@acc_array = split(/,/, $acc_list);

#append [accn] field to each accession
for ($i=0; $i &lt; @acc_array; $i++) {
  $acc_array[$i] .= "[accn]";
}

#join the accessions with OR
$query = join('+OR+',@acc_array);

#assemble the esearch URL
$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
$url = $base . "esearch.fcgi?db=nuccore&amp;term=$query&amp;usehistory=y";

#post the esearch URL
$output = get($url);

#parse WebEnv and QueryKey
$web = $1 if ($output =~ /&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/);
$key = $1 if ($output =~ /&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;/);

#assemble the efetch URL
$url = $base . "efetch.fcgi?db=nuccore&amp;query_key=$key&amp;WebEnv=$web";
$url .= "&amp;rettype=fasta&amp;retmode=text";

#post the efetch URL
$fasta = get($url);
print "$fasta";</pre>
  <p><b><i>Notes:</i></b>
|
|
<i>For large numbers of accessions, use HTTP POST to submit the esearch request (see</i>
|
|
<a href="#chapter3.Application_4_Finding_unique_se"><i>Application 4</i></a><i>), and see</i>
|
|
<a href="#chapter3.Application_3_Retrieving_large"><i>Application 3</i></a>
|
|
<i>below for downloading the large set in batches.</i></p></div>
<div id="chapter3.Application_3_Retrieving_large">
  <!-- Perl sample: runs one ESearch with usehistory=y, reads WebEnv, QueryKey and Count from the
       reply, then loops over EFetch in batches of $retmax records, appending each batch to
       chimp.fna. Less-than, greater-than and ampersand characters are written as character
       references so the prose and code are displayed verbatim. -->
  <h2 id="_chapter3_Application_3_Retrieving_large_">Application 3: Retrieving large datasets</h2>
  <p><b>Goal:</b> Download all chimpanzee mRNA sequences in FASTA format (&gt;50,000 sequences).</p>
  <p><b>Solution:</b> First use ESearch to retrieve the GI numbers for these sequences and post them on the History server, then use multiple EFetch calls to retrieve the data in batches of 500.</p>
  <p><b>Input:</b> $query – chimpanzee[orgn]+AND+biomol+mrna[prop]</p>
  <p><b>Output:</b> A file named "chimp.fna" containing FASTA data.</p>
  <pre>use LWP::Simple;
$query = 'chimpanzee[orgn]+AND+biomol+mrna[prop]';

#assemble the esearch URL
$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
$url = $base . "esearch.fcgi?db=nucleotide&amp;term=$query&amp;usehistory=y";

#post the esearch URL
$output = get($url);

#parse WebEnv, QueryKey and Count (# records retrieved)
$web = $1 if ($output =~ /&lt;WebEnv&gt;(\S+)&lt;\/WebEnv&gt;/);
$key = $1 if ($output =~ /&lt;QueryKey&gt;(\d+)&lt;\/QueryKey&gt;/);
$count = $1 if ($output =~ /&lt;Count&gt;(\d+)&lt;\/Count&gt;/);

#open output file for writing
open(OUT, "&gt;chimp.fna") || die "Can't open file!\n";

#retrieve data in batches of 500
$retmax = 500;
for ($retstart = 0; $retstart &lt; $count; $retstart += $retmax) {
  $efetch_url = $base ."efetch.fcgi?db=nucleotide&amp;WebEnv=$web";
  $efetch_url .= "&amp;query_key=$key&amp;retstart=$retstart";
  $efetch_url .= "&amp;retmax=$retmax&amp;rettype=fasta&amp;retmode=text";
  $efetch_out = get($efetch_url);
  print OUT "$efetch_out";
}
close OUT;</pre>
</div>
<div id="chapter3.Application_4_Finding_unique_se">
  <h2 id="_chapter3_Application_4_Finding_unique_se_">Application 4: Finding unique sets of linked records for each member of a large dataset</h2>
  <p><b>Goal:</b> Download separately the SNP rs numbers (identifiers) for each
current gene on human chromosome 20.</p>
  <!-- Perl sample: collects Gene IDs with ESearch, sends them to ELink in one HTTP POST with a
       separate id parameter per gene, and writes one "gene_id:rs,rs,..." line per LinkSet to
       snp_table. The @output list is emptied at the start of every LinkSet so each line holds
       only the SNPs linked to that gene. Less-than, greater-than and ampersand characters are
       written as character references so the prose and code are displayed verbatim. -->
  <p><b>Solution:</b> First use ESearch to retrieve the Gene IDs for the genes, and then assemble an ELink URL where each Gene ID is submitted as a separate &amp;id parameter.</p>
  <p><b>Input:</b> $query – human[orgn]+AND+20[chr]+AND+alive[prop]</p>
  <p><b>Output:</b> A file named "snp_table" containing on each line the gene id followed by a colon (":") followed by a comma-delimited list of the linked SNP rs numbers.</p>
  <pre>use LWP::Simple;
use LWP::UserAgent;
$query = 'human[orgn]+AND+20[chr]+AND+alive[prop]';
$db1 = 'gene';
$db2 = 'snp';
$linkname = 'gene_snp';

#assemble the esearch URL
$base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/';
$url = $base . "esearch.fcgi?db=$db1&amp;term=$query&amp;usehistory=y&amp;retmax=5000";

#post the esearch URL
$output = get($url);

#parse IDs retrieved
while ($output =~ /&lt;Id&gt;(\d+?)&lt;\/Id&gt;/sg) {
  push(@ids, $1);
}

#assemble the elink URL as an HTTP POST call
$url = $base . "elink.fcgi";

$url_params = "dbfrom=$db1&amp;db=$db2&amp;linkname=$linkname";
foreach $id (@ids) {
  $url_params .= "&amp;id=$id";
}

#create HTTP user agent
$ua = new LWP::UserAgent;
$ua-&gt;agent("elink/1.0 " . $ua-&gt;agent);

#create HTTP request object
$req = new HTTP::Request POST =&gt; "$url";
$req-&gt;content_type('application/x-www-form-urlencoded');
$req-&gt;content("$url_params");

#post the HTTP request
$response = $ua-&gt;request($req);
$output = $response-&gt;content;

open (OUT, "&gt;snp_table") || die "Can't open file!\n";

while ($output =~ /&lt;LinkSet&gt;(.*?)&lt;\/LinkSet&gt;/sg) {

  $linkset = $1;
  #start a fresh list of linked SNPs for this gene
  @output = ();
  if ($linkset =~ /&lt;IdList&gt;(.*?)&lt;\/IdList&gt;/sg) {
    $input = $1;
    $input_id = $1 if ($input =~ /&lt;Id&gt;(\d+)&lt;\/Id&gt;/sg);
  }

  while ($linkset =~ /&lt;Link&gt;(.*?)&lt;\/Link&gt;/sg) {
    $link = $1;
    push (@output, $1) if ($link =~ /&lt;Id&gt;(\d+)&lt;\/Id&gt;/);
  }

  print OUT "$input_id:" . join(',', @output) . "\n";

}

close OUT;</pre>
  <p><b><i>Notes:</i></b>
|
|
<i>This example uses an HTTP POST request for the elink call, as the number of Gene IDs is over 500. The &retmax parameter in the ESearch call is set to 5000, as this is a reasonable limit to the number of IDs to send to ELink in one request (if you send 5000 IDs, you are effectively performing 5000 ELink operations). If you need to link more than 5000 records, add &retstart to the ESearch call and repeat the entire procedure for each batch of 5000 IDs, incrementing &retstart for each batch.</i></p></div><div id="chapter3.Demonstration_Programs"><h2 id="_chapter3_Demonstration_Programs_">Demonstration Programs</h2><p>Please see <a href="/books/n/helpeutils/chapter1/?report=reader#chapter1.Demonstration_Programs">Chapter 1</a> for sample Perl scripts.</p></div><div id="chapter3.For_More_Information"><h2 id="_chapter3_For_More_Information_">For More Information</h2><p>Please see <a href="/books/n/helpeutils/chapter1/?report=reader#chapter1.For_More_Information_8">Chapter 1</a> for getting additional information about the E-utilities.</p></div><div id="bk_toc_contnr"></div></div></div><div class="fm-sec"><h2 id="_NBK25498_pubdet_">Publication Details</h2><h3>Author Information and Affiliations</h3><p class="contrib-group"><h4>Authors</h4><span itemprop="author">Eric Sayers</span>, PhD<sup><img src="/corehtml/pmc/pmcgifs/corrauth.gif" alt="corresponding author" /></sup><sup>1</sup>.</p><h4>Affiliations</h4><div class="affiliation"><sup>1</sup> NCBI<div><span class="email-label">Email: </span><a href="mailto:dev@null" data-email="vog.hin.mln.ibcn@sreyas" class="oemail">vog.hin.mln.ibcn@sreyas</a></div></div><div><sup><img src="/corehtml/pmc/pmcgifs/corrauth.gif" alt="corresponding author" /></sup>Corresponding author.</div><h3>Publication History</h3><p class="small">Created: <span itemprop="datePublished">April 24, 2009</span>; Last Update: <span itemprop="dateModified">November 1, 2017</span>.</p><h3>Copyright</h3><div><div class="half_rhythm"><a 
href="/books/about/copyright/">Copyright Notice</a></div></div><h3>Publisher</h3><p><a href="http://www.ncbi.nlm.nih.gov/" ref="pagearea=page-banner&targetsite=external&targetcat=link&targettype=publisher">National Center for Biotechnology Information (US)</a>, Bethesda (MD)</p><h3>NLM Citation</h3><p>Sayers E. Sample Applications of the E-utilities. 2009 Apr 24 [Updated 2017 Nov 1]. In: Entrez Programming Utilities Help [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2010-. <span class="bk_cite_avail"></span></p></div><div class="small-screen-prev"><a href="/books/n/helpeutils/chapter2/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a></div><div class="small-screen-next"><a href="/books/n/helpeutils/chapter4/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M25,30c80,60 80,0 0,60 c30,-60 30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Next</text></svg></a></div></article></div><div id="jr-scripts"><script src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/libs.min.js"> </script><script src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/jr.min.js"> </script></div></div>
|
|
|
|
|
|
|
|
|
|
<!-- Book content -->
|
|
|
|
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js"> </script>
|
|
|
|
|
|
<!-- CE8B5AF87C7FFCB1_0191SID /projects/books/PBooks@9.11 portal104 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
|
|
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
|
|
|
|
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3968615.js" snapshot="books"></script></body>
|
|
</html>
|