nih-gov/www.ncbi.nlm.nih.gov/research/bionlp/APIs/index.html

435 lines
No EOL
20 KiB
HTML

<!DOCTYPE html>
<html lang="en" >
<head >
<meta charset="UTF-8">
<!-- Ask legacy Internet Explorer to use its most recent rendering mode -->
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<!-- Mobile properties: hints for older handheld browsers plus the standard responsive viewport -->
<meta name="HandheldFriendly" content="True">
<meta name="MobileOptimized" content="320">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!-- Stylesheets: the django_uswds copy of uswds.css is loaded first; a second
     uswds.css from static/main/css follows below, so on equal specificity the
     later sheets take precedence -->
<link href="/research/bionlp/static/django_uswds/uswds/css/uswds.css" rel="stylesheet" />
<title>
Text Mining Web APIs - NIH
</title>
<link rel="stylesheet" href="/research/bionlp/static/main/css/uswds.css">
<link rel="stylesheet" href="/research/bionlp/static/main/css/header.css">
<link rel="stylesheet" href="/research/bionlp/static/main/css/footer.css">
<link rel="stylesheet" href="/research/bionlp/static/main/css/form.css">
<!-- Labs template styles -->
<link rel="stylesheet" href="/research/bionlp/static/main/css/atoms.css">
<link rel="stylesheet" href="/research/bionlp/static/main/css/docsum.css">
<link rel="stylesheet" href="/research/bionlp/static/main/css/media.css">
<!-- Additional template styles -->
<link rel="stylesheet" href="/research/bionlp/static/main/css/journals.molecules.css">
<link rel="stylesheet" href="/research/bionlp/static/main/css/custom.css">
<link rel="stylesheet" href="/research/bionlp/static/main/css/journals.journal-page.css">
<link rel="stylesheet" href="/research/bionlp/static/main/css/iconic-glyphs.css">
<link rel="stylesheet" href="/research/bionlp/static/main/css/featherlight.min.css">
<link rel="stylesheet" href="/research/bionlp/static/main/css/styles.css">
<!-- The conditional comment below is only honoured by Internet Explorer 8 and
     older; every other browser treats it as an ordinary comment -->
<!--[if lt IE 9]>
<link rel="stylesheet" href="/research/bionlp/static/main/css/iconic-glyphs-legacy.css">
<![endif]-->
<!-- Scripts: none of these carry async or defer, so they are fetched and run
     in the order listed while the head is parsed.
     NOTE(review): featherlight.min.js and custom.js presumably depend on
     jquery.js being loaded first; confirm before reordering or deferring. -->
<script src="/research/bionlp/static/main/js/jquery.js"></script>
<script src="/research/bionlp/static/main/js/modernizr.js"></script>
<script src="/research/bionlp/static/main/js/featherlight.min.js"></script>
<script src="/research/bionlp/static/main/js/custom.js"></script>
</head>
<body >
<div>
<!-- Skip link: jumps past the banner and navigation to the element with id "maincontent" -->
<a class="skipnav" href="#maincontent">
Skip to main page content
</a>
<!-- Site banner: NIH logo and NLM link, then the Division of Intramural Research title and the log-in link -->
<header class="ncbi-page-header" role="banner">
<div class="prefix">
<span class="nih" title="National Institutes of Health">
<a href="https://www.nih.gov/" title="To NIH homepage">
<img src="/research/bionlp/static/base/images/nih-logo-header.svg" alt="NIH">
</a>
</span>
<span class="nlm">
<a href="https://www.nlm.nih.gov/" title="To NLM homepage">U.S. National Library of Medicine</a>
</span>
</div>
<div class="ncbi">
<!-- The two commented-out fragments below are disabled NCBI and DIR abbreviation/name links; only the DIR name link after them is rendered -->
<!-- <abbr class="abbr">
<a href="https://www.ncbi.nlm.nih.gov/" title="To NCBI homepage">NCBI</a>
</abbr>
<span class="name">
<a href="https://www.ncbi.nlm.nih.gov/" accesskey="1" title="To NCBI homepage">National Center for Biotechnology Information</a>
</span> -->
<!-- <abbr class="abbr">
<a href="https://www.nlm.nih.gov/research/index.html" title="To DIR homepage">DIR</a>
</abbr> -->
<span class="name">
<a href="https://www.nlm.nih.gov/research/index.html" accesskey="1" title="To DIR homepage">Division of Intramural Research</a>
</span>
<div class="right">
<!-- After login the user is sent back to /research/bionlp/ via the "next" query parameter -->
<a id="in" href="/research/bionlp/accounts/login/?next=/research/bionlp/">Log in</a>
</div>
</div>
</header>
<!--app-specific header, something that might want to take full width of screen -->
<!-- NOTE(review): this is a second copy of the skip link at the top of the page; both target #maincontent. Confirm whether both are needed. -->
<a class="skipnav" href="#maincontent">
Skip to main page content
</a>
<!-- Primary site navigation. Each entry is a list item; empty "separator"
     items are visual dividers only. -->
<div class="breadcrumbs-container menu">
<div class="usa-grid-full">
<ul class="topnav" accesskey="4">
<li class="current">
<a href="/research/bionlp/" title="Home">
Home
</a>
</li>
<li class="separator"></li>
<li>
<a href="/research/bionlp/Zhiyong-Lu" title="Zhiyong Lu">
Zhiyong Lu
</a>
</li>
<li class="separator"></li>
<li>
<a href="/research/bionlp/News" title="Media">
Media
</a>
</li>
<li class="separator"></li>
<li>
<a href="/research/bionlp/Team" title="Team">
Team
</a>
</li>
<li class="separator"></li>
<li>
<a href="/research/bionlp/Research" title="Research">
Research
</a>
</li>
<li class="separator"></li>
<li>
<a href="/research/bionlp/Publications/" title="Publications">
Publications
</a>
</li>
<li class="separator"></li>
<li>
<a href="/research/bionlp/Tools/" title="Tools">
Tools
</a>
</li>
<li>
<!-- title now matches the link text (it previously repeated "Tools" from the entry above) -->
<a href="/research/bionlp/APIs/" title="Web APIs">
Web APIs
</a>
</li>
<li class="separator"></li>
<li>
<a href="/research/bionlp/Data/" title="Data">
AI Datasets
</a>
</li>
<li>
<a href="/research/bionlp/Visiting-us" title="Visiting us">
Visiting us
</a>
</li>
<li class="icon">
<!-- Icon-only control: aria-label gives assistive technology a readable name
     in place of the bare trigram glyph -->
<a href="#" aria-label="Menu">&#9776;</a>
</li>
</ul>
</div>
</div>
<!-- Assign a CSS class in case the app needs to alter the styles of this div -->
<div id="maincontent" class="usa-grid-full ncbi-base-page-container">
<div class="labs-pagecontent">
<div class="usa-width-one-whole">
<main class="usa-grid journals-lists">
<div>
<h3>Web APIs</h3>
<main class="usa-width-one-whole journal-container">
<div>
<div class="issue labs-docsums labs-content-box wrappall">
<h4>How to use</h4>
<div class="usa-width-one-whole">
<p>
<a href="/research/bionlp/APIs/authors/">API for the PubMed Computed Authors</a><br/>
<a href="https://www.ncbi.nlm.nih.gov/research/pubtator3/api" target="_blank">PubTator APIs usage</a><br/>
<a href="https://www.ncbi.nlm.nih.gov/research/pubtator3/api" target="_blank">PubTator APIs with curl</a><br/>
<a href="/research/bionlp/APIs/BioC-PMC/">API for PubMed Central Open Access in BioC
format</a><br/>
<a href="/research/bionlp/APIs/BioC-PubMed/">API for PubMed in BioC format</a><br/>
<a href="/research/bionlp/APIs/SupplMat/">API for PMC Supplementary Materials in BioC format</a><br/>
<a href="/research/bionlp/APIs/format/">Format description</a>
</p>
</div>
</div>
<div class="issue labs-docsums labs-content-box wrappall">
<h4>Publication</h4>
<div class="usa-width-one-whole">
<p>
Chih-Hsuan Wei, Robert Leaman, Zhiyong Lu (2016). <a
href="http://bioinformatics.oxfordjournals.org/content/early/2016/02/29/bioinformatics.btv760"
target="_blank">Beyond accuracy: Creating interoperable and scalable
text mining web services</a>, Bioinformatics, DOI:10.1093/bioinformatics/btv760.
</p>
</div>
</div>
<div class="issue labs-docsums labs-content-box wrappall">
<h4>Introduction</h4>
<div class="usa-width-one-whole">
<p>
We report here our recently developed web-based text mining services for biomedical concept
recognition and normalization. The figure below describes the overall architecture of our web
services, which use standard HTTP method calls (often known as RESTful services) and allow two
access modes: A) a batch-oriented processing function for any arbitrary text input (abstract,
full text, patent, etc.), submitted via HTTP POST, and B) instant retrieval of pre-tagged
results of PubMed abstracts via HTTP GET. For the batch processing function, users may submit
one or multiple documents per batch, and large requests will be sent to a computer cluster for
parallel processing. When retrieving pre-tagged results of PubMed abstracts, the request only
requires the PMIDs of the requested abstracts. This option is provided because annotating
biomedical literature is the most common use case for such a text-mining service. From a
technical standpoint, the preprocessing is made possible by our previous system PubTator,
which stores text-mined annotations for every article in PubMed and keeps in sync with PubMed
via nightly updates.
</p>
<div class="figure">
<!-- The href points at the image itself (it was "#") so the link still leads
     somewhere useful when scripting is unavailable. data-featherlight is
     unchanged, so the lightbox target stays the same.
     NOTE(review): relies on Featherlight cancelling the default navigation
     when it opens the lightbox; confirm against the bundled version. -->
<a href="/research/bionlp/static/main/images/tools/RESTfulAPI.png" data-featherlight="/research/bionlp/static/main/images/tools/RESTfulAPI.png"><img
src="/research/bionlp/static/main/images/tools/RESTfulAPI.png"
alt="Overview of the NCBI text mining web services"/></a>
<span><b>Figure 1.</b> Overview of the NCBI text mining web services.</span>
</div>
</div>
</div>
<div class="issue labs-docsums labs-content-box wrappall">
<h4>Performance</h4>
<div class="usa-width-one-whole">
<!-- Table 1: benchmark results per tagger. The first row acts as the header.
     Tool links use the /research/bionlp/ prefix, matching the site navigation
     links elsewhere on this page (they previously pointed at /bionlp/Tools/...). -->
<table class="customtable">
<tbody>
<tr>
<td><strong>Taggers</strong></td>
<td align="center"><strong>Bioconcepts</strong></td>
<td align="center"><strong>Evaluation corpus</strong></td>
<td align="center"><strong>Precision</strong></td>
<td align="center"><strong>Recall</strong></td>
<td align="center"><strong>F-measure</strong></td>
</tr>
<tr>
<td><a href="/research/bionlp/Tools/gnormplus" target="_blank">GNormPlus (Wei et al., 2015)</a></td>
<td align="center">Gene</td>
<td align="center"><a
href="http://www.biocreative.org/tasks/biocreative-ii/task-1b-human-gene-normalizati/"
target="_blank">BioCreative II - GN</a></td>
<td align="center">87.08%</td>
<td align="center">86.41%</td>
<td align="center">86.74%</td>
</tr>
<tr>
<td><a href="/research/bionlp/Tools/tmchem" target="_blank">tmChem (Leaman et al., 2014)</a></td>
<td align="center">Chemical</td>
<td align="center"><a
href="http://www.biocreative.org/resources/biocreative-iv/chemdner-corpus/"
target="_blank">CHEMDNER</a></td>
<td align="center">89.09%</td>
<td align="center">85.75%</td>
<td align="center">87.39%</td>
</tr>
<tr>
<td><a href="/research/bionlp/Tools/dnorm" target="_blank">DNorm (Leaman et al., 2013)</a></td>
<td align="center">Disease</td>
<td align="center"><a href="/CBBresearch/Dogan/DISEASE/"
target="_blank">NCBI Disease corpus</a></td>
<td align="center">80.30%</td>
<td align="center">76.30%</td>
<td align="center">78.20%</td>
</tr>
<tr>
<td><a href="/research/bionlp/Tools/tmvar" target="_blank">tmVar (Wei et al., 2013)</a></td>
<td align="center">Mutation</td>
<td align="center"><a
href="http://sourceforge.net/projects/mutationfinder/files/latest/download"
target="_blank">MutationFinder</a></td>
<td align="center">98.80%</td>
<td align="center">89.62%</td>
<td align="center">93.98%</td>
</tr>
<tr>
<td><a href="/research/bionlp/Tools/sr4gn" target="_blank">SR4GN (Wei et al., 2012)</a></td>
<td align="center">Species</td>
<td align="center"><a
href="http://sourceforge.net/projects/linnaeus/files/Corpora/manual-corpus-species-1.0.tar.gz/download"
target="_blank">Linnaeus corpus</a></td>
<td align="center">85.82%</td>
<td align="center">85.28%</td>
<td align="center">85.55%</td>
</tr>
</tbody>
</table>
<span><b>Table 1.</b> Results of our individual taggers when benchmarked on public test collections.</span>
<!-- Table 2: per-tagger submission method and throughput. The first row acts
     as the header, with every header cell emphasised the same way as in
     Table 1. The "On-Demand Processing" cell spans the four POST rows, which
     is why those rows have one cell fewer. -->
<table class="customtable">
<tbody>
<tr>
<td><strong>Taggers</strong></td>
<td align="center"><strong>HTTP Submission Method</strong></td>
<td align="center"><strong>Description</strong></td>
<td align="center"><strong>Throughput<br>(seconds per article; averaged over 5000 articles)</strong></td>
</tr>
<tr>
<td>PubTator</td>
<td align="center">GET</td>
<!-- paragraph is now explicitly closed; the non-standard nowrap attribute on it was dropped -->
<td align="center"><p class="style4">Return Pre-Annotations</p></td>
<td align="center">0.044s</td>
</tr>
<tr>
<td nowrap="">GNormPlus<sup>*</sup></td>
<td align="center">POST</td>
<td align="center" rowspan="4">On-Demand Processing</td>
<td align="center">0.127s</td>
</tr>
<tr>
<td>tmChem</td>
<td align="center">POST</td>
<td align="center">0.008s</td>
</tr>
<tr>
<td>DNorm</td>
<td align="center">POST</td>
<td align="center">0.007s</td>
</tr>
<tr>
<td>tmVar</td>
<td align="center">POST</td>
<td align="center">0.090s</td>
</tr>
</tbody>
</table>
<span><b>Table 2.</b> Estimated processing time of our web services on NCBI clusters (subject to availability).<br/>
* SR4GN is used by GNormPlus. The results of GNormPlus include both gene and species.
</span>
</div>
</div>
<div class="issue labs-docsums labs-content-box wrappall">
<h4>Supplementary</h4>
<div class="usa-width-one-whole">
<p>
We provide below sample code showing how to use our RESTful API service
programmatically.<br/>
<a href="/CBBresearch/Lu/Demo/tmTools/download/RESTfulAPI.client.zip"
target="_blank">RESTful sample code available in Perl, Python and Java.</a><br/>
We also provide a <a
href="/CBBresearch/Lu/Demo/tmTools/download/FormatChecking.zip"
target="_blank">script for checking input formats (i.e., BioC, PubTator and JSON).</a>
</p>
</div>
</div>
</div>
</main>
</div>
</main>
</div>
</div>
</div>
<!-- Site footer: the first half-width column holds the agency logo links
     (HHS, NIH, NLM, USA.gov); the second holds the "About DIR" and
     "Web Policies" links. -->
<footer class="usa-footer usa-footer-big ncbi-footer" role="contentinfo">
<div class="usa-grid">
<div class="usa-row">
<div class="usa-width-one-half">
<div>
<div class="org-section">
<!-- NOTE(review): each logo link below contains both an image with alt text
     and a "usa-sr-only" span with nearly the same wording, so the link name
     may be announced twice by screen readers. Confirm whether this is intended. -->
<a href="https://www.hhs.gov/"><img class="usa-footer-logo-img hhs-logo"
src="/research/bionlp/static/base/images/dhhs-logo-white.svg"
alt="U.S. Department of Health & Human Services">
<span class="usa-sr-only">Department of Health and Human Services</span></a>
<a href="https://www.nih.gov/"><img class="usa-footer-logo-img nih-logo"
src="/research/bionlp/static/base/images/nih-logo-white.svg"
alt="National Institutes of Health">
<span class="usa-sr-only">National Institutes of Health</span></a>
<a href="https://www.nlm.nih.gov/"><img class="usa-footer-logo-img nlm-logo"
src="/research/bionlp/static/base/images/nlm-logo-letters-white.svg"
alt="National Library of Medicine">
<span class="usa-sr-only">National Library of Medicine</span></a>
<a href="https://www.usa.gov/"><img class="usa-footer-logo-img usagov-logo"
src="/research/bionlp/static/base/images/usagov-logo-white.svg"
alt="USA.gov"/>
<span class="usa-sr-only">USA.gov</span></a>
</div>
</div>
</div>
<div class="usa-width-one-half">
<div>
<p class="about-links">
<a href="https://www.nlm.nih.gov/research/index.html">About DIR</a>
<a href="https://www.nlm.nih.gov/web_policies.html">Web Policies</a></p>
</div>
</div>
</div>
</div>
</footer>
</div>
<!-- JavaScript -->
<script src="/research/bionlp/static/django_uswds/uswds/js/uswds.js"></script>
<script type="text/javascript" src="/research/bionlp/static/base/header.js"></script>
</body>
</html>