435 lines
No EOL
20 KiB
HTML
435 lines
No EOL
20 KiB
HTML
<!DOCTYPE html>
|
|
|
|
|
|
|
|
|
|
<html lang="en" >
|
|
<head >
|
|
<meta charset="UTF-8">
|
|
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
|
|
|
<!-- Mobile properties -->
|
|
<meta name="HandheldFriendly" content="True">
|
|
<meta name="MobileOptimized" content="320">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
|
|
|
|
<!-- Stylesheets -->
|
|
|
|
<link href="/research/bionlp/static/django_uswds/uswds/css/uswds.css" rel="stylesheet" />
|
|
|
|
|
|
|
|
<title>
|
|
Text Mining Web APIs - NIH
|
|
</title>
|
|
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/uswds.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/header.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/footer.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/form.css">
|
|
|
|
<!-- Labs template -->
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/atoms.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/docsum.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/media.css">
|
|
|
|
<!-- Additional template -->
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/journals.molecules.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/custom.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/journals.journal-page.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/iconic-glyphs.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/featherlight.min.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/styles.css">
|
|
<!--[if lt IE 9]>
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/iconic-glyphs-legacy.css">
|
|
<![endif]-->
|
|
|
|
<!-- Some JS -->
|
|
<script src="/research/bionlp/static/main/js/jquery.js"></script>
|
|
<script src="/research/bionlp/static/main/js/modernizr.js"></script>
|
|
<script src="/research/bionlp/static/main/js/featherlight.min.js"></script>
|
|
<script src="/research/bionlp/static/main/js/custom.js"></script>
|
|
|
|
|
|
|
|
|
|
</head>
|
|
<body >
|
|
|
|
|
|
<div>
|
|
<a class="skipnav" href="#maincontent">
|
|
Skip to main page content
|
|
</a>
|
|
<header class="ncbi-page-header" role="banner">
|
|
<div class="prefix">
|
|
<span class="nih" title="National Institutes of Health">
|
|
<a href="https://www.nih.gov/" title="To NIH homepage">
|
|
<img src="/research/bionlp/static/base/images/nih-logo-header.svg" alt="NIH">
|
|
</a>
|
|
</span>
|
|
<span class="nlm">
|
|
<a href="https://www.nlm.nih.gov/" title="To NLM homepage">U.S. National Library of Medicine</a>
|
|
</span>
|
|
</div>
|
|
|
|
<div class="ncbi">
|
|
<!-- <abbr class="abbr">
|
|
<a href="https://www.ncbi.nlm.nih.gov/" title="To NCBI homepage">NCBI</a>
|
|
</abbr>
|
|
<span class="name">
|
|
<a href="https://www.ncbi.nlm.nih.gov/" accesskey="1" title="To NCBI homepage">National Center for Biotechnology Information</a>
|
|
</span> -->
|
|
<!-- <abbr class="abbr">
|
|
<a href="https://www.nlm.nih.gov/research/index.html" title="To DIR homepage">DIR</a>
|
|
</abbr> -->
|
|
<span class="name">
|
|
<a href="https://www.nlm.nih.gov/research/index.html" accesskey="1" title="To DIR homepage">Division of Intramural Research</a>
|
|
</span>
|
|
<div class="right">
|
|
|
|
<a id="in" href="/research/bionlp/accounts/login/?next=/research/bionlp/">Log in</a>
|
|
|
|
</div>
|
|
</div>
|
|
</header>
|
|
|
|
<!--app-specific header, something that might want to take full width of screen -->
|
|
|
|
<a class="skipnav" href="#maincontent">
|
|
Skip to main page content
|
|
</a>
|
|
|
|
<div class="breadcrumbs-container menu">
|
|
<div class="usa-grid-full">
|
|
<ul class="topnav" accesskey="4">
|
|
<li class="current">
|
|
<a href="/research/bionlp/" title="Home">
|
|
Home
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Zhiyong-Lu" title="Zhiyong Lu">
|
|
Zhiyong Lu
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/News" title="Media">
|
|
Media
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Team" title="Team">
|
|
Team
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Research" title="Research">
|
|
Research
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Publications/" title="Publications">
|
|
Publications
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Tools/" title="Tools">
|
|
Tools
|
|
</a>
|
|
</li>
|
|
<li>
|
|
<a href="/research/bionlp/APIs/" title="Web APIs">
|
|
Web APIs
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Data/" title="Data">
|
|
AI Datasets
|
|
</a>
|
|
</li>
|
|
<li>
|
|
<a href="/research/bionlp/Visiting-us" title="Visiting us">
|
|
Visiting us
|
|
</a>
|
|
</li>
|
|
|
|
<li class="icon">
|
|
<!-- Icon-only link: the glyph alone carries no accessible name, so aria-label supplies one for assistive technology -->
<a href="#" aria-label="Menu">☰</a>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<!-- assign a CSS class in case the app needs to alter the styles of this div -->
|
|
<div id="maincontent" class="usa-grid-full ncbi-base-page-container">
|
|
<div class="labs-pagecontent">
|
|
<div class="usa-width-one-whole">
|
|
<main class="usa-grid journals-lists">
|
|
|
|
|
|
|
|
<div>
|
|
<h3>Web APIs</h3>
|
|
<main class="usa-width-one-whole journal-container">
|
|
<div>
|
|
<div class="issue labs-docsums labs-content-box wrappall">
|
|
<h4>How to use</h4>
|
|
<div class="usa-width-one-whole">
|
|
<p>
|
|
<a href="/research/bionlp/APIs/authors/">API for the PubMed Computed Authors</a><br/>
|
|
<a href="https://www.ncbi.nlm.nih.gov/research/pubtator3/api" target="_blank">PubTator APIs usage</a><br/>
|
|
<a href="https://www.ncbi.nlm.nih.gov/research/pubtator3/api" target="_blank">PubTator APIs with curl</a><br/>
|
|
<a href="/research/bionlp/APIs/BioC-PMC/">API for PubMed Central Open Access in BioC
|
|
format</a><br/>
|
|
<a href="/research/bionlp/APIs/BioC-PubMed/">API for PubMed in BioC format</a><br/>
|
|
<a href="/research/bionlp/APIs/SupplMat/">API for PMC Supplementary Materials in BioC format</a><br/>
|
|
<a href="/research/bionlp/APIs/format/">Format description</a>
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="issue labs-docsums labs-content-box wrappall">
|
|
<h4>Publication</h4>
|
|
<div class="usa-width-one-whole">
|
|
<p>
|
|
Chih-Hsuan Wei, Robert Leaman, Zhiyong Lu (2016). <a
|
|
href="http://bioinformatics.oxfordjournals.org/content/early/2016/02/29/bioinformatics.btv760"
|
|
target="_blank">Beyond accuracy: Creating interoperable and scalable text mining web services</a>, Bioinformatics, DOI:10.1093/bioinformatics/btv760.
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="issue labs-docsums labs-content-box wrappall">
|
|
<h4>Introduction</h4>
|
|
<div class="usa-width-one-whole">
|
|
<p>
|
|
We report here our recently developed web-based text mining services for biomedical concept
recognition and normalization. The figure below describes the overall architecture of our web
services, which use standard HTTP method calls (often known as RESTful services) and allow two
access modes: A) a batch-oriented processing function for any arbitrary text input (abstract,
full text, patent, etc.), submitted via HTTP POST, and B) instant retrieval of pre-tagged results
of PubMed abstracts via HTTP GET. For the batch processing function, users may submit one or
multiple documents per batch, and large requests will be sent to a computer cluster for parallel
processing. When retrieving pre-tagged results of PubMed abstracts, the request only requires
the PMIDs of the requested abstracts. This option is provided because annotating biomedical
literature is the most common use case for such a text-mining service. From a technical
standpoint, the preprocessing is made possible by our previous system PubTator, which stores
text-mined annotations for every article in PubMed and keeps in sync with PubMed via nightly
updates.
|
|
</p>
|
|
<div class="figure">
|
|
<a href="#" data-featherlight="/research/bionlp/static/main/images/tools/RESTfulAPI.png"><img
|
|
src="/research/bionlp/static/main/images/tools/RESTfulAPI.png"
|
|
alt="Overview of the NCBI text mining web services"/></a>
|
|
<span><b>Figure 1.</b> Overview of the NCBI text mining web services.</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="issue labs-docsums labs-content-box wrappall">
|
|
<h4>Performance</h4>
|
|
<div class="usa-width-one-whole">
|
|
<table class="customtable">
|
|
<tbody>
|
|
<tr>
|
|
<td><strong>Taggers</strong></td>
|
|
<td align="center"><strong>Bioconcepts</strong></td>
|
|
<td align="center"><strong>Evaluation corpus</strong></td>
|
|
<td align="center"><strong>Precision</strong></td>
|
|
<td align="center"><strong>Recall</strong></td>
|
|
<td align="center"><strong>F-measure</strong></td>
|
|
</tr>
|
|
<tr>
|
|
<td><a href="/bionlp/Tools/gnormplus" target="_blank">GNormPlus (Wei et al., 2015)</a></td>
|
|
<td align="center">Gene</td>
|
|
<td align="center"><a
|
|
href="http://www.biocreative.org/tasks/biocreative-ii/task-1b-human-gene-normalizati/"
|
|
target="_blank">BioCreative II - GN
|
|
</a></td>
|
|
<td align="center">87.08%</td>
|
|
<td align="center">86.41%</td>
|
|
<td align="center">86.74%</td>
|
|
</tr>
|
|
<tr>
|
|
<td><a href="/bionlp/Tools/tmchem" target="_blank">tmChem (Leaman et al., 2014)</a></td>
|
|
<td align="center">Chemical</td>
|
|
<td align="center"><a
|
|
href="http://www.biocreative.org/resources/biocreative-iv/chemdner-corpus/"
|
|
target="_blank">CHEMDNER</a></td>
|
|
<td align="center">89.09%</td>
|
|
<td align="center">85.75%</td>
|
|
<td align="center">87.39%</td>
|
|
</tr>
|
|
<tr>
|
|
<td><a href="/bionlp/Tools/dnorm" target="_blank">DNorm (Leaman et al., 2013)</a></td>
|
|
<td align="center">Disease</td>
|
|
<td align="center"><a href="/CBBresearch/Dogan/DISEASE/"
|
|
target="_blank">NCBI Disease corpus</a></td>
|
|
|
|
<td align="center">80.30%</td>
|
|
<td align="center">76.30%</td>
|
|
<td align="center">78.20%</td>
|
|
</tr>
|
|
<tr>
|
|
<td><a href="/bionlp/Tools/tmvar" target="_blank">tmVar (Wei et al., 2013)</a></td>
|
|
<td align="center">Mutation</td>
|
|
<td align="center"><a
|
|
href="http://sourceforge.net/projects/mutationfinder/files/latest/download"
|
|
target="_blank">MutationFinder</a></td>
|
|
<td align="center">98.80%</td>
|
|
<td align="center">89.62%</td>
|
|
<td align="center">93.98%</td>
|
|
</tr>
|
|
<tr>
|
|
<td><a href="/bionlp/Tools/sr4gn" target="_blank">SR4GN (Wei et al., 2012)</a></td>
|
|
<td align="center">Species</td>
|
|
<td align="center"><a
|
|
href="http://sourceforge.net/projects/linnaeus/files/Corpora/manual-corpus-species-1.0.tar.gz/download"
|
|
target="_blank">Linnaeus corpus</a></td>
|
|
<td align="center">85.82%</td>
|
|
<td align="center">85.28%</td>
|
|
<td align="center">85.55%</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
<span><b>Table 1.</b> Results of our individual taggers when benchmarked on public test collections.</span>
|
|
|
|
<table class="customtable">
|
|
<tbody>
|
|
<tr>
|
|
<td><strong>Taggers</strong></td>
|
|
<td align="center">HTTP Submission Method</td>
|
|
<td align="center">Description</td>
|
|
<td align="center">Throughput<br>(seconds per article; averaged over 5000 articles)</td>
|
|
</tr>
|
|
<tr>
|
|
<td>PubTator</td>
|
|
<td align="center">GET</td>
|
|
<td align="center"><p class="style4" nowrap="">Return Pre-Annotations</p></td>
|
|
<td align="center">0.044s</td>
|
|
</tr>
|
|
<tr>
|
|
<td nowrap="">GNormPlus<sup>*</sup></td>
|
|
<td align="center">POST</td>
|
|
<td align="center" rowspan="4">On-Demand Processing</td>
|
|
<td align="center">0.127s</td>
|
|
</tr>
|
|
<tr>
|
|
<td>tmChem</td>
|
|
<td align="center">POST</td>
|
|
<td align="center">0.008s</td>
|
|
</tr>
|
|
<tr>
|
|
<td>DNorm</td>
|
|
<td align="center">POST</td>
|
|
<td align="center">0.007s</td>
|
|
</tr>
|
|
<tr>
|
|
<td>tmVar</td>
|
|
<td align="center">POST</td>
|
|
<td align="center">0.090s</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
<span><b>Table 2.</b> Estimated processing time of our web services on NCBI clusters (subject to availability).<br/>
|
|
* SR4GN is used by GNormPlus. The results of GNormPlus include both gene and species.
|
|
</span>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="issue labs-docsums labs-content-box wrappall">
|
|
<h4>Supplementary</h4>
|
|
<div class="usa-width-one-whole">
|
|
<p>
|
|
We provide below sample code showing how to use our RESTful API service programmatically.<br/>
|
|
<a href="/CBBresearch/Lu/Demo/tmTools/download/RESTfulAPI.client.zip"
|
|
target="_blank">RESTful sample code available in Perl, Python and Java.</a><br/>
|
|
We also provide a <a
|
|
href="/CBBresearch/Lu/Demo/tmTools/download/FormatChecking.zip"
|
|
target="_blank">script for checking input formats (i.e., BioC, PubTator and JSON).</a>
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
</div>
|
|
</main>
|
|
</div>
|
|
|
|
</main>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<footer class="usa-footer usa-footer-big ncbi-footer" role="contentinfo">
|
|
<div class="usa-grid">
|
|
<div class="usa-row">
|
|
<div class="usa-width-one-half">
|
|
<div>
|
|
<div class="org-section">
|
|
<a href="https://www.hhs.gov/"><img class="usa-footer-logo-img hhs-logo"
|
|
src="/research/bionlp/static/base/images/dhhs-logo-white.svg"
|
|
alt="U.S. Department of Health &amp; Human Services">
|
|
<span class="usa-sr-only">Department of Health and Human Services</span></a>
|
|
<a href="https://www.nih.gov/"><img class="usa-footer-logo-img nih-logo"
|
|
src="/research/bionlp/static/base/images/nih-logo-white.svg"
|
|
alt="National Institutes of Health">
|
|
<span class="usa-sr-only">National Institutes of Health</span></a>
|
|
<a href="https://www.nlm.nih.gov/"><img class="usa-footer-logo-img nlm-logo"
|
|
src="/research/bionlp/static/base/images/nlm-logo-letters-white.svg"
|
|
alt="National Library of Medicine">
|
|
<span class="usa-sr-only">National Library of Medicine</span></a>
|
|
<a href="https://www.usa.gov/"><img class="usa-footer-logo-img usagov-logo"
|
|
src="/research/bionlp/static/base/images/usagov-logo-white.svg"
|
|
alt="USA.gov"/>
|
|
<span class="usa-sr-only">USA.gov</span></a>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="usa-width-one-half">
|
|
<div>
|
|
<p class="about-links">
|
|
<a href="https://www.nlm.nih.gov/research/index.html">About DIR</a>
|
|
<a href="https://www.nlm.nih.gov/web_policies.html">Web Policies</a></p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</footer>
|
|
</div>
|
|
|
|
|
|
|
|
<!-- JavaScript -->
|
|
|
|
<script src="/research/bionlp/static/django_uswds/uswds/js/uswds.js"></script>
|
|
|
|
|
|
|
|
|
|
<script type="text/javascript" src="/research/bionlp/static/base/header.js"></script>
|
|
|
|
</body>
|
|
</html> |