nih-gov/www.ncbi.nlm.nih.gov/bionlp/Tools/gnormplus

389 lines
No EOL
17 KiB
Text

<!DOCTYPE html>
<html lang="en" >
<head>
  <!-- Character encoding and IE compatibility hint -->
  <meta charset="UTF-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">

  <!-- Mobile / responsive hints -->
  <meta name="HandheldFriendly" content="True">
  <meta name="MobileOptimized" content="320">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <title>GNormPlus: An Integrative Approach for Tagging Gene, Gene Family and Protein Domain</title>

  <!-- Stylesheets. Order is significant for the cascade; keep it as listed. -->
  <link rel="stylesheet" href="/research/bionlp/static/django_uswds/uswds/css/uswds.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/uswds.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/header.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/footer.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/form.css">

  <!-- Labs template styles -->
  <link rel="stylesheet" href="/research/bionlp/static/main/css/atoms.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/docsum.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/media.css">

  <!-- Additional template styles -->
  <link rel="stylesheet" href="/research/bionlp/static/main/css/journals.molecules.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/custom.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/journals.journal-page.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/iconic-glyphs.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/featherlight.min.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/styles.css">

  <!-- Glyph fallback, only read by Internet Explorer older than 9 -->
  <!--[if lt IE 9]>
  <link rel="stylesheet" href="/research/bionlp/static/main/css/iconic-glyphs-legacy.css">
  <![endif]-->

  <!-- Scripts loaded synchronously, in this order, before the body is parsed -->
  <script src="/research/bionlp/static/main/js/jquery.js"></script>
  <script src="/research/bionlp/static/main/js/modernizr.js"></script>
  <script src="/research/bionlp/static/main/js/featherlight.min.js"></script>
  <script src="/research/bionlp/static/main/js/custom.js"></script>
</head>
<body >
<div>
<!-- Skip link: first focusable element, jumps to #maincontent -->
<a href="#maincontent" class="skipnav">
  Skip to main page content
</a>
<!-- Page banner: NIH/NLM branding, division name and the log-in link -->
<header role="banner" class="ncbi-page-header">
  <div class="prefix">
    <span title="National Institutes of Health" class="nih">
      <a title="To NIH homepage" href="https://www.nih.gov/">
        <img alt="NIH" src="/research/bionlp/static/base/images/nih-logo-header.svg">
      </a>
    </span>
    <span class="nlm">
      <a title="To NLM homepage" href="https://www.nlm.nih.gov/">U.S. National Library of Medicine</a>
    </span>
  </div>
  <div class="ncbi">
    <!-- The next two comments hold disabled markup (NCBI abbr/name links and the DIR abbr). -->
    <!-- <abbr class="abbr">
    <a href="https://www.ncbi.nlm.nih.gov/" title="To NCBI homepage">NCBI</a>
    </abbr>
    <span class="name">
    <a href="https://www.ncbi.nlm.nih.gov/" accesskey="1" title="To NCBI homepage">National Center for Biotechnology Information</a>
    </span> -->
    <!-- <abbr class="abbr">
    <a href="https://www.nlm.nih.gov/research/index.html" title="To DIR homepage">DIR</a>
    </abbr> -->
    <span class="name">
      <a title="To DIR homepage" accesskey="1" href="https://www.nlm.nih.gov/research/index.html">Division of Intramural Research</a>
    </span>
    <div class="right">
      <a href="/research/bionlp/accounts/login/?next=/research/bionlp/" id="in">Log in</a>
    </div>
  </div>
</header>
<!-- App-specific header: site navigation that may take the full width of the screen. -->
<!-- The skip link at the top of the page already targets #maincontent, so it is not repeated here. -->
<!-- role/aria-label expose this block as a navigation landmark without changing the element that the CSS targets. -->
<div class="breadcrumbs-container menu" role="navigation" aria-label="Primary">
  <div class="usa-grid-full">
    <ul class="topnav" accesskey="4">
      <li class="current">
        <a href="/research/bionlp/" title="Home">
          Home
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Zhiyong-Lu" title="Zhiyong Lu">
          Zhiyong Lu
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/News" title="Media">
          Media
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Team" title="Team">
          Team
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Research" title="Research">
          Research
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Publications/" title="Publications">
          Publications
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Tools/" title="Tools">
          Tools
        </a>
      </li>
      <li>
        <!-- title now matches the link text (it previously read "Tools") -->
        <a href="/research/bionlp/APIs/" title="Web APIs">
          Web APIs
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Data/" title="Data">
          AI Datasets
        </a>
      </li>
      <li>
        <a href="/research/bionlp/Visiting-us" title="Visiting us">
          Visiting us
        </a>
      </li>
      <li class="icon">
        <!-- Icon-only link: the glyph alone has no meaningful accessible name, so one is supplied. -->
        <!-- NOTE(review): the click behaviour is presumably bound in a script not shown here - confirm. -->
        <a href="#" aria-label="Menu">&#9776;</a>
      </li>
    </ul>
  </div>
</div>
<!-- Assign a CSS class in case the app needs to alter the styles of this div. -->
<!-- Target of the "Skip to main page content" link. -->
<div id="maincontent" class="usa-grid-full ncbi-base-page-container">
  <div class="labs-pagecontent">
    <div class="usa-width-one-whole">
      <main class="usa-grid journals-lists">
        <h3>GNormPlus: An Integrative Approach for Tagging Gene, Gene Family and Protein Domain</h3>
        <!-- A document may have only one <main>, so this inner wrapper is a div with the same classes. -->
        <div class="usa-width-one-whole journal-container">
          <div>

            <!-- Authors and research highlights. Links that open a new tab carry rel="noopener". -->
            <div class="issue labs-docsums labs-content-box wrappall">
              <h4>Authors:
                <a href="https://sites.google.com/site/chihhsuanwei/" target="_blank" rel="noopener noreferrer">Chih-Hsuan Wei</a>,
                <a href="http://myweb.ncku.edu.tw/~hykao/" target="_blank" rel="noopener noreferrer">Hung-Yu Kao</a> and
                <a href="/bionlp/" target="_blank" rel="noopener">Zhiyong Lu</a> (PI)</h4>
              <h4>Research highlights</h4>
              <div class="usa-width-one-whole">
                <p>
                  GNormPlus: an end-to-end system that handles both gene/protein name and identifier detection in
                  biomedical literature, including gene/protein mentions, family names and domain names. Moreover,
                  GNormPlus also integrates several advanced text-mining techniques (i.e.,
                  <a href="http://ikmbio.csie.ncku.edu.tw/GN/" target="_blank" rel="noopener noreferrer">GenNorm</a>,
                  <a href="/CBBresearch/Lu/downloads/SR4GN/" target="_blank" rel="noopener">SR4GN</a>,
                  <a href="/CBBresearch/Lu/Demo/SimConcept/" target="_blank" rel="noopener">SimConcept</a>,
                  <!-- Served over HTTPS from the same host; browsers no longer open ftp:// links. -->
                  <a href="https://ftp.ncbi.nlm.nih.gov/pub/wilbur/" target="_blank" rel="noopener">Ab3P</a>
                  and <a href="http://crfpp.googlecode.com/svn/trunk/doc/index.html?source=navbar" target="_blank" rel="noopener noreferrer">CRF++</a>)
                  for resolving composite gene names. On two public benchmarking datasets, we show that GNormPlus
                  compares favorably to the other state-of-the-art methods.
                </p>
              </div>
            </div>

            <!-- Method overview -->
            <div class="issue labs-docsums labs-content-box wrappall">
              <h4>Method overview</h4>
              <div class="usa-width-one-whole">
                <p>
                  Our proposed approach includes two main steps: mention recognition and concept normalization,
                  respectively. In the mention recognition step, we developed a new module based on CRF++, together
                  with our previous species recognition system (i.e., SR4GN) to recognize gene and species names and
                  match them accordingly. In the concept normalization step, we applied our previous system, GenNorm,
                  combined with a composite mention simplification tool (i.e., SimConcept) and an abbreviation
                  resolution tool (i.e., Ab3P) for optimized performance.
                </p>
              </div>
            </div>

            <!-- Results: two evaluation tables, each followed by its caption line. -->
            <!-- NOTE(review): header cells are td + strong; switching them to th scope="col" would help
                 assistive technology but should be checked against the .customtable CSS first. -->
            <div class="issue labs-docsums labs-content-box wrappall">
              <h4>Results</h4>
              <div class="usa-width-one-whole">
                <p>
                  The first evaluation is a species-specific experiment where only human genes are considered.
                  GNormPlus was evaluated on the BioCreative II GN test set. We compared GNormPlus with several
                  previously reported systems, including our previous system, GenNorm. In the second experiment, we
                  evaluate GNormPlus in multi-species gene normalization using the BioCreative III GN task data set.
                  GNormPlus presents a competitive performance in both evaluations.
                </p>
                <table class="customtable">
                  <tbody>
                    <tr>
                      <td align="center"><strong>Open source tools</strong></td>
                      <td align="center"><strong>Precision</strong></td>
                      <td align="center"><strong>Recall</strong></td>
                      <td align="center"><strong>F-measure</strong></td>
                    </tr>
                    <tr>
                      <td align="center" class="best">GNormPlus</td>
                      <td align="center" class="best">87.1%</td>
                      <td align="center" class="best">86.4%</td>
                      <td align="center" class="best">86.7%</td>
                    </tr>
                    <tr>
                      <td align="center">GenNorm</td>
                      <td align="center">78.9%</td>
                      <td align="center">81.4%</td>
                      <td align="center">80.1%</td>
                    </tr>
                    <tr>
                      <td align="center">GNAT</td>
                      <td align="center">90.7%</td>
                      <td align="center">82.4%</td>
                      <td align="center">86.4%</td>
                    </tr>
                  </tbody>
                </table>
                <span><b>Table 1.</b> The evaluation of human species gene normalization on the BioCreative II GN test set.</span>
                <table class="customtable">
                  <tbody>
                    <tr>
                      <td align="center"><strong>Open source tools</strong></td>
                      <td align="center"><strong>TAP-5</strong></td>
                      <td align="center"><strong>TAP-10</strong></td>
                      <td align="center"><strong>TAP-20</strong></td>
                      <td align="center"><strong>F-measure</strong></td>
                    </tr>
                    <tr>
                      <td align="center" class="best">GNormPlus</td>
                      <td align="center" class="best">33.3%</td>
                      <td align="center" class="best">36.7%</td>
                      <td align="center" class="best">36.7%</td>
                      <td align="center" class="best">50.1%</td>
                    </tr>
                    <tr>
                      <td align="center">GenNorm</td>
                      <td align="center">32.8%</td>
                      <td align="center">35.5%</td>
                      <td align="center">35.5%</td>
                      <td align="center">46.9%</td>
                    </tr>
                    <tr>
                      <td align="center">GeneTuKit</td>
                      <td align="center">29.7%</td>
                      <td align="center">31.4%</td>
                      <td align="center">32.5%</td>
                      <td align="center">-</td>
                    </tr>
                  </tbody>
                </table>
                <span><b>Table 2.</b> The evaluation of multiple species gene normalization on the BioCreative III GN test set.</span>
              </div>
            </div>

            <!-- Downloads -->
            <div class="issue labs-docsums labs-content-box wrappall">
              <h4>Downloads</h4>
              <div class="usa-width-one-whole">
                <p>
                  GNormPlus Software in
                  <a href="/CBBresearch/Lu/Demo/tmTools/download/GNormPlus/GNormPlusJava.zip" target="_blank" rel="noopener">Java</a> or
                  <a href="/CBBresearch/Lu/Demo/tmTools/download/GNormPlus/GNormPlus.zip" target="_blank" rel="noopener">Perl</a><br>
                  <a href="/CBBresearch/Lu/Demo/tmTools/download/GNormPlus/GNormPlusCorpus.zip" target="_blank" rel="noopener">GNormPlus Corpus</a><br>
                  GNormPlus-tagged PubMed results in
                  <a href="/CBBresearch/Lu/Demo/PubTator/" target="_blank" rel="noopener">PubTator</a><br>
                  <a href="/research/bionlp/APIs/">GNormPlus
                    RESTful API</a>
                </p>
              </div>
            </div>

            <!-- Citation; the DOI is a link to the DOI resolver. -->
            <div class="issue labs-docsums labs-content-box wrappall">
              <h4>Please cite</h4>
              <div class="usa-width-one-whole">
                <ul class="dot-list">
                  <li>Wei C-H, Kao H-Y, Lu Z. <b>GNormPlus: An Integrative Approach for Tagging Gene, Gene Family and
                    Protein Domain</b>. BioMed Research International Journal, Text Mining for Translational
                    Bioinformatics special issue, BioMed Research International Journal, Article ID 918710; DOI:
                    <a href="https://doi.org/10.1155/2015/918710">dx.doi.org/10.1155/2015/918710</a> (2015)
                  </li>
                </ul>
              </div>
            </div>

          </div>
        </div>
      </main>
    </div>
  </div>
</div>
<!-- Page footer: agency logo links on the left, policy links on the right. -->
<footer class="usa-footer usa-footer-big ncbi-footer" role="contentinfo">
  <div class="usa-grid">
    <div class="usa-row">
      <div class="usa-width-one-half">
        <div>
          <!-- Each logo link is named by its usa-sr-only text. The images use an empty alt
               so assistive technology does not announce the organisation name twice. -->
          <div class="org-section">
            <a href="https://www.hhs.gov/"><img class="usa-footer-logo-img hhs-logo"
              src="/research/bionlp/static/base/images/dhhs-logo-white.svg"
              alt="">
              <span class="usa-sr-only">Department of Health and Human Services</span></a>
            <a href="https://www.nih.gov/"><img class="usa-footer-logo-img nih-logo"
              src="/research/bionlp/static/base/images/nih-logo-white.svg"
              alt="">
              <span class="usa-sr-only">National Institutes of Health</span></a>
            <a href="https://www.nlm.nih.gov/"><img class="usa-footer-logo-img nlm-logo"
              src="/research/bionlp/static/base/images/nlm-logo-letters-white.svg"
              alt="">
              <span class="usa-sr-only">National Library of Medicine</span></a>
            <a href="https://www.usa.gov/"><img class="usa-footer-logo-img usagov-logo"
              src="/research/bionlp/static/base/images/usagov-logo-white.svg"
              alt="">
              <span class="usa-sr-only">USA.gov</span></a>
          </div>
        </div>
      </div>
      <div class="usa-width-one-half">
        <div>
          <p class="about-links">
            <a href="https://www.nlm.nih.gov/research/index.html">About DIR</a>
            <a href="https://www.nlm.nih.gov/web_policies.html">Web Policies</a></p>
        </div>
      </div>
    </div>
  </div>
</footer>
</div>
<!-- Scripts run after the page markup has been parsed: USWDS first, then the header script. -->
<script src="/research/bionlp/static/django_uswds/uswds/js/uswds.js"></script>
<script src="/research/bionlp/static/base/header.js"></script>
</body>
</html>