387 lines
No EOL
17 KiB
Text
387 lines
No EOL
17 KiB
Text
<!DOCTYPE html>
|
|
|
|
|
|
|
|
|
|
<html lang="en" >
|
|
<head >
|
|
<meta charset="UTF-8">
|
|
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
|
|
|
<!-- Mobile properties -->
|
|
<meta name="HandheldFriendly" content="True">
|
|
<meta name="MobileOptimized" content="320">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
|
|
|
|
<!-- Stylesheets -->
|
|
|
|
<link href="/research/bionlp/static/django_uswds/uswds/css/uswds.css" rel="stylesheet" />
|
|
|
|
|
|
|
|
<title>
|
|
SimConcept: A Hybrid Approach for Simplifying Composite Named Entities in Biomedical Text
|
|
</title>
|
|
|
|
|
|
|
|
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/uswds.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/header.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/footer.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/form.css">
|
|
|
|
<!-- Labs template -->
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/atoms.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/docsum.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/media.css">
|
|
|
|
<!-- Additional template -->
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/journals.molecules.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/custom.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/journals.journal-page.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/iconic-glyphs.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/featherlight.min.css">
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/styles.css">
|
|
<!--[if lt IE 9]>
|
|
<link rel="stylesheet" href="/research/bionlp/static/main/css/iconic-glyphs-legacy.css">
|
|
<![endif]-->
|
|
|
|
<!-- Some JS -->
|
|
<script src="/research/bionlp/static/main/js/jquery.js"></script>
|
|
<script src="/research/bionlp/static/main/js/modernizr.js"></script>
|
|
<script src="/research/bionlp/static/main/js/featherlight.min.js"></script>
|
|
<script src="/research/bionlp/static/main/js/custom.js"></script>
|
|
|
|
|
|
|
|
|
|
</head>
|
|
<body >
|
|
|
|
|
|
<div>
|
|
<a class="skipnav" href="#maincontent">
|
|
Skip to main page content
|
|
</a>
|
|
<header class="ncbi-page-header" role="banner">
|
|
<div class="prefix">
|
|
<span class="nih" title="National Institutes of Health">
|
|
<a href="https://www.nih.gov/" title="To NIH homepage">
|
|
<img src="/research/bionlp/static/base/images/nih-logo-header.svg" alt="NIH">
|
|
</a>
|
|
</span>
|
|
<span class="nlm">
|
|
<a href="https://www.nlm.nih.gov/" title="To NLM homepage">U.S. National Library of Medicine</a>
|
|
</span>
|
|
</div>
|
|
|
|
<div class="ncbi">
|
|
<!-- <abbr class="abbr">
|
|
<a href="https://www.ncbi.nlm.nih.gov/" title="To NCBI homepage">NCBI</a>
|
|
</abbr>
|
|
<span class="name">
|
|
<a href="https://www.ncbi.nlm.nih.gov/" accesskey="1" title="To NCBI homepage">National Center for Biotechnology Information</a>
|
|
</span> -->
|
|
<!-- <abbr class="abbr">
|
|
<a href="https://www.nlm.nih.gov/research/index.html" title="To DIR homepage">DIR</a>
|
|
</abbr> -->
|
|
<span class="name">
|
|
<a href="https://www.nlm.nih.gov/research/index.html" accesskey="1" title="To DIR homepage">Division of Intramural Research</a>
|
|
</span>
|
|
<div class="right">
|
|
|
|
<a id="in" href="/research/bionlp/accounts/login/?next=/research/bionlp/">Log in</a>
|
|
|
|
</div>
|
|
</div>
|
|
</header>
|
|
|
|
<!--app-specific header, something that might want to take full width of screen -->
|
|
|
|
<a class="skipnav" href="#maincontent">
|
|
Skip to main page content
|
|
</a>
|
|
|
|
<div class="breadcrumbs-container menu">
|
|
<div class="usa-grid-full">
|
|
<ul class="topnav" accesskey="4">
|
|
<li class="current">
|
|
<a href="/research/bionlp/" title="Home">
|
|
Home
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Zhiyong-Lu" title="Zhiyong Lu">
|
|
Zhiyong Lu
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/News" title="Media">
|
|
Media
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Team" title="Team">
|
|
Team
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Research" title="Research">
|
|
Research
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Publications/" title="Publications">
|
|
Publications
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Tools/" title="Tools">
|
|
Tools
|
|
</a>
|
|
</li>
|
|
<li>
|
|
<a href="/research/bionlp/APIs/" title="Web APIs">
|
|
Web APIs
|
|
</a>
|
|
</li>
|
|
<li class="separator"></li>
|
|
<li>
|
|
<a href="/research/bionlp/Data/" title="Data">
|
|
AI Datasets
|
|
</a>
|
|
</li>
|
|
<li>
|
|
<a href="/research/bionlp/Visiting-us" title="Visiting us">
|
|
Visiting us
|
|
</a>
|
|
</li>
|
|
|
|
<li class="icon">
|
|
<a href="#">☰</a>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
|
|
<!-- assign a CSS class in case the app needs to alter the styles of this div -->
|
|
<div id="maincontent" class="usa-grid-full ncbi-base-page-container">
|
|
<div class="labs-pagecontent">
|
|
<div class="usa-width-one-whole">
|
|
<main class="usa-grid journals-lists">
|
|
|
|
|
|
|
|
<h3>SimConcept: A Hybrid Approach for Simplifying Composite Named Entities in Biomedical Text</h3>
|
|
<main class="usa-width-one-whole journal-container">
|
|
<div>
|
|
<div class="issue labs-docsums labs-content-box wrappall">
|
|
<h4>Authors: <a href="https://sites.google.com/site/chihhsuanwei/" target="_blank">Chih-Hsuan Wei</a>, <a
|
|
href="mailto:robert.leaman@nih.gov" target="_blank">Robert Leaman</a> and <a
|
|
href="/bionlp/" target="_blank">Zhiyong Lu</a> (PI)</h4>
|
|
<h4>Research highlights</h4>
|
|
<div class="usa-width-one-whole">
|
|
<p>
|
|
Here we propose a hybrid approach by integrating a machine learning model, named SimConcept, with a
|
|
pattern identification strategy to identify individual mentions from a composite named entity. More
|
|
specifically, we first trained and built a Conditional Random Fields model to detect the composite
|
|
mentions and subsequently identify the antecedent (e.g., colorectal) and conjunct regions (e.g.,
|
|
adenomas and carcinoma) of a composite mention. Next, we manually developed four patterns to model
|
|
the six different types of composite mentions in our study. Finally, by applying our patterns to
|
|
those previously identified regions in the composite mention, individual mentions are generated in
|
|
our final output (e.g., colorectal adenomas and colorectal carcinoma).
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="issue labs-docsums labs-content-box wrappall">
|
|
<h4>Method overview</h4>
|
|
<div class="usa-width-one-whole">
|
|
<p>
|
|
SimConcept consists of two modules as shown in Figure 1. The first module consists of a conditional
|
|
random field model. In this module, the input mention is separated into tokens and each token
|
|
is assigned labels according to the most likely sequence of states through the model. The second module
|
|
reassembles the tokens into individual mentions using a pattern identification method.
|
|
</p>
|
|
<div class="figure">
|
|
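<img src="/research/bionlp/static/main/images/tools/SimConcept.png" width="350" alt="Diagram of the SimConcept workflow: a conditional random field module followed by a pattern identification module">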
|
|
<span><b>Figure 1.</b> An overview of the SimConcept workflow.</span>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="issue labs-docsums labs-content-box wrappall">
|
|
<h4>Results</h4>
|
|
<div class="usa-width-one-whole">
|
|
<p>
|
|
To evaluate our method, we used leave-one-out cross validation on the three sets (i.e., gene,
|
|
disease and chemical). Table 1 shows the results of our evaluation, where we see that the overall
|
|
performance is high for all three entity types. As mentioned in the introduction, this study is aimed at
|
|
helping bioconcept normalization. We therefore applied SimConcept in GenNorm [21] and DNorm [18],
|
|
and evaluated on the test sets of BioCreative II gene normalization task [12] and NCBI disease
|
|
corpus [50], respectively (no normalized chemical corpus is available). To avoid training on the
|
|
test set, the training set for SimConcept excluded the test corpora for GenNorm and DNorm. As shown
|
|
in Table 2, using SimConcept can further improve the state-of-the-art performance by
|
|
1.17% in F-measure (P-value=0.02) for gene normalization and 1.34% in F-measure (P-value=0.03) for
|
|
disease normalization.
|
|
</p>
|
|
<table class="customtable">
|
|
<tbody>
|
|
<tr>
|
|
<td align="center"><strong>Bioconcepts</strong></td>
|
|
<td align="center"><strong>Precision</strong></td>
|
|
<td align="center"><strong>Recall</strong></td>
|
|
<td align="center"><strong>F-measure</strong></td>
|
|
</tr>
|
|
<tr>
|
|
<td align="center">Gene</td>
|
|
<td align="center">89.51%</td>
|
|
<td align="center">91.35%</td>
|
|
<td align="center">90.42%</td>
|
|
</tr>
|
|
<tr>
|
|
<td align="center">Disease</td>
|
|
<td align="center">87.92%</td>
|
|
<td align="center">85.07%</td>
|
|
<td align="center">86.47%</td>
|
|
</tr>
|
|
<tr>
|
|
<td align="center">Chemical</td>
|
|
<td align="center">87.44%</td>
|
|
<td align="center">84.71%</td>
|
|
<td align="center">86.05%</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
<span><b>Table 1.</b> Evaluation results on the SimConcept corpus.</span>
|
|
|
|
<table class="customtable">
|
|
<tbody>
|
|
<tr>
|
|
<td align="center"><strong>Tools</strong></td>
|
|
<td align="center"><strong>Precision</strong></td>
|
|
<td align="center"><strong>Recall</strong></td>
|
|
<td align="center"><strong>F-measure</strong></td>
|
|
</tr>
|
|
<tr>
|
|
<td align="center" class="best">GenNorm + SimConcept</td>
|
|
<td align="center" class="best">87.01%</td>
|
|
<td align="center" class="best">86.13%</td>
|
|
<td align="center" class="best">86.57%</td>
|
|
</tr>
|
|
<tr>
|
|
<td align="center">GenNorm</td>
|
|
<td align="center">86.72%</td>
|
|
<td align="center">84.09%</td>
|
|
<td align="center">85.38%</td>
|
|
</tr>
|
|
<tr>
|
|
<td align="center" class="best">DNorm + SimConcept</td>
|
|
<td align="center" class="best">80.91%</td>
|
|
<td align="center" class="best">79.23%</td>
|
|
<td align="center" class="best">80.06%</td>
|
|
</tr>
|
|
<tr>
|
|
<td align="center">DNorm</td>
|
|
<td align="center">80.69%</td>
|
|
<td align="center">76.85%</td>
|
|
<td align="center">78.72%</td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
<span><b>Table 2.</b> The contribution of SimConcept to gene and disease normalization performance.</span>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="issue labs-docsums labs-content-box wrappall">
|
|
<h4>Downloads</h4>
|
|
<div class="usa-width-one-whole">
|
|
<p>
|
|
<a href="/CBBresearch/Lu/Demo/tmTools/download/SimConcept/SimConcept.zip"
|
|
target="_blank">SimConcept Source Code</a><br/>
|
|
</p>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="issue labs-docsums labs-content-box wrappall">
|
|
<h4>Please cite</h4>
|
|
<div class="usa-width-one-whole">
|
|
<ul class="dot-list">
|
|
<li>Wei C-H, Leaman R, Lu Z. <a
|
|
href="http://delivery.acm.org/10.1145/2650000/2649420/p138-wei.pdf?ip=130.14.254.24&amp;id=2649420&amp;acc=OA&amp;key=4D4702B0C3E38B35.4D4702B0C3E38B35.4D4702B0C3E38B35.9DBB59DEA6D22751&amp;CFID=648750863&amp;CFTOKEN=12918478&amp;__acm__=1427473295_f84a1f644b896d02cdbff2edbb320044"
|
|
target="_blank">SimConcept: A Hybrid Approach for Simplifying Composite Named Entities in
|
|
Biomedicine</a>. Proceedings of the ACM Conference on Bioinformatics, Computational Biology, and
|
|
Health Informatics, Newport Beach, CA, 2014, pp. 138-146.
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
|
|
</div>
|
|
</main>
|
|
|
|
</main>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<footer class="usa-footer usa-footer-big ncbi-footer" role="contentinfo">
|
|
<div class="usa-grid">
|
|
<div class="usa-row">
|
|
<div class="usa-width-one-half">
|
|
<div>
|
|
<div class="org-section">
|
|
<a href="https://www.hhs.gov/"><img class="usa-footer-logo-img hhs-logo"
|
|
src="/research/bionlp/static/base/images/dhhs-logo-white.svg"
|
|
alt="U.S. Department of Health &amp; Human Services">
|
|
<span class="usa-sr-only">Department of Health and Human Services</span></a>
|
|
<a href="https://www.nih.gov/"><img class="usa-footer-logo-img nih-logo"
|
|
src="/research/bionlp/static/base/images/nih-logo-white.svg"
|
|
alt="National Institutes of Health">
|
|
<span class="usa-sr-only">National Institutes of Health</span></a>
|
|
<a href="https://www.nlm.nih.gov/"><img class="usa-footer-logo-img nlm-logo"
|
|
src="/research/bionlp/static/base/images/nlm-logo-letters-white.svg"
|
|
alt="National Library of Medicine">
|
|
<span class="usa-sr-only">National Library of Medicine</span></a>
|
|
<a href="https://www.usa.gov/"><img class="usa-footer-logo-img usagov-logo"
|
|
src="/research/bionlp/static/base/images/usagov-logo-white.svg"
|
|
alt="USA.gov"/>
|
|
<span class="usa-sr-only">USA.gov</span></a>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
|
|
<div class="usa-width-one-half">
|
|
<div>
|
|
<p class="about-links">
|
|
<a href="https://www.nlm.nih.gov/research/index.html">About DIR</a>
|
|
<a href="https://www.nlm.nih.gov/web_policies.html">Web Policies</a></p>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</footer>
|
|
</div>
|
|
|
|
|
|
|
|
<!-- JavaScript -->
|
|
|
|
<script src="/research/bionlp/static/django_uswds/uswds/js/uswds.js"></script>
|
|
|
|
|
|
|
|
|
|
<script type="text/javascript" src="/research/bionlp/static/base/header.js"></script>
|
|
|
|
</body>
|
|
</html> |