nih-gov/www.ncbi.nlm.nih.gov/research/bionlp/Data/index.html

358 lines
No EOL
15 KiB
HTML

<!DOCTYPE html>
<html lang="en" >
<head>
  <!-- Character encoding and IE compatibility mode -->
  <meta charset="UTF-8">
  <meta http-equiv="X-UA-Compatible" content="IE=edge">

  <!-- Mobile / viewport hints -->
  <meta name="HandheldFriendly" content="True">
  <meta name="MobileOptimized" content="320">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">

  <!-- Base stylesheet served from the django_uswds static path -->
  <link rel="stylesheet" href="/research/bionlp/static/django_uswds/uswds/css/uswds.css">

  <title>
    BioNLP Corpus - NIH
  </title>

  <!--
    Site stylesheets. The order below is the original cascade order and
    must be kept: later sheets can override earlier ones.
  -->
  <link rel="stylesheet" href="/research/bionlp/static/main/css/uswds.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/header.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/footer.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/form.css">

  <!-- "Labs" template styles -->
  <link rel="stylesheet" href="/research/bionlp/static/main/css/atoms.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/docsum.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/media.css">

  <!-- Additional template styles -->
  <link rel="stylesheet" href="/research/bionlp/static/main/css/journals.molecules.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/custom.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/journals.journal-page.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/iconic-glyphs.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/featherlight.min.css">
  <link rel="stylesheet" href="/research/bionlp/static/main/css/styles.css">

  <!-- Legacy glyph sheet, loaded only by IE 8 and older via a conditional comment -->
  <!--[if lt IE 9]>
  <link rel="stylesheet" href="/research/bionlp/static/main/css/iconic-glyphs-legacy.css">
  <![endif]-->

  <!--
    Scripts loaded synchronously in the head, in this order.
    NOTE(review): do not add defer/async here without checking the scripts
    at the end of <body>; they run during parsing and may rely on these.
  -->
  <script src="/research/bionlp/static/main/js/jquery.js"></script>
  <script src="/research/bionlp/static/main/js/modernizr.js"></script>
  <script src="/research/bionlp/static/main/js/featherlight.min.js"></script>
  <script src="/research/bionlp/static/main/js/custom.js"></script>
</head>
<body >
<div>
<!-- Skip link: targets the element with id="maincontent" further down the page -->
<a class="skipnav" href="#maincontent">
  Skip to main page content
</a>

<!-- Page banner: NIH / NLM branding, the Division of Intramural Research link, and the log-in link -->
<header class="ncbi-page-header" role="banner">
  <!-- Parent-organisation links (NIH logo, NLM name) -->
  <div class="prefix">
    <span class="nih" title="National Institutes of Health">
      <a href="https://www.nih.gov/" title="To NIH homepage">
        <img src="/research/bionlp/static/base/images/nih-logo-header.svg" alt="NIH">
      </a>
    </span>
    <span class="nlm">
      <a href="https://www.nlm.nih.gov/" title="To NLM homepage">U.S. National Library of Medicine</a>
    </span>
  </div>

  <div class="ncbi">
    <!-- Disabled: NCBI abbreviation and full-name links -->
    <!-- <abbr class="abbr">
      <a href="https://www.ncbi.nlm.nih.gov/" title="To NCBI homepage">NCBI</a>
    </abbr>
    <span class="name">
      <a href="https://www.ncbi.nlm.nih.gov/" accesskey="1" title="To NCBI homepage">National Center for Biotechnology Information</a>
    </span> -->
    <!-- Disabled: DIR abbreviation link -->
    <!-- <abbr class="abbr">
      <a href="https://www.nlm.nih.gov/research/index.html" title="To DIR homepage">DIR</a>
    </abbr> -->

    <!-- Active division link; carries accesskey "1" -->
    <span class="name">
      <a href="https://www.nlm.nih.gov/research/index.html" accesskey="1" title="To DIR homepage">Division of Intramural Research</a>
    </span>

    <!-- Log-in link; the "next" query parameter points at the site root -->
    <div class="right">
      <a id="in" href="/research/bionlp/accounts/login/?next=/research/bionlp/">Log in</a>
    </div>
  </div>
</header>
<!-- App-specific header: something that might want to take the full width of the screen -->
<a class="skipnav" href="#maincontent">
  Skip to main page content
</a>

<!--
  Primary site menu.
  - role/aria-label expose the container as a navigation landmark without
    changing the element type, so existing class-based styles still apply.
  - aria-current="page" marks the entry for this page (the Data listing).
  NOTE(review): class="current" sits on "Home" even though this is the Data
  page, and there is no separator before "Web APIs" or "Visiting us".
  Both are left as-is because their styling intent is not visible here;
  confirm against the template and CSS.
-->
<div class="breadcrumbs-container menu" role="navigation" aria-label="Site">
  <div class="usa-grid-full">
    <ul class="topnav" accesskey="4">
      <li class="current">
        <a href="/research/bionlp/" title="Home">
          Home
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Zhiyong-Lu" title="Zhiyong Lu">
          Zhiyong Lu
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/News" title="Media">
          Media
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Team" title="Team">
          Team
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Research" title="Research">
          Research
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Publications/" title="Publications">
          Publications
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Tools/" title="Tools">
          Tools
        </a>
      </li>
      <li>
        <!-- title previously read "Tools" (copy-paste from the entry above) -->
        <a href="/research/bionlp/APIs/" title="Web APIs">
          Web APIs
        </a>
      </li>
      <li class="separator"></li>
      <li>
        <a href="/research/bionlp/Data/" title="Data" aria-current="page">
          AI Datasets
        </a>
      </li>
      <li>
        <a href="/research/bionlp/Visiting-us" title="Visiting us">
          Visiting us
        </a>
      </li>
      <!--
        Menu toggle. The glyph alone gives assistive technology no usable
        name, so aria-label supplies one. The element stays an <a> so that
        existing script and style hooks are unaffected.
      -->
      <li class="icon">
        <a href="#" aria-label="Toggle navigation menu">&#9776;</a>
      </li>
    </ul>
  </div>
</div>
<!-- Assign a CSS class in case the app needs to alter the styles of this div -->
<!--
  Main page content: the "AI Datasets" listing. Each dataset is one
  ".issue" box holding a name link (one-fourth column) and a description
  (three-fourths column).

  A document may contain only one visible <main>, and <main> must not be
  nested; the inner wrapper is therefore a <div> carrying the same classes.

  NOTE(review): a few site links below start with /bionlp/ rather than the
  /research/bionlp/ prefix used elsewhere on the page; they are left
  unchanged here. Confirm they resolve.
-->
<div id="maincontent" class="usa-grid-full ncbi-base-page-container">
  <div class="labs-pagecontent">
    <div class="usa-width-one-whole">
      <main class="usa-grid journals-lists">
        <div class="usa-width-one-whole journal-container">
          <div>
            <!-- NOTE(review): this is the only heading on the page; consider promoting it to h1 if the styles allow -->
            <h3>AI Datasets</h3>

            <div class="issue labs-docsums labs-content-box">
              <div class="tool">
                <div class="usa-width-one-fourth">
                  <div class="toolname">
                    <a href="https://ftp.ncbi.nlm.nih.gov/pub/lu/ComputedAuthors/">PubMed Computed Authors (2024)</a>
                  </div>
                </div>
                <div class="usa-width-three-fourths tooldesc" id="tool_7">
                  <b>Description</b>
                  <br>The PubMed Computed Authors dataset consists of disambiguated author names from PubMed, freely available via API queries and FTP downloads. Using advanced AI algorithms, the PubMed Computed Authors disambiguated more than 21 million individual authors across nearly 36 million PubMed articles with high accuracy. With regular weekly update, the PubMed Computed Authors continuously provide the most recent disambiguated authors for all PubMed articles.
                </div>
              </div>
            </div>

            <div class="issue labs-docsums labs-content-box">
              <div class="tool">
                <div class="usa-width-one-fourth">
                  <div class="toolname">
                    <a href="https://ftp.ncbi.nlm.nih.gov/pub/lu/NLMChem/">NLM-Chem corpus</a>
                  </div>
                </div>
                <div class="usa-width-three-fourths tooldesc" id="tool_6">
                  <b>Description</b>
                  <br>The NLM-Chem corpus is a manually annotated full-text resource on chemicals in the biomedical literature. The corpus contains 150 full-text journal articles selected both to be rich in chemical mentions and for articles where human annotation was expected to be most valuable. The corpus was doubly annotated by ten expert NLM indexers, with high inter-annotator agreement, and contains ~5000 unique chemical name annotations mapped to ~2000 MeSH identifiers.
                </div>
              </div>
            </div>

            <div class="issue labs-docsums labs-content-box">
              <div class="tool">
                <div class="usa-width-one-fourth">
                  <div class="toolname">
                    <a href="https://ftp.ncbi.nlm.nih.gov/pub/lu/PubMedPhrase/PubMed_Phrases.tar.gz">PubMed Phrases</a>
                  </div>
                </div>
                <div class="usa-width-three-fourths tooldesc" id="tool_5">
                  <b>Description</b>
                  <br>The dataset contains a collection of 705,915 PubMed Phrases (Kim et al., 2018) that are beneficial for information retrieval and human comprehension.
                </div>
              </div>
            </div>

            <div class="issue labs-docsums labs-content-box">
              <div class="tool">
                <div class="usa-width-one-fourth">
                  <div class="toolname">
                    <!-- Served over HTTPS like the sibling downloads: current browsers no longer open ftp:// links -->
                    <a href="https://ftp.ncbi.nlm.nih.gov/pub/lu/Suppl/Peng2016CID/CID.PubTator.txt.zip">CTD-Pfizer dataset</a>
                  </div>
                </div>
                <div class="usa-width-three-fourths tooldesc" id="tool_4">
                  <b>Description</b>
                  <br>The weakly-labeled corpus used in (<a href="/pubmed/28316651" target="_blank" rel="noopener">Peng et al., 2016</a>) consists of 18,410 abstracts and 33,224 CID relations. The raw data was extracted from curated data in the <a href="/pubmed/24288140" target="_blank" rel="noopener">CTD-Pfizer collaboration</a> with document-level annotations of drug-disease and drug-phenotype interactions. We applied <a href="/bionlp/Tools/tmchem" target="_blank" rel="noopener">tmChem</a> and <a href="/bionlp/Tools/dnorm" target="_blank" rel="noopener">DNorm</a> to recognize and normalize chemical and disease mentions, respectively. To maximize recall, we also applied a dictionary look-up method with a controlled vocabulary (MeSH). Finally, we filtered those without CID relations in the title/abstracts as some asserted relations are only in the full text.
                </div>
              </div>
            </div>

            <div class="issue labs-docsums labs-content-box">
              <div class="tool">
                <div class="usa-width-one-fourth">
                  <div class="toolname">
                    <a href="https://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/tmTools/download/tmVar/tmVarCorpus.zip">tmVar corpus</a>
                  </div>
                </div>
                <div class="usa-width-three-fourths tooldesc" id="tool_3">
                  <b>Description</b>
                  <br>tmVar Corpus contains 500 PubMed articles manually annotated with mutation mentions of various kinds.
                </div>
              </div>
            </div>

            <div class="issue labs-docsums labs-content-box">
              <div class="tool">
                <div class="usa-width-one-fourth">
                  <div class="toolname">
                    <a href="https://ftp.ncbi.nlm.nih.gov/pub/lu/BC5CDR/">BioCreative V CDR corpus</a>
                  </div>
                </div>
                <div class="usa-width-three-fourths tooldesc" id="tool_2">
                  <b>Description</b>
                  <br>BC5CDR corpus consists of 1500 PubMed articles with 4409 annotated chemicals, 5818 diseases and 3116 chemical-disease interactions.
                </div>
              </div>
            </div>

            <div class="issue labs-docsums labs-content-box">
              <div class="tool">
                <div class="usa-width-one-fourth">
                  <div class="toolname">
                    <a href="/bionlp/Data/disease">NCBI disease corpus</a>
                  </div>
                </div>
                <div class="usa-width-three-fourths tooldesc" id="tool_1">
                  <b>Description</b>
                  <br>NCBI disease corpus is a collection of 793 PubMed abstracts fully annotated at both mention and concept levels.
                </div>
              </div>
            </div>
          </div>
        </div>
      </main>
    </div>
  </div>
</div>
<!--
  Page footer: organisation logo links on the left, policy links on the right.

  Each logo link already contains a visually hidden "usa-sr-only" span that
  names the destination. The logo images therefore use alt="" so that
  assistive technology announces each link name once instead of twice
  (previously both the alt text and the hidden span were read).
-->
<footer class="usa-footer usa-footer-big ncbi-footer" role="contentinfo">
  <div class="usa-grid">
    <div class="usa-row">
      <div class="usa-width-one-half">
        <div>
          <div class="org-section">
            <a href="https://www.hhs.gov/"><img
                class="usa-footer-logo-img hhs-logo"
                src="/research/bionlp/static/base/images/dhhs-logo-white.svg"
                alt="">
              <span class="usa-sr-only">Department of Health and Human Services</span></a>
            <a href="https://www.nih.gov/"><img
                class="usa-footer-logo-img nih-logo"
                src="/research/bionlp/static/base/images/nih-logo-white.svg"
                alt="">
              <span class="usa-sr-only">National Institutes of Health</span></a>
            <a href="https://www.nlm.nih.gov/"><img
                class="usa-footer-logo-img nlm-logo"
                src="/research/bionlp/static/base/images/nlm-logo-letters-white.svg"
                alt="">
              <span class="usa-sr-only">National Library of Medicine</span></a>
            <a href="https://www.usa.gov/"><img
                class="usa-footer-logo-img usagov-logo"
                src="/research/bionlp/static/base/images/usagov-logo-white.svg"
                alt="">
              <span class="usa-sr-only">USA.gov</span></a>
          </div>
        </div>
      </div>
      <div class="usa-width-one-half">
        <div>
          <p class="about-links">
            <a href="https://www.nlm.nih.gov/research/index.html">About DIR</a>
            <a href="https://www.nlm.nih.gov/web_policies.html">Web Policies</a></p>
        </div>
      </div>
    </div>
  </div>
</footer>
</div>
<!-- End-of-body scripts: loaded synchronously, in this order, after the page markup -->
<script src="/research/bionlp/static/django_uswds/uswds/js/uswds.js"></script>
<script src="/research/bionlp/static/base/header.js"></script>
</body>
</html>