nih-gov/www.nlm.nih.gov/pubs/techbull/ma24/ma24_mtix.html
2025-02-26 13:17:41 -05:00

192 lines
12 KiB
HTML

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="X-UA-Compatible" content="IE=8;" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:site" content="@NLM_NIH" />
<meta name="twitter:title" content="MTIX: the Next-Generation Algorithm for Automated Indexing of MEDLINE. NLM Technical Bulletin. 2024 Mar&#8211;Apr" />
<meta name="twitter:description" content="The NLM Technical Bulletin is your source to stay informed about NLM products and services." />
<meta name="twitter:image" content="https://www.nlm.nih.gov/pubs/techbull/images/nlm_tech_bulletin_graphic_twitter.jpg" />
<meta property="og:url" content="https://www.nlm.nih.gov/pubs/techbull/tb.html" />
<meta property="og:type" content="article" />
<meta property="og:title" content="MTIX: the Next-Generation Algorithm for Automated Indexing of MEDLINE. NLM Technical Bulletin. 2024 Mar&#8211;Apr" />
<meta property="og:description" content="The NLM Technical Bulletin is your source to stay informed about NLM products and services." />
<meta property="og:image" content="https://www.nlm.nih.gov/pubs/techbull/images/nlm_tech_bulletin_graphic_facebook.jpg" />
<link type="text/css" href="/pubs/techbull/styles/reset.css" rel="stylesheet" />
<link type="text/css" href="/pubs/techbull/styles/technicalBulletin.css" rel="stylesheet" />
<!--Call jQuery-->
<script src="//assets.nlm.nih.gov/jquery/jquery-latest.min.js"></script>
<script src="//assets.nlm.nih.gov/jquery/jquery-migrate-latest.min.js"></script>
<script src="/pubs/techbull/scripts/techbull.js" type="text/javascript" language="javascript"></script>
<!--[if lte IE 8]>
<script type="text/javascript" src="/scripts/PIE.js"></script>
<![endif]-->
<link type="text/css" href="/pubs/techbull/styles/print.css" rel="stylesheet" media="print"/>
<title>MTIX: the Next-Generation Algorithm for Automated Indexing of MEDLINE. NLM Technical Bulletin. 2024 Mar&#8211;Apr</title>
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" title="The Dublin Core metadata Element Set" />
<meta name="DC.Title" content="MTIX: the Next-Generation Algorithm for Automated Indexing of MEDLINE" />
<meta name="DC.Publisher" content="U.S. National Library of Medicine" />
<meta name="DC.Date.Issued" content="2024-04-29" />
<meta name="DC.Date.Modified" content="2024-04-29" />
<meta name="NLM.Contact.Email" content="nlmtechbull@mail.nlm.nih.gov" />
<meta name="DC.Type" content="Newsletters" />
<meta name="NLM.Permanence.Level" content="Permanent: Stable Content" />
<meta name="DC.Rights" content="Public Domain" />
<meta name="DC.Language" content="eng" />
<meta name="DC.Subject.Keyword" content="MEDLINE" />
<meta name="DC.Subject.Keyword" content="Indexing" />
<meta name="DC.Subject.Keyword" content="Medical Text Indexer" />
<!-- Google Tag Manager: pushes a 'gtm.start' event onto window.dataLayer, then creates an async script element for gtm.js (id GTM-MT6MLL) and inserts it before the first script element in the document. -->
<script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push({'gtm.start': new Date().getTime(),event:'gtm.js'});var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-MT6MLL');</script>
<!-- End Google Tag Manager -->
</head>
<body>
<!-- Google Tag Manager -->
<noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-MT6MLL" height="0" width="0" style="display:none;visibility:hidden" title="googletagmanager"></iframe></noscript>
<!-- End Google Tag Manager -->
<div class="skipnavigation"><a title="Skip the navigation on this page" href="#skipnav" class="skipnavigation">Skip Navigation Bar</a></div>
<div>
<div class="header">
<img src="/pubs/techbull/images/tb_logo_113.jpg" alt="National Library of Medicine Technical Bulletin" title="National Library of Medicine Technical Bulletin" /><img src="/pubs/techbull/images/nlm_masthead_113.jpg" alt="National Library of Medicine Technical Bulletin" title="National Library of Medicine Technical Bulletin" usemap="#nlm_masthead_113" />
</div>
<div class="search_box">
<!-- Site search: sends the query by GET to vsearch.nlm.nih.gov; the hidden v:project field is submitted with the value "technical-bulletin". -->
<form method="get" action="//vsearch.nlm.nih.gov/vivisimo/cgi-bin/query-meta" target="_self" name="searchForm" class="searchForm">
<label class="displaynone" for="search">Search</label>
<!-- On focus, clear the field only while it still holds the default prompt, so text the user has already typed is not erased when they return to the field. -->
<input name="query" id="search" type="text" class="search-input inactive" size="50" onfocus="if (this.value === this.defaultValue) { this.value = ''; }" value="Search here for NLM Technical Bulletin articles" aria-label="Search NLM Technical Bulletin" />
<input type="hidden" name="v:project" value="technical-bulletin" />
</form>
</div>
</div>
<div id="nav">
<!--Open drop-->
<ul class="topnav">
<li class="currentissue"><a href="//www.nlm.nih.gov/pubs/techbull/current_issue.html">Current Issue</a> <img class="separator" src="/pubs/techbull/images/whitelinetransparentbackground.gif" alt=""/></li>
<li class="archive"><a href="//www.nlm.nih.gov/pubs/techbull/back_issues.html">Previous Issues</a> <img class="separator" src="/pubs/techbull/images/whitelinetransparentbackground.gif" alt=""/></li>
<li class="about"><a href="//www.nlm.nih.gov/pubs/techbull/about.html">About</a> <img class="separator" src="/pubs/techbull/images/whitelinetransparentbackground.gif" alt=""/></li>
<li class="staycurrent"><a href="//www.nlm.nih.gov/pubs/techbull/stay_current.html">Stay Current <img class="emaillogo" src="/pubs/techbull/images/email_20px.gif" alt="E-Mail Sign Up" style="margin-top: -4px;"/> <img class="rsslogo" src="/pubs/techbull/images/rss_20px.gif" alt="RSS Feed" style="margin-top: -4px;"/></a></li>
</ul>
<!--Close drop-->
</div>
<div class="body">
<a id="skipnav" name="skipnav"></a>
<div class="syndicate">
<p class="tableOfContents"><strong>Table of Contents: <a href="/pubs/techbull/ma24/ma24_issue_cover.html">2024 MARCH&#8211;APRIL No. 457</a></strong></p>
<p class="prevnext"><span class="buttons">
<span class="previous"><a href="ma24_leiter_nlm_mla_lecture.html">Previous</a></span> <span class="next"><a href="ma24_NLM_at_MLA.html">Next</a></span>
</span></p>
<hr class="hr1" />
<h1 class="articleH1">MTIX: the Next-Generation Algorithm for Automated Indexing of MEDLINE</h1>
<p class="tbyearmonth">MTIX: the Next-Generation Algorithm for Automated Indexing of MEDLINE. NLM Tech Bull. 2024 Mar-Apr;(457):e4.</p>
<div class="articleactions">2024 April 29 <span class="status">[posted]</span>
</div>
<div class="articleParagraph">
<p>The National Library of Medicine (NLM) is committed to advancing biomedical discovery across our databases of biomedical literature, genomic information, and other scientific data. As part of these efforts, NLM strives to produce timely <a href="https://www.nlm.nih.gov/mesh/meshhome.html">MeSH</a> indexing of <a href="https://www.nlm.nih.gov/medline/medline_overview.html">MEDLINE</a> biomedical and life sciences citations for the <a href="https://pubmed.ncbi.nlm.nih.gov/">PubMed</a> database. To this end, the Library is pleased to announce the next major milestone in <a href="https://www.nlm.nih.gov/pubs/techbull/nd21/nd21_medline_2022.html">automated MEDLINE indexing</a>: the implementation of the MTIX (Medical Text Indexer-NeXt Generation) algorithm, which replaces the MTIA (Medical Text Indexer-Automated) algorithm.</p>
<h2>MTIX Technology</h2>
<p>Although MTIA and MTIX have similar names, they use different technologies. MTIA was a complex system based on a dictionary of MeSH terms, synonyms, and other trigger phrases, with rules created and refined by humans over the course of many years. In contrast, MTIX is a machine learning model known as a neural network, a type of AI.</p>
<p>MTIX was trained on millions of MEDLINE citations published between 2007 and 2022. From those examples, MTIX learns how the citation title, abstract, publication year, indexing year, and journal name relate to the indexed terms on that article. Once trained, MTIX can apply the knowledge it developed during training to new citations, determining which MeSH terms are statistically likely to be appropriate indexing for that new article.</p>
<h2>MTIX Performance</h2>
<p>MTIX outperforms MTIA by "understanding" more complex representations of concepts. For example, it can recognize the concept of "Hip Fractures" from interrupted and reordered phrases like "hip and knee fractures", "fractures of the femur and hip", or "complex fractures and dislocations of the hip". Because MTIX makes determinations based on many features and not just trigger words, it can recognize abstract ideas that are not literally stated in the text and can predict, from the abstract, some MeSH concepts that are present in the full text of the article. MTIX can also avoid contextual errors when encountering metaphorical language. For example, it will not index "Elephants" on an article that contains an idiom like "the elephant in the room."</p>
<p>This sophistication translates to superior performance. MTIA was tuned to favor precision (no incorrect terms indexed) over recall (all correct terms indexed) when measured against human indexing. MTIX maintains a similar level of precision but makes large gains in recall, correctly applying 50% more terms than MTIA, for significantly more comprehensive indexing. MTIX has especially high performance in publication types and check tags, two categories with high search impact in PubMed.</p>
<a name="fig1"></a>
<div class="figure"><br />
<img src="/pubs/techbull/ma24/graphics/mtix_fig1.png" alt="A bar graph, displaying the values:
Overall: MTIA, 58%. MTIX, 74%.
Checktags: MTIA, 62%. MTIX, 87%.
Publication Types: MTIA, 67%. MTIX, 88%." /><br />
<strong>Figure 1: F1 scores for MTIX versus MTIA<br />
F1 combines precision and recall scores as a harmonic mean. Data are from a random sample of ~40,000 MEDLINE citations published between 2017 and 2022. "Overall" combines all four other categories.</strong></div>
<h2>Quality Assurance</h2>
<p>Human curators will continue to play a significant role in quality assurance for MTIX. Roughly one-third of articles indexed via automation will also receive human curation. Our curation efforts focus on areas with the highest impact on our users; for example, curators review publication types such as systematic reviews or clinical trials and citations that involve genes or proteins, some of the most frequent search topics in PubMed.</p>
<p>If you have questions or suggestions regarding MEDLINE indexing, please contact <a href="https://support.nlm.nih.gov/support/create-case/">NLM Customer Support</a>. We use feedback on MTIX indexing to refine and improve performance.</p>
</div>
<p class="articleParagraph">
</p>
</div>
<div class="footer">
<p class="footerLeft"><span class="footerissn"><strong>ISSN 2161-2986 (Online)</strong> Content not copyrighted; freely reproducible.</span><br/>
<a href="/">National Library of Medicine</a> 8600 Rockville Pike, Bethesda, MD 20894
<br/>
<a href="//www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a>,
<a href="//www.nlm.nih.gov/web_policies.html">Web Policies</a>,
<a href="//www.nlm.nih.gov/careers/jobopenings.html">Careers</a>,
<a href="//www.nlm.nih.gov/accessibility.html">Accessibility</a>,
<a href="//www.usa.gov/" id="anch_34">USA.gov</a>,
<a href="//www.hhs.gov/vulnerability-disclosure-policy/index.html">HHS Vulnerability Disclosure</a>
<br/>
<a href="//www.nih.gov/">NIH</a>,
<a href="https://www.hhs.gov/">HHS</a>,
<a href="//www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office">FOIA</a>,
<a class="supportLink" href="//support.nlm.nih.gov?from=" target="_blank">NLM Support Center</a>
</p>
<p class="footerRight">
<strong>Last updated:</strong> 29 April 2024</p>
</div>
</div>
<!-- Client-side image map for the masthead image that declares usemap="#nlm_masthead_113"; each rectangular area links one region of the banner. Every area carries matching alt and title text. -->
<map id="nlm_masthead_113" name="nlm_masthead_113">
<area shape="rect" alt="NLM Technical Bulletin" coords="1,15,396,45" href="//www.nlm.nih.gov/pubs/techbull/tb.html" title="NLM Technical Bulletin" />
<area shape="rect" alt="National Library of Medicine" coords="0,47,203,62" href="//www.nlm.nih.gov/" title="National Library of Medicine" />
<area shape="rect" alt="National Institutes of Health" coords="207,47,396,62" href="//www.nih.gov/" title="National Institutes of Health" />
</map>
<!--*****************************Content end*******************************-->
<script src="/scripts/support.js"></script>
<script src="/core/nlm-notifyExternal/1.0/nlm-notifyExternal.min.js"></script>
</body>
</html>