nih-gov/www.nlm.nih.gov/pubs/techbull/nd07/nd07_diacritics.html
2025-02-26 13:17:41 -05:00

372 lines
15 KiB
HTML

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:site" content="@NLM_NIH" />
<meta name="twitter:title" content="Diacritics in PubMed&#174; Displays and Searching. NLM Technical Bulletin. 2007 Nov&#8211;Dec" />
<meta name="twitter:description" content="The NLM Technical Bulletin is your source to stay informed about NLM products and services." />
<meta name="twitter:image" content="https://www.nlm.nih.gov/pubs/techbull/images/nlm_tech_bulletin_graphic_twitter.jpg" />
<meta property="og:url" content="https://www.nlm.nih.gov/pubs/techbull/tb.html" />
<meta property="og:type" content="article" />
<meta property="og:title" content="Diacritics in PubMed&#174; Displays and Searching. NLM Technical Bulletin. 2007 Nov&#8211;Dec" />
<meta property="og:description" content="The NLM Technical Bulletin is your source to stay informed about NLM products and services." />
<meta property="og:image" content="https://www.nlm.nih.gov/pubs/techbull/images/nlm_tech_bulletin_graphic_facebook.jpg" />
<title>Diacritics in PubMed&#174; Displays and Searching. NLM Technical Bulletin. 2007 Nov&#8211;Dec</title>
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" title="The Dublin Core metadata Element Set" />
<meta name="DC.Title" content="Diacritics in PubMed&#174; Displays and Searching" />
<meta name="DC.Publisher" content="U.S. National Library of Medicine" />
<meta name="DC.Date.Issued" content="2007-11-09" />
<meta name="DC.Date.Modified" content="2007-11-09" />
<meta name="NLMDC.Date.LastReviewed" content="2009-08-20" />
<meta name="NLM.Contact.Email" content="nlmtechbull@mail.nlm.nih.gov" />
<meta name="DC.Type" content="Newsletters" />
<meta name="NLM.Permanence.Level" content="Permanent: Stable Content" />
<meta name="NLM.Permanence.Guarantor" content="U.S. National Library of Medicine" />
<meta name="DC.Rights" content="Public Domain" />
<meta name="DC.Language" content="eng" />
<meta name="DC.Subject.Keyword" content="Abstract" />
<meta name="DC.Subject.Keyword" content="AbstractPlus" />
<meta name="DC.Subject.Keyword" content="Affiliation" />
<meta name="DC.Subject.Keyword" content="Author Name" />
<meta name="DC.Subject.Keyword" content="Author" />
<meta name="DC.Subject.Keyword" content="Diacritics" />
<meta name="DC.Subject.Keyword" content="Entrez" />
<meta name="DC.Subject.Keyword" content="Extensible Markup Language" />
<meta name="DC.Subject.Keyword" content="Integrated Library System" />
<meta name="DC.Subject.Keyword" content="Journal Title" />
<meta name="DC.Subject.Keyword" content="Language" />
<meta name="DC.Subject.Keyword" content="MEDLINE" />
<meta name="DC.Subject.Keyword" content="National Center for Biotechnology Information" />
<meta name="DC.Subject.Keyword" content="National Library of Medicine" />
<meta name="DC.Subject.Keyword" content="Personal Name as Subject" />
<meta name="DC.Subject.Keyword" content="PubMed" />
<meta name="DC.Subject.Keyword" content="Release" />
<meta name="DC.Subject.Keyword" content="Summary Display" />
<meta name="DC.Subject.Keyword" content="Title" />
<meta name="DC.Subject.Keyword" content="Update" />
<link rel="stylesheet" href="../techbulletin.css" />
<link rel="alternate" type="application/rss+xml" title="RSS" href="/rss/techbull.rss" />
<!-- Google Tag Manager -->
<script>
/*
 * Google Tag Manager bootstrap for container GTM-MT6MLL.
 *
 * Arguments of the immediately-invoked function:
 *   w - the window object
 *   d - the document object
 *   s - tag name used both to find the insertion point and to create the loader ('script')
 *   l - name of the global data-layer array ('dataLayer')
 *   i - GTM container id
 *
 * The function makes sure w[l] exists, pushes the 'gtm.js' start event with the
 * current timestamp, and inserts an async loader element before the first
 * existing script element in the document.
 *
 * The loader URL is pinned to https:// (rather than protocol-relative) so the
 * tag script is never fetched in cleartext when the page itself is served over http.
 */
(function (w, d, s, l, i) {
  w[l] = w[l] || [];
  w[l].push({ 'gtm.start': new Date().getTime(), event: 'gtm.js' });

  var f = d.getElementsByTagName(s)[0],
      j = d.createElement(s),
      // Only append the data-layer name when it differs from the default.
      dl = l != 'dataLayer' ? '&l=' + l : '';

  j.async = true;
  j.src = 'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
  f.parentNode.insertBefore(j, f);
})(window, document, 'script', 'dataLayer', 'GTM-MT6MLL');
</script>
<!-- End Google Tag Manager -->
</head>
<body link="#476B47" alink="#476B47" vlink="#476B47" text="#000000" bgcolor="#ffffff">
<!-- Google Tag Manager -->
<noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-MT6MLL" height="0" width="0" style="display:none;visibility:hidden" title="googletagmanager"></iframe></noscript>
<!-- End Google Tag Manager -->
<a title="Skip the navigation on this page" href="#skipnav" class="skipnavigation">Skip Navigation Bar</a>
<a name="top" id="top"></a>
<div id="body" >
<div class="header">
<div class="shadow_bottom" align="left">
<div class="shadow_right">
<div class="shadow_corner">
<table class="title" width="100%" border="0" cellpadding="0" cellspacing="0" >
<col width="134" />
<col width="*" />
<tr>
<td>
<img src="../new_tb_graphics/tb_header_logo.gif" alt="" width="134" height="64" align="top" />
</td>
<td id="titleCell">
<img id="titleImage" src="../new_tb_graphics/tb_header_title.gif" alt="NLM Technical Bulletin"
title="" align="top" border="0" height="26" width="335"/>
<div id="titleText">NLM Technical Bulletin</div>
<div>
<a href="//www.nlm.nih.gov/">National&nbsp;Library&nbsp;of&nbsp;Medicine</a> |
<a href="//www.nih.gov/">National&nbsp;Institutes&nbsp;of&nbsp;Health</a>
</div>
</td>
</tr></table>
</div></div></div></div>
<div class="header">
<div class="shadow_bottom" align="left">
<div class="shadow_right">
<div class="shadow_corner">
<table class="grid" width="100%" border="0" cellpadding="0" cellspacing="0">
<tr class="search" >
<td colspan="2" align="right">
<form method="get" action="//search.nlm.nih.gov/techbull/query" target="_self" name="searchForm">
<label for="searchInput">Search</label>
<input name="PARAMETER" type="text" id="searchInput" size="12" align="middle"/>
<input type="submit" id="searchGo" value="GO" />
<input type="hidden" name="FUNCTION" value="search" />
<input type="hidden" name="MAX" value="250" />
<input type="hidden" name="DISAMBIGUATION" value="true" />
<a href="/vsearchfaq.html">Help</a>
</form>
<img src="../new_tb_graphics/spacer.gif" width="10" height="1" border="0" alt="" />
</td>
</tr>
<tr>
<td colspan="2">
2007 NOVEMBER&#8211;DECEMBER; 359
</td>
</tr>
<tr>
<td class="gridBottomLeft">
<a href="nd07_issue_cover.html">Table of Contents</a>
</td><td class="gridBottomRight">
<a href="../tb.html" class="homeMenu">Home</a>
<a href="../back_issues.html" class="issuesMenu">Back&nbsp;Issues</a>
<a href="../new_index.html" class="indexMenu">Indexes</a>
</td>
</tr>
</table>
</div></div></div></div>
<!-- header_end-->
<div id="content" class="article">
<a id="skipnav" name="skipnav"></a>
<div class="actions">November 09, 2007 <span class="status">[posted]</span>
</div>
<h1>Diacritics in PubMed<span class="registered">&#174;</span> Displays and Searching</h1>
<h2><img src="../new_tb_graphics/b.gif" class="initcap" alt=""
width="35" height="43" />
<span class="initcap">b</span>ackground</h2>
<p>
A diacritic is a mark that modifies a letter and indicates a different phonetic value or pronunciation from the unmarked letter, such as the acute accent over the letter e, é, in the French language. The National Library of Medicine<span class="registered">&#174;</span> (NLM<span class="registered">&#174;</span>) has always used a certain set of diacritical marks in its journal citation data (see <a href="//www.nlm.nih.gov/databases/dtd/medline_characters.html">https://www.nlm.nih.gov/databases/dtd/medline_characters.html</a>) and displayed them in print publications such as <em>Index Medicus</em>. Note that the list of marks is limited and that NLM does not use them in combination with capital letters (with the exception of the Swedish capital letter O, Ø, and the Polish capital letter L, Ł). NLM converted to using Unicode (UTF-8) encoding for our character set when we transitioned off our mainframe computer to relational database technology around the year 2000; previously we had used an EBCDIC (Extended Binary Coded Decimal Interchange Code) character set.
</p>
<p>
With the debut of PubMed on the World Wide Web, NLM continued to use diacritics but did not display them by default, because users might have been confused about how to search for characters that did not appear on most keyboards in use in the United States. Now the growth of the Web and of international PubMed use, along with the widespread availability of UTF-8 character set printing capabilities, has led NLM to display diacritics in PubMed.
</p>
<h2>Display</h2>
<p>
Since late April, when we changed to the new Entrez System (see <a href="//www.nlm.nih.gov/pubs/techbull/ma07/ma07_ncbi_new_titles.html#note"><em>NCBI to Introduce Changes to the Entrez System — Beta Version Available for Preview. NLM Tech Bull. 2007 Mar-Apr;(355):e7</em></a>), diacritical marks have been displayed in author names and affiliation (first author's address) on the AbstractPlus, Abstract, and Citation displays (see <a href="#fig1">Figure 1</a>).
</p>
<br /><br />
<div class="figure" style="width: 504px;">
<a name="fig1" id="fig1">
<img src="graphics/diacritics_fig1.gif"
alt="Screen capture of Diacritical marks in the Author and Author Affiliation fields in the PubMed AbstractPlus Display."
title="" width="502" height="276" /><br />
<strong>Figure 1: Diacritical marks in the Author and Author Affiliation fields in the PubMed AbstractPlus Display.</strong>
</a>
</div>
<br /><br />
<p>
Today, diacritics were added to the Summary display (see <a href="#fig2">Figure 2</a>) for new citations and next week should be displayed in all citations for which diacritics are available. The XML display option has always shown the diacritical marks. The MEDLINE display will not show diacritics, as this has historically been a straight ASCII (American Standard Code for Information Interchange) presentation of only 128 characters.
</p>
<br /><br />
<div class="figure" style="width: 504px;">
<a name="fig2" id="fig2">
<img src="graphics/diacritics_fig2.gif"
alt="Screen capture of Diacritics displayed in the PubMed Summary Display."
title="" width="502" height="258" /><br />
<strong>Figure 2: Diacritics displayed in the PubMed Summary Display.</strong>
</a>
</div>
<br /><br />
<p>
PubMed pages generate the default character setting of Unicode (UTF-8) for optimal viewing of diacritical marks.
</p>
<p>
In general, most diacritical marks appear in author names and affiliation and Transliterated/Vernacular Title fields with some marks occurring in the Article Title, Abstract, Personal Name as Subject or Full Journal Title fields. (Note: The Full Journal Title field may contain characters not in the MEDLINE Character set because this element is derived from Voyager, the NLM Integrated Library System, which has a larger character set.)
</p>
<p>
Please note that diacritical marks that did not convert successfully to Unicode are displayed as inverted question marks. As time and resources permit, these will be corrected.</p>
<h2>Searching</h2>
<p>
All PubMed searching for terms containing diacritical marks ignores those marks, even if users enter them in a search query box (by cutting and pasting or by direct entry). Therefore, searches that include diacritics will retrieve results for terms that include the diacritic as well as terms that do not. If you search with plain letters, your retrieval will include results for terms with the diacritic as well as those without. In other words, search results are "diacritics-neutral" (see <a href="#fig3">Figure 3</a>).
</p>
<br /><br />
<div class="figure" style="width: 504px;">
<a name="fig3" id="fig3">
<img src="graphics/diacritics_fig3.gif"
alt="Screen capture of Diacritics-neutral search results."
title="" width="502" height="54" /><br />
<strong>Figure 3: Diacritics-neutral search results.</strong>
</a>
</div>
<br /><br />
<p>
Searching uses the plain letter equivalent whether the query is user-entered or system-generated, such as the author name search links that are launched by clicking on an author name from most displays (see <a href="#fig4">Figure 4</a>).
</p>
<br /><br />
<div class="figure" style="width: 504px;">
<a name="fig4" id="fig4">
<img src="graphics/diacritics_fig4.gif"
alt="Screen capture of PubMed Author Search Link with a Diacritic."
title="" width="502" height="312" /><br />
<strong>Figure 4: PubMed Author Search Link with a Diacritic</strong>
</a>
</div>
<br /><br />
<p class="authors">
By
Lou Wave S. Knecht<br />
Bibliographic Services Division<br />
and<br />
Kathi Canese<br />
National Center for Biotechnology Information<br />
</p>
<img src="../new_tb_graphics/black_pixel.gif" width="450" height="1" alt="" />
<!--************************CHANGE CITATION INFORMATION BELOW*****************************-->
<p class="citation">Knecht LWS, Canese K. Diacritics in PubMed<span class="registered">&#174;</span> Displays and Searching. NLM Tech Bull. 2007 Nov-Dec; (359):e4.</p>
<!--************************END CITATION INFORMATION**************************************-->
</div><!-- end of content -->
<div class="header" align="center">
<div class="shadow_bottom" align="left">
<div class="shadow_right">
<div class="shadow_corner">
<table class="grid" width="100%" border="0" cellpadding="0" cellspacing="0">
<tr>
<td>
<a href="nd07_pa_update.html" title="Previous Page">PREVIOUS</a>
</td><td>
<span class="issueDateMenu" >
2007 NOVEMBER&#8211;DECEMBER</span> No. 359
</td>
<td class="gridNext">
<a href="nd07_pubmed_html.html" title="Next Article">NEXT</a>
</td>
</tr>
<tr>
<td>
<a href="/pubs/techbull/stay_current.html" class="gridBottomLeft">Stay Current <img src="/pubs/techbull/new_tb_graphics/footer_icons_14h.gif" alt="E-Mail Sign Up"/></a>
</td><td class="gridBottomRight" colspan="2">
<a href="../tb.html" class="homeMenu">Home</a>
<a href="../back_issues.html" class="issuesMenu">Back&nbsp;Issues</a>
<a href="../new_index.html" class="indexMenu">Indexes</a>
</td>
</tr>
</table>
</div></div></div></div>
<!-- BEGIN NLM FOOTER -->
<div class="footer">
<a href="/">National Library of Medicine</a> 8600 Rockville Pike, Bethesda, MD 20894
<br />
<a href="//www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a>,
<a href="//www.nlm.nih.gov/web_policies.html">Web Policies</a>,
<a href="//www.nlm.nih.gov/careers/jobopenings.html">Careers</a>,
<a href="//www.nlm.nih.gov/accessibility.html">Accessibility</a>,
<a href="//www.usa.gov/">USA.gov</a>,
<a href="//www.hhs.gov/vulnerability-disclosure-policy/index.html">HHS Vulnerability Disclosure</a>
<br />
<a href="//www.nih.gov/">NIH</a>,
<a href="https://www.hhs.gov/">HHS</a>,
<a href="//www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office">FOIA</a>,
<a class="supportLink" href="//support.nlm.nih.gov?from=" target="_blank">NLM Support Center</a>
</div>
</div><!-- body -->
<script src="//assets.nlm.nih.gov/jquery/jquery-latest.min.js"></script>
<script src="/scripts/support.js"></script>
<script src="/core/nlm-notifyExternal/1.0/nlm-notifyExternal.min.js"></script>
</body>
</html>