nih-gov/www.nlm.nih.gov/research/umls/Snomed/snomed_inversion_source_trans.html
2025-02-26 13:17:41 -05:00

498 lines
37 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!doctype html>
<html lang="en">
<head>
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1"/>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Roboto:wght@100;300;400;500;700&display=swap" rel="stylesheet">
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.0.10/css/all.css" integrity="sha384-+d0P83n9kaQMCwj8F4RJB66tzIwOKmrdb46+porD/OvrJ+37WqIM7UoBtwHO6Nlg" crossorigin="anonymous">
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" title="The Dublin Core metadata Element Set" />
<script src="https://assets.nlm.nih.gov/uswds/js/uswds-init.min.js"></script>
<link rel="stylesheet" href="/home_assets/uswds/css/styles.css">
<title>SNOMED CT® in the UMLS® Metathesaurus®: Inversion Source Transparency Achieved Jan. 2005 SNOMED CT in the 2005AB Release </title>
<link rel="schema.DC" href="http://purl.org/dc/elements/1.1/" title="The Dublin Core metadata Element Set" />
<meta name="DC.Title" content="SNOMED CT® in the UMLS® Metathesaurus®: Inversion Source Transparency Achieved Jan. 2005 SNOMED CT in the 2005AB Release " />
<meta name="DC.Publisher" content="U.S. National Library of Medicine" />
<meta name="DC.Date.Issued" content="2005-06-01" />
<meta name="DC.Date.Modified" content="2006-05-24" />
<meta name="NLMDC.Date.LastReviewed" content="2008-01-29" />
<meta name="NLM.Contact.Email" content="nlmumlscustserv@mail.nlm.nih.gov" />
<meta name="DC.Type" content="Procedures" />
<meta name="NLM.Permanence.Level" content="Permanent: Dynamic Content" />
<meta name="DC.Rights" content="Public Domain" />
<meta name="DC.Language" content="eng" />
<!--<link rel="stylesheet" href="/research/umls/styles.css" type="text/css">-->
<!-- Google Tag Manager --><script>(function(w,d,s,l,i){w[l]=w[l]||[];w[l].push(
{'gtm.start': new Date().getTime(),event:'gtm.js'}
);var f=d.getElementsByTagName(s)[0],j=d.createElement(s),dl=l!='dataLayer'?'&l='+l:'';j.async=true;j.src='//www.googletagmanager.com/gtm.js?id='+i+dl;f.parentNode.insertBefore(j,f);})(window,document,'script','dataLayer','GTM-MT6MLL');</script>
<!-- End Google Tag Manager -->
</head>
<body>
<!-- Google Tag Manager -->
<noscript><iframe src="//www.googletagmanager.com/ns.html?id=GTM-MT6MLL" height="0" width="0" style="display:none;visibility:hidden" title="googletagmanager"></iframe></noscript>
<!-- End Google Tag Manager -->
<!-- TOP NAV -->
<a class="usa-skipnav" href="#main">Skip to main content</a>
<div class="usa-banner site-banner" aria-label="Official government website">
<div class="usa-accordion">
<header class="usa-banner__header">
<div class="usa-banner__inner">
<div class="grid-col-auto"> <img class="usa-banner__header-flag" src="https://assets.nlm.nih.gov/uswds/img/us_flag_small.png" alt="U.S. flag"/> </div>
<div class="grid-col-fill tablet:grid-col-auto">
<p class="usa-banner__header-text"> An official website of the United States government </p>
<p class="usa-banner__header-action" aria-hidden="true"> Here&rsquo;s how you know </p>
</div>
<button class="usa-accordion__button usa-banner__button" aria-expanded="false" aria-controls="gov-banner"> <span class="usa-banner__button-text">Here&rsquo;s how you know</span> </button>
</div>
</header>
<div class="usa-banner__content usa-accordion__content" id="gov-banner">
<div class="grid-row grid-gap-lg">
<div class="usa-banner__guidance tablet:grid-col-6"> <img class="usa-banner__icon usa-media-block__img" src="https://assets.nlm.nih.gov/uswds/img/icon-dot-gov.svg" role="img" alt="" aria-hidden="true"/>
<div class="usa-media-block__body">
<p> <strong> Official websites use .gov </strong> <br />
A <strong>.gov</strong> website belongs to an official government
organization in the United States. </p>
</div>
</div>
<div class="usa-banner__guidance tablet:grid-col-6"> <img class="usa-banner__icon usa-media-block__img" src="https://assets.nlm.nih.gov/uswds/img/icon-https.svg" role="img" alt="" aria-hidden="true"/>
<div class="usa-media-block__body">
<p> <strong> Secure .gov websites use HTTPS </strong> <br />
A <strong>lock</strong> ( <span class="icon-lock">
<svg xmlns="http://www.w3.org/2000/svg" width="52" height="64" viewBox="0 0 52 64" class="usa-banner__lock-image" role="img" aria-labelledby="banner-lock-title-default banner-lock-description-default" focusable="false">
<title id="banner-lock-title-default">Lock</title>
<desc id="banner-lock-description-default">A locked padlock</desc>
<path fill="#000000" fill-rule="evenodd" d="M26 0c10.493 0 19 8.507 19 19v9h3a4 4 0 0 1 4 4v28a4 4 0 0 1-4 4H4a4 4 0 0 1-4-4V32a4 4 0 0 1 4-4h3v-9C7 8.507 15.507 0 26 0zm0 8c-5.979 0-10.843 4.77-10.996 10.712L15 19v9h22v-9c0-6.075-4.925-11-11-11z"/>
</svg>
</span> ) or <strong>https://</strong> means you&rsquo;ve safely connected to the .gov website. Share sensitive information only on official, secure websites. </p>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- HEADER -->
<header id="siteheader" class="usa-header usa-header--basic">
<div class="usa-nav-container tablet:padding-x-4 mobile-lg:padding-x-2 padding-y-1">
<div class="grid-row padding-y-105">
<div class="grid-col-8 desktop:grid-col-4 tablet-lg:grid-col-4 tablet:grid-col-6"> <a href="https://www.nlm.nih.gov/"> <img src="https://assets.nlm.nih.gov/uswds/img/NLM_White.png" alt="NLM logo" class="logo margin-top-1"> </a> </div>
<div class="desktop:grid-col-4 desktop:grid-offset-4 tablet-lg:grid-col-6 tablet-lg:grid-offset-2 tablet:grid-col-6 grid-col-12">
<form class="usa-search desktop:margin-top-2 tablet:margin-top-2 mobile:margin-top-1" role="search" data-gtm-form-interact-id="0" method="get" action="//vsearch.nlm.nih.gov/vivisimo/cgi-bin/query-meta" target="_self" name="searchForm" id="searchForm2">
<input class="usa-input ui-autocomplete-input" aria-label="Search" type="search" name="query" data-gtm-form-interact-field-id="0" id="search2" autocomplete="off" placeholder="Search NLM" >
<input type="hidden" name="v:project" value="nlm-main-website">
<button class="usa-button border border-top border-bottom border-right border-white" role="button" aria-label="Search" type="submit"> <span class="usa-search__submit-text"> <i class="fas fa-search"></i> </span> </button>
</form>
</div>
</div>
</div>
</header>
<div class="bg-secondary insertCOOP">
<div class="usa-nav-container">
<div class="usa-navbar ">
<button class="usa-menu-btn">Menu</button>
</div>
<nav aria-label="Primary navigation" class="usa-nav">
<button class="usa-nav__close"><img src="https://assets.nlm.nih.gov/uswds/img/close.svg" alt="close"></button>
<ul class="usa-nav__primary usa-accordion insertNav">
<li class="usa-nav__primary-item desktop-lg:margin-x-5 desktop:margin-x-3 tablet:margin-x-0">
<button type="button" class="usa-accordion__button usa-nav__link usa-current" aria-expanded="false" aria-controls="basic-nav-section-one"> <span>Products and Services <i class="fas fa-caret-down margin-left-05"></i> </span> </button>
<ul id="basic-nav-section-one" class="usa-nav__submenu bg-secondary" hidden="">
<li class="usa-nav__submenu-item"> <a href="//eresources.nlm.nih.gov/nlm_eresources/"><span>All Products and Services</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="//clinicaltrials.gov/"><span>ClinicalTrials.gov</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="//collections.nlm.nih.gov/"><span>Digital Collections</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="//catalog.nlm.nih.gov"><span>LocatorPlus Catalog</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="//meshb.nlm.nih.gov/search"><span>Medical Subject Headings (MeSH)</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="//medlineplus.gov/"><span>MedlinePlus</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="//pubmed.ncbi.nlm.nih.gov/"><span>PubMed/MEDLINE</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="//uts.nlm.nih.gov/uts/"><span>Unified Medical Language System (UMLS)</span></a> </li>
</ul>
</li>
<li class="usa-nav__primary-item desktop-lg:margin-x-5 desktop:margin-x-3 tablet:margin-x-0">
<button type="button" class="usa-accordion__button usa-nav__link usa-current" aria-expanded="false" aria-controls="basic-nav-section-two"> <span> Resources for You <i class="fas fa-caret-down margin-left-05"></i></span> </button>
<ul id="basic-nav-section-two" class="usa-nav__submenu bg-secondary" hidden="">
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/portals/researchers.html"><span>For Researchers</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/portals/publishers.html "><span>For Publishers</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/portals/librarians.html"><span>For Librarians</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/training.html "><span>For Educators/Trainers </span></a> </li>
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/portals/healthcare.html"><span>For Health care Professionals</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="//www.nlm.nih.gov/portals/public.html "><span>For the Public</span></a> </li>
</ul>
</li>
<li class="usa-nav__primary-item desktop-lg:margin-x-5 desktop:margin-x-3 tablet:margin-x-0">
<button type="button" class="usa-accordion__button usa-nav__link usa-current" aria-expanded="false" aria-controls="basic-nav-section-three"> <span>Explore NLM <i class="fas fa-caret-down margin-left-05"></i> </span> </button>
<ul id="basic-nav-section-three" class="usa-nav__submenu bg-secondary" hidden="">
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/about/index.html"><span>About the Library</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/about/visitor.html"><span>Visit the Library</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/hmd/index.html"><span>History of Medicine</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/about/org.html"><span>NLM by Organization</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/news/newsandevents.html"><span>News, Events, and Updates</span></a> </li>
</ul>
</li>
<li class="usa-nav__primary-item desktop-lg:margin-x-5 desktop:margin-x-3 tablet:margin-x-0">
<button type="button" class="usa-accordion__button usa-nav__link usa-current" aria-expanded="false" aria-controls="basic-nav-section-four"> <span> Grants and Research <i class="fas fa-caret-down margin-left-05"></i> </span> </button>
<ul id="basic-nav-section-four" class="usa-nav__submenu bg-secondary" hidden="">
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/ep/index.html"><span>NLM Extramural Programs</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="https://www.nlm.nih.gov/research/index.html"><span>NLM Intramural Research Program</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="https://www.ncbi.nlm.nih.gov/"><span>National Center for Biotechnology Information</span></a> </li>
<li class="usa-nav__submenu-item"> <a href="//lhncbc.nlm.nih.gov/"><span>Lister Hill National Center for Biomedical Communications</span></a> </li>
</ul>
</li>
</ul>
</nav>
</div>
</div>
<!-- End of TOP NAV -->
<!-- DIVISIONAL BANNER -->
<div class="bg-gray-70">
<div class="grid-container">
<div class="grid-row divisional">
<div class="grid-col text-white">
<div class="float-left">
<h4 class="margin-bottom-0">Unified Medical Language System<small><sup>&reg;</sup></small> (UMLS<small><sup>&reg;</sup></small>)</h4>
</div>
<div class="float-right margin-top-05">
<p>
<a class="text-white" href="/research/umls/quickstart.html">UMLS Quick Start Guide</a>
&nbsp;&nbsp;|&nbsp;&nbsp;<a class="text-white" href="/research/umls/faq_main.html">FAQs</a>
&nbsp;&nbsp;|&nbsp;&nbsp;<a class="text-white" href="/research/umls/support.html">Customer Support</a>
</p>
</div>
</div>
</div>
</div>
</div>
<!--END DIVISIONAL BANNER -->
<!-- Breadcrumbs -->
<div class="grid-container">
<nav class="usa-breadcrumb usa-breadcrumb--wrap padding-top-1" aria-label="Breadcrumbs">
<ol class="usa-breadcrumb__list">
<li class="usa-breadcrumb__list-item"> <a href="/index.html" class="usa-breadcrumb__link"><span>Home</span></a> </li>
<li class="usa-breadcrumb__list-item"> <a href="/healthit" class="usa-breadcrumb__link"><span>Health IT</span></a> </li>
<li class="usa-breadcrumb__list-item"> <a href="/research/umls/" class="usa-breadcrumb__link"><span>UMLS</span></a> </li>
</ol>
</nav>
</div>
<!-- End Breadcrumbs -->
<main class="padding-bottom-5" id="main">
<div class="grid-container">
<div class="grid-row">
<div class="col">
<h1 class="h1">SNOMED CT® in the UMLS® Metathesaurus®: Inversion Source Transparency Achieved Jan. 2005 SNOMED CT in the 2005AB Release </h1>
</div>
</div>
<p>David Sherertz, Apelon, Inc.</p>
<p>Executive Summary <br />The NLM received 9 SNOMED CT source files in the January 2005 release, and 3 Spanish translation SNOMED CT source files in the October 2004 release, from the College of American Pathologists (CAP) for inclusion in the 2005AB release of the UMLS Metathesaurus. The 2005AB release is the fifth Metathesaurus release to appear in the Rich Release Format (RRF), and the second to include SNOMED Spanish translations. An important principle of the inversion file structure is &ldquo;source transparency&rdquo;. Source transparency implies that, independent of the Metathesaurus value-added attributes, the original SNOMED CT source files can be recreated &lsquo;exactly&rsquo; from the information in the *.src inversion files. For the January 2005 / October 2004 SNOMED CT source files and the 2005AB *.src inversion files, this recreation has been demonstrated successfully.</p>
<p><strong>Abstract </strong><br /><em>One central principle for the incorporation of a source vocabulary into the UMLS is source transparency. This implies that there is no loss of information in the inversion and insertion process. Every element of information contained in the source is included in the inversion files, even though it may be represented in a format different from that of the original source files, and some of the information may not yet be released in the UMLS (for example, inactive SNOMED CT concepts / descriptions, and relationships that include them). The data structure of </em><em> SNOMED CT</em><em> is complex, and some of its information pertains to particular atoms (specific occurrences of a string or concept name), rather than to a </em><em> SNOMED CT</em><em> concept as a whole or to all instances of a string in the vocabulary. Each release of </em><em> SNOMED CT</em><em> is inverted fully, although only the changes are marked as needing review in the insertion of the inversion files. All unchanged information is considered as a safe replacement, and only the source version label of the release is updated in the unchanged information. </em></p>
<p><em> Starting with the January 2005 and October 2004 SNOMED CT source files received from the CAP, all information was extracted into source-derived files. These files formed the basis for comparison. All </em><em> SNOMED CT</em><em> information from the 2005AB *.src inversion files was extracted into UMLS-derived files and converted to a format identical to the source-derived files. Direct row-by-row comparisons between the source-derived and UMLS-derived files proved they are identical. This verified that all information contained in the source can be retrieved from the inversion files, and that source transparency has been achieved again in the incorporation of </em><em> SNOMED CT</em><em> into the UMLS Metathesaurus.</em></p>
<p><strong>Introduction </strong></p>
<p>The RRF extensions to the Metathesaurus release files are an enhancement of the original release format (ORF). These extensions enable all Metathesaurus sources including SNOMED CT and other new sources such as the NCI Thesaurus to be represented <em>transparently</em>. Thus, a critical quality assurance check is to see if the SNOMED CT information can be extracted from the 2005AB source inversion files and used exclusively to reproduce the original SNOMED CT files as received from the CAP. Appendix A shows the file names, field names, and sizes of each of these 12 SNOMED CT files. (They are the ones in the list that begin with &lsquo;sct&rsquo; and end with &lsquo;.txt&rsquo;.) Appendix A also shows the successful result of the process described in the remaining sections, which elaborate on the recreation details. The 12 SNOMED CT files are compared to versions derived from the *.src inversion files. In each comparison, multiple blanks and vertical bars are removed and the files are sorted; this is done for both the original and inversion derived files. There are no differences in any of the files; this defines what is meant by the files being &lsquo;exactly&rsquo; identical.</p>
<p>&nbsp;</p>
<p><strong>Methods <br /></strong>The only UNIX commands used in any of the scripts are <em>nawk, join, sed, sort, set, diff, wc, rm, touch</em> and <em>echo</em>. All of the scripts are <em>csh</em> and are called from a single shell script, called the Master Script, which in turn runs a sequence of scripts in the order described in the steps below.</p>
<ol>
<li>The Master Script first makes the &lsquo;original&rsquo; SNOMED CT files. It does this by extracting all the concepts, descriptions, relationships, subsets, history, and cross mappings. The information in the 12 SNOMED CT files is put into 12 files with descriptive names starting with &lsquo;QA&rsquo; and ending with &lsquo;*.original&rsquo;, as shown in Appendix A. The SNOMED CT files as received from the CAP use a TAB character as a field delimiter. This is preserved in the QA*.original files. Also, in the Cross Mapping TARGETCODES field, the SNOMED CT files use a vertical bar as a subfield delimiter. This is changed to a comma &lsquo;,&rsquo;. These slight changes are made to facilitate comparisons with the files as recreated from the 2005AB *.src inversion files.</li>
<li>The Master Script next extracts all of the pertinent rows from the *.src inversion files.</li>
<li>The Master Script then runs in sequence 6 scripts that recreate from the *.src inversion files pulled in Step 2) the 12 SNOMED CT files corresponding to the QA*.original files created in Step 1). The files created in this step have descriptive names starting with &lsquo;QA&rsquo; and ending with &lsquo;*.final&rsquo;, and their descriptive name is identical to the corresponding QA*.original file made in Step 1). In each of these 6 scripts, after making its QA*.final file(s) (some scripts make two or three QA*.final files), the Unix <em>wc</em> command is run on the QA*.original and QA*.final files, and then a <em>diff</em> command piped into a <em>wc -l</em>. A successful recreation is achieved when the <em>wc</em> counts are identical and the <em>diff</em> with <em>wc -l</em> returns 0.</li>
</ol>
<p>The approach in creating most of the QA*.final files in the 6 scripts of this step is to make a series of &lsquo;triples&rsquo; for the fields that will become the QA*.final file. The triples have the operative ID (concept, description, relationship, subset, cross mapping) as the first field, the field number in the QA*.final file as the second field, and the field value as the third field. Then a <em>sort</em> and simple <em>nawk</em> script reassembles the QA*.final rows from these triples. For some of the QA*.final files, this approach can be simplified, and the QA*.final file can be made by just extracting and reordering the appropriate fields directly from the pertinent *.src inversion file (for example, the Cross Mappings file).</p>
<p>In debugging the 6 scripts in Step 3), it is helpful to create and look over intermediate files during the process of making the QA*.final file to isolate and fix whatever problem is occurring. For the final run, each script removes all of the intermediate files, and only leaves the QA*.original file and QA*.final file when there is a discrepancy between them; a clean run of the Master Script will leave no files. All 6 of the scripts in this step are completely self-contained. One assumption made in making the triples is that the field names for the SNOMED CT files in the *.src files are unique if the name is shortened to its first letter and last five letters. That is currently true, but must be re-verified in the future when new field names are added to SNOMED CT.</p>
<p><strong>Results <br /></strong>Appendix A shows a single run of the Master Script described in Methods that first extracts the QA*.original files from the SNOMED CT files as received from the CAP; for each of the 12 files, it shows the <em>wc</em> counts for QA*.original file. It also shows the output from running the rest of the Master Script. As can be seen, the <em>wc</em> (record/line, word, character) counts are all identical for each of the 12 files, and the <em>diff</em> shows the files are identical. The entire Master Script, starting with just the SNOMED CT files as received, and the 2005AB *.src inversion files, takes around 90 minutes to complete successfully on a small-sized Solaris machine.</p>
<p><strong>Discussion <br /></strong>The 6 scripts that make the 12 QA*.final files vary in their degree of complexity. Interesting details about fine points will be elaborated in this section for each QA*.final file. In the sections below, whenever a reference is made to a specific *.src inversion file, the .src suffix is omitted, but the file name is shown in italics. Below the descriptive name of each file are the row(s) of the SNOMED CT field names for that file, as some of these names are mentioned in the discussion. For brevity, the field delimiter is shown as a vertical bar; in the actual files, the delimiter is a TAB character.</p>
<p>&nbsp;</p>
<p><strong> Concepts </strong></p>
<p><strong> CONCEPTID|CONCEPTSTATUS|FULLYSPECIFIEDNAME|CTV3ID|SNOMEDID|ISPRIMITIVE </strong></p>
<p>This QA*.final file is made by first getting all of the rows in the <em>Classes_Atoms</em> file with a term group of FN or OF. Next the <strong> DESCRIPTIONSTATUS</strong> field is used to determine which of the FN or OF rows is the <strong> FULLYSPECIFIEDNAME,</strong> as some concepts can have more than one. All the other fields are then extracted from the <em>Attributes</em> files. With these triples, the QA*.final file is made and compared to the QA*.original file.</p>
<p>&nbsp;</p>
<p><strong> Descriptions (2) </strong></p>
<p><strong> DESCRIPTIONID|DESCRIPTIONSTATUS|CONCEPTID|TERM|INITIALCAPITALSTATUS| </strong></p>
<p><strong> DESCRIPTIONTYPE|LANGUAGECODE </strong></p>
<p>A separate QA*.final file is made for the U.S. and Spanish descriptions. Note that for each language, there is a separate <em>Classes_Atoms</em> and <em>Attributes</em> file. Each QA*.final file is also relatively simple, as it involves simply getting all of the rows from the <em>Classes_Atoms</em> file for each language. The two exceptions are the &lsquo;description&rsquo; record made for the Cross Mappings and Subsets (term types XM and SB, respectively); these are ignored. Then, all of the other fields are pulled from the <em>Attributes</em> file for each language. With these triples, each QA*.final file is made and compared to the QA*.original file.</p>
<p>&nbsp;</p>
<p><strong> Relationships </strong></p>
<p><strong> RELATIONSHIPID|CONCEPTID1|RELATIONSHIPTYPE|CONCEPTID2|CHARACTERISTICTYPE| </strong></p>
<p><strong> REFINABILITY|RELATIONSHIPGROUP </strong></p>
<p>This QA*.final file is a little more complex. First of all, the ISA relationships are not in the <em>Relationships</em> inversion file; they have to be retrieved from the <em>Treepos.dat</em> file used to build SNOMED CT contexts as part of the inversion. Secondly, the inversion Atom IDs must be converted back to SNOMED CT <strong> CONCEPTIDs</strong>, as this is what is used in the SNOMED Relationship file. Finally, getting the SNOMED CT field RELATIONSHIPTYPE involves first getting the UMLSRELAs with their type from the <em>Attributes</em> file, matching the UMLSRELA string from the <em>Relationships</em> rows, and finally replacing the string with the corresponding RELATIONSHIPTYPE. The remaining fields are pulled from the <em>Attributes</em> file. <a id="OLE_LINK1" name="OLE_LINK1"></a>With these triples, the QA*.final file is made and compared to the QA*.original file.</p>
<p>Note that beginning with the July 2004 SNOMED CT release, the historical relationships are included in the single relationships file; prior to this, the historical relationships were distributed in a separate file with the same format as the relationships file. The only way of separating active relationships from historical relationships is that the historical relationships always have a CHARACTERISTICTYPE value equal to 2.</p>
<p>Also note that the AQ (Allowable Qualifiers) relationships are in both the <em>Attributes</em> file, and the <em>Relationships</em> file. They are made from both places, and a <em>sort -u</em> is done to ensure that they are identically represented in the two places.</p>
<p>&nbsp;</p>
<p><strong> Component History </strong></p>
<p><strong> COMPONENTID|RELEASEVERSION|CHANGETYPE|STATUS|REASON </strong></p>
<p>This QA*.final file simply involves getting all of the rows in the <em>Attributes</em> file with a COMPONENTHISTORY attribute name, and printing out the appropriate fields from those rows in the order of the QA*.original file. The QA*.final file is then compared to the QA*.original file.</p>
<p>&nbsp;</p>
<p><strong> Subsets (2) </strong></p>
<p><strong> SUBSETID|SUBSETORIGINALID|SUBSETVERSION|SUBSETNAME|SUBSETTYPE|LANGUAGECODE| </strong></p>
<p><strong> REALMID|CONTEXTID </strong></p>
<p>A separate QA*.final file is made for the U.S. and Spanish subsets. Note that for each language, there is a separate <em>Classes_Atoms</em> and <em>Attributes</em> file. The QA*.final files are each mostly made from triples extracted from the <em>Attributes</em> file. However, there is one row in <em>Classes_Atoms</em> with a TTY of &lsquo;SB&rsquo; that contains the SUBSETNAME field. With these triples, each QA*.final file is made and compared to the QA*.original file.</p>
<p>&nbsp;</p>
<p><strong> Subset Members (2) </strong></p>
<p><strong> SUBSETID|MEMBERID|MEMBERSTATUS|LINKEDID </strong></p>
<p>A separate QA*.final file is made for the U.S. and Spanish subset members. Note that for each language, there is a separate <em>Attributes</em> file. Each QA*.final file is made entirely from the <em>Attributes</em> file, as all of the fields appear there. Each QA*.final file is made and compared to the corresponding QA*.original file.</p>
<p><strong>&nbsp;</strong></p>
<p><strong> Cross Mappings Sets </strong></p>
<p><strong> MAPSETID|MAPSETNAME|MAPSETTYPE|MAPSETSCHEMEID|MAPSETSCHEMENAME| </strong></p>
<p><strong> MAPSETSCHEMEVERSION|MAPSETREALMID|MAPSETSEPARATOR|MAPSETRULETYPE </strong></p>
<p>This QA*.final file is made almost entirely from the <em>Attributes</em> file, as most of the fields are separate rows in it; these are made into triples. However, there is one row in <em>Classes_Atoms</em> with a TTY of &lsquo;XM&rsquo; that contains the MAPSETNAME field. With these triples, the QA*.final file is made and compared to the QA*.original file.</p>
<p>&nbsp;</p>
<p><strong> Cross Mapping Targets </strong></p>
<p><strong> TARGETID|TARGETSCHEMEID|TARGETCODES|TARGETRULE|TARGETADVICE </strong></p>
<p>This QA*.final file is derived entirely from the XMAPTO rows in the <em>Attributes</em> file. The QA*.final file is made and compared to the QA*.original file.</p>
<p>&nbsp;</p>
<p><strong> Cross Mappings </strong></p>
<p><strong> MAPSETID|MAPCONCEPTID|MAPOPTION|MAPPRIORITY|MAPTARGETID|MAPRULE|MAPADVICE </strong></p>
<p>This QA*.final file is derived entirely from the XMAP rows in the <em>Attributes</em> file. The QA*.final file is made and compared to the QA*.original file.</p>
<p><strong>Conclusion <br /></strong>It is possible to recreate the 12 SNOMED CT files as received from the CAP using only the information in the Metathesaurus 2005AB *.src inversion files. While some of the steps and links require a sophisticated understanding of both the SNOMED CT fields and the UMLS Metathesaurus source inversion fields, and what each means, the process can be implemented simply and tested efficiently. This successful test of the recreation of the SNOMED CT files is a strong endorsement of the *.src and RRF structures, and proves that they do indeed support true source transparency. And, if SNOMED CT in the UMLS Metathesaurus is transparently complete, that is a good indication that any other present or future source will be demonstrably transparent when represented in the *.src file inversion structure, and the RRF distribution structure.</p>
<p>&nbsp;</p>
<p>&nbsp;</p>
<p><strong>Appendix A &ndash; SNOMED CT / 2005AB Inversion Files</strong></p>
<p>Wed Feb 23 10:53:05 PST 2005 master-create.s start</p>
<p>&nbsp;</p>
<p>Content: sct_concepts_20050131.txt</p>
<p>Wed Feb 23 11:00:05 PST 2005 create-concepts.s start</p>
<p>303140 3146279 22295645 QAcons.original</p>
<p>303140 3146279 22295645 QAcons.final</p>
<p>606280 6292558 44591290 total</p>
<p>0</p>
<p>Wed Feb 23 11:07:51 PST 2005 create-concepts.s end</p>
<p>&nbsp;</p>
<p>Content: sct_descriptions_20050131.txt</p>
<p>Wed Feb 23 11:07:51 PST 2005 create-descriptions.s start</p>
<p>753037 7948170 49660932 QAdesc.original</p>
<p>753037 7948170 49660932 QAdesc.final</p>
<p>1506074 15896340 99321864 total</p>
<p>0</p>
<p>Wed Feb 23 11:18:55 PST 2005 create-descriptions.s end</p>
<p>&nbsp;</p>
<p>Content: sct_relationships_20050131.txt</p>
<p>History: sct_componenthistory_20050131.txt</p>
<p>Wed Feb 23 11:18:55 PST 2005 create-relationships-history.s start</p>
<p>1322856 9259992 60736300 QArels.original</p>
<p>1322856 9259992 60736300 QArels.final</p>
<p>2645712 18519984 121472600 total</p>
<p>0</p>
<p>&nbsp;</p>
<p>1169216 4897999 30649970 QAhist.original</p>
<p>1169216 4897999 30649970 QAhist.final</p>
<p>2338432 9795998 61299940 total</p>
<p>0</p>
<p>Wed Feb 23 11:53:50 PST 2005 create-relationships-history.s end</p>
<p>&nbsp;</p>
<p>Cross Mappings: sct_crossmaps_icd9_20050131.txt</p>
<p>Cross Mappings: sct_crossmapsets_icd9_20050131.txt</p>
<p>Cross Mappings: sct_crossmaptargets_icd9_20050131.txt</p>
<p>Wed Feb 23 11:53:50 PST 2005 create-cross-mappings.s start</p>
<p>2 27 293 QAxmapssets.original</p>
<p>2 27 293 QAxmapssets.final</p>
<p>4 54 586 total</p>
<p>0</p>
<p>&nbsp;</p>
<p>14099 42298 642945 QAxmaptargets.original</p>
<p>14099 42298 642945 QAxmaptargets.final</p>
<p>28198 84596 1285890 total</p>
<p>0</p>
<p>&nbsp;</p>
<p>92785 556711 2965579 QAxmaps.original</p>
<p>92785 556711 2965579 QAxmaps.final</p>
<p>185570 1113422 5931158 total</p>
<p>0</p>
<p>Wed Feb 23 11:59:35 PST 2005 create-cross-mappings.s end</p>
<p>&nbsp;</p>
<p>&nbsp;</p>
<p>Subsets/U.S: sct_subsetmembers_us_20050131.txt</p>
<p>Subsets/U.S: sct_subsets_us_20050131.txt</p>
<p>&nbsp;</p>
<p>Subsets/Spanish: /d1/project/snmct/20041031/ SNOMED CT October 2004/Subsets/sct_subsetmembers_20041031.txt</p>
<p>Subsets/Spanish: /d1/project/snmct/20041031/ SNOMED CT October 2004/Subsets/sct_subsets_20041031.txt</p>
<p>Wed Feb 23 11:59:35 PST 2005 create-subsets.s start</p>
<p>2 18 143 QAesssets.original</p>
<p>2 18 143 QAesssets.final</p>
<p>4 36 286 total</p>
<p>0</p>
<p>&nbsp;</p>
<p>688393 2065180 14303724 QAesssetmems.original</p>
<p>688393 2065180 14303724 QAesssetmems.final</p>
<p>1376786 4130360 28607448 total</p>
<p>0</p>
<p>&nbsp;</p>
<p>2 19 147 QAenssets.original</p>
<p>2 19 147 QAenssets.final</p>
<p>4 38 294 total</p>
<p>0</p>
<p>&nbsp;</p>
<p>725206 2175619 14558868 QAenssetmems.original</p>
<p>725206 2175619 14558868 QAenssetmems.final</p>
<p>1450412 4351238 29117736 total</p>
<p>0</p>
<p>Wed Feb 23 12:09:10 PST 2005 create-subsets.s end</p>
<p>&nbsp;</p>
<p>Subsets/Spanish: /d1/project/snmct/20041031/SNOMED CT October 2004/Spanish Descriptions/sct_descriptions_20041031.txt</p>
<p>Wed Feb 23 12:09:10 PST 2005 create-es-descriptions.s start</p>
<p>669710 7864750 50293955 QAdesces.original</p>
<p>669710 7864750 50293955 QAdesces.final</p>
<p>1339420 15729500 100587910 total</p>
<p>0</p>
<p>Wed Feb 23 12:16:25 PST 2005 create-es-descriptions.s end</p>
<p>&nbsp;</p>
<p>Wed Feb 23 12:16:25 PST 2005 master-create.s end</p>
<p class="margin-top-5"><small>Last Reviewed: January 29, 2008</small></p>
</div>
</main>
<!-- FOOTER: NLM page footer. First row has four columns (social media, postal address, policy links, support links); second row links to NLM, NIH, HHS and USA.gov. -->
<footer class="usa-footer__primary-section padding-top-5 padding-bottom-3 insertfooter">
  <div class="grid-container">
    <div class="grid-row">
      <!-- Column 1: social media. Icon URLs use an explicit https: scheme, matching the Facebook icon. -->
      <div class="desktop:grid-col-3 grid-col-6">
        <a href="https://www.nlm.nih.gov/socialmedia/index.html">
          <p class="text-white">Connect with NLM</p>
        </a>
        <ul class="social_media add-list-reset">
          <li class="margin-right-05"><a href="https://www.facebook.com/nationallibraryofmedicine"><img class="bg-secondary" src="https://www.nlm.nih.gov/images/facebook.svg" alt="Facebook"></a></li>
          <li class="margin-right-05"><a title="External link: please review our privacy policy." href="https://www.linkedin.com/company/national-library-of-medicine-nlm/"><img class="bg-secondary" src="https://www.nlm.nih.gov/images/linkedin.svg" alt="LinkedIn"></a></li>
          <li class="margin-right-05"><a title="External link: please review our privacy policy." href="https://twitter.com/NLM_NIH"><img src="https://www.nlm.nih.gov/images/twitter.svg" class="img-fluid bg-secondary" alt="Twitter"></a></li>
          <li class="margin-right-05"><a title="External link: please review our privacy policy." href="https://www.youtube.com/user/NLMNIH"><img src="https://www.nlm.nih.gov/images/youtube.svg" class="bg-secondary" alt="YouTube"></a></li>
          <li class="margin-right-05"><a title="External link: please review our privacy policy." href="https://public.govdelivery.com/accounts/USNLMOCPL/subscriber/new?preferences=true"><img src="https://www.nlm.nih.gov/images/mail.svg" class="bg-secondary" alt="Government Delivery"></a></li>
        </ul>
      </div>
      <!-- Column 2: postal address; the street address links to a map. -->
      <div class="desktop:grid-col-3 grid-col-6">
        <p class="address_footer text-white"> National Library of Medicine <br>
          <a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white"> 8600 Rockville Pike <br>
            Bethesda, MD 20894 </a></p>
      </div>
      <!-- Column 3: policy links. -->
      <div class="desktop:grid-col-3 grid-col-6">
        <p><a href="/web_policies.html" class="text-white"> Web Policies </a><br>
          <a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white"> FOIA </a><br>
          <a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white">HHS Vulnerability Disclosure</a> </p>
      </div>
      <!-- Column 4: support links. NOTE(review): the supportLink href is left exactly as it was; its empty "from=" value looks like it is completed by a script outside this markup. Confirm before changing it. -->
      <div class="desktop:grid-col-3 grid-col-6">
        <p><a class="supportLink text-white" href="//support.nlm.nih.gov?from="> NLM Support Center </a> <br>
          <a href="/accessibility.html" class="text-white"> Accessibility </a><br>
          <a href="/careers/careers.html" class="text-white"> Careers </a></p>
      </div>
    </div>
    <!-- Second row: parent-organization links. -->
    <div class="grid-row">
      <div class="grid-col-12">
        <p class="text-center text-white"> <a class="text-white" href="https://www.nlm.nih.gov/">NLM</a> | <a class="text-white" href="https://www.nih.gov/">NIH</a> | <a class="text-white" href="https://www.hhs.gov/">HHS</a> | <a class="text-white" href="https://www.usa.gov/">USA.gov</a></p>
      </div>
    </div>
  </div>
</footer>
<!-- Page scripts, loaded in the original order: USWDS, jQuery, jQuery Migrate, then the two site scripts (presumably the site scripts rely on jQuery; confirm before reordering). Asset URLs use an explicit https: scheme, matching the uswds-init include in the head. -->
<script src="https://assets.nlm.nih.gov/uswds/js/uswds.min.js"></script>
<script src="https://assets.nlm.nih.gov/jquery/jquery-latest.min.js"></script>
<script src="https://assets.nlm.nih.gov/jquery/jquery-migrate-latest.min.js"></script>
<script src="/scripts/nlm_autocomplete.js"></script>
<script src="/scripts/nlm_uswds.js"></script>
</body>
</html>