nih-gov/www.ncbi.nlm.nih.gov/books/n/handbook/A1237/index.html?report=reader

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en" class="no-js no-jr">
    <head>
        <!-- Encoding declaration comes first so it is seen before any other head content. -->
        <meta charset="utf-8">

        <!-- For pinger, set start time and add meta elements. -->
        <script type="text/javascript">var ncbi_startTime = new Date();</script>

        <!-- Logger begin -->
        <meta name="ncbi_db" content="books">
<meta name="ncbi_pdid" content="book-part">
<meta name="ncbi_acc" content="NBK21106">
<meta name="ncbi_domain" content="handbook">
<meta name="ncbi_report" content="reader">
<meta name="ncbi_type" content="fulltext">
<meta name="ncbi_objectid" content="">
<meta name="ncbi_pcid" content="/NBK21106/?report=reader">
<meta name="ncbi_pagename" content="Glossary - The NCBI Handbook - NCBI Bookshelf">
<meta name="ncbi_bookparttype" content="glossary">
<meta name="ncbi_app" content="bookshelf">
        <!-- Logger end -->

        <!--component id="Page" label="meta"/-->
        <script type="text/javascript" src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/jr.boots.min.js"> </script>
        <title>Glossary - The NCBI Handbook - NCBI Bookshelf</title>
<meta name="apple-mobile-web-app-capable" content="no">
<!-- Zoom is left enabled (no maximum-scale / user-scalable=no) so low-vision users can enlarge the text. -->
<meta name="viewport" content="initial-scale=1,minimum-scale=1">
<meta name="jr-col-layout" content="auto">
<meta name="jr-prev-unit" content="/books/n/handbook/ch24/?report=reader">
<meta name="bk-toc-url" content="/books/n/handbook/?report=toc">
<meta name="robots" content="NOINDEX,NOFOLLOW,NOARCHIVE,NOIMAGEINDEX">
<meta name="citation_inbook_title" content="The NCBI Handbook [Internet]">
<meta name="citation_title" content="Glossary">
<meta name="citation_publisher" content="National Center for Biotechnology Information (US)">
<meta name="citation_date" content="2002">
<meta name="citation_author" content="Jo McEntyre">
<meta name="citation_author" content="Jim Ostell">
<meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK21106/">
<link rel="schema.DC" href="http://purl.org/DC/elements/1.0/">
<meta name="DC.Title" content="Glossary">
<meta name="DC.Type" content="Text">
<meta name="DC.Publisher" content="National Center for Biotechnology Information (US)">
<meta name="DC.Contributor" content="Jo McEntyre">
<meta name="DC.Contributor" content="Jim Ostell">
<meta name="DC.Date" content="2002">
<meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK21106/">
<!-- Open Graph metadata: the protocol identifies these tags by the `property` attribute. -->
<meta property="og:title" content="Glossary">
<meta property="og:type" content="book">
<meta property="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK21106/">
<meta property="og:site_name" content="NCBI Bookshelf">
<meta property="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png">
<meta name="twitter:card" content="summary">
<meta name="twitter:site" content="@ncbibooks">
<meta name="warning" content="This publication is provided for historical reference only and the information may be out of date.">
<meta name="bk-non-canon-loc" content="/books/n/handbook/A1237/?report=reader">
<link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK21106/">
<link href="https://fonts.googleapis.com/css?family=Archivo+Narrow:400,700,400italic,700italic&amp;subset=latin" rel="stylesheet" type="text/css">
<link rel="stylesheet" href="/corehtml/pmc/jatsreader/ptpmc_3.22/css/libs.min.css">
<link rel="stylesheet" href="/corehtml/pmc/jatsreader/ptpmc_3.22/css/jr.min.css">
<meta name="format-detection" content="telephone=no">
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css" type="text/css">
<!-- Print-only stylesheet. -->
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_print.min.css" type="text/css" media="print">
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_reader.min.css" type="text/css">
<style type="text/css">.main-content {background:transparent repeat-y top left;background-image:url(/corehtml/pmc/css/bookshelf/2.26/img/archive.png);background-size: auto, contain; padding:0 0 0 3em }</style>
<style type="text/css">p a.figpopup{display:inline !important} .bk_tt {font-family: monospace}  .first-line-outdent .bk_ref {display: inline}  .body-content h2, .body-content .h2  {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a {text-decoration:none;border:0 !important} .temp-labeled-list  .graphic {display:inline-block !important} .temp-labeled-list  img{width:100%}</style>

    <!-- Absolute https URLs so these assets also resolve when the page is not served from an https origin. -->
    <link rel="shortcut icon" href="https://www.ncbi.nlm.nih.gov/favicon.ico">
<meta name="ncbi_phid" content="CE8EB1E97D5C5A210000000000670056.m_5">
<meta name="referrer" content="origin-when-cross-origin"><link type="text/css" rel="stylesheet" href="https://static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3849091.css"></head>
    <body>
        <!-- Book content! -->


<div id="jr" data-jr-path="/corehtml/pmc/jatsreader/ptpmc_3.22/"><div class="jr-unsupported"><table class="modal"><tr><td><span class="attn inline-block"></span><br />Your browser does not support the NLM PubReader view.<br />Go to <a href="/pmc/about/pr-browsers/">this page</a> to see a list of supported browsers<br />or return to the <br /><a href="/books/NBK21106/?report=classic">regular view</a>.</td></tr></table></div><div id="jr-ui" class="hidden"><nav id="jr-head"><div class="flexh tb"><div id="jr-tb1"><a id="jr-links-sw" class="hidden" title="Links"><svg xmlns="http://www.w3.org/2000/svg" version="1.1" x="0px" y="0px" viewBox="0 0 70.6 85.3" style="enable-background:new 0 0 70.6 85.3;vertical-align:middle" xml:space="preserve" width="24" height="24">
								<style type="text/css">.st0{fill:#939598;}</style>
								<g>
									<path class="st0" d="M36,0C12.8,2.2-22.4,14.6,19.6,32.5C40.7,41.4-30.6,14,35.9,9.8"></path>
									<path class="st0" d="M34.5,85.3c23.2-2.2,58.4-14.6,16.4-32.5c-21.1-8.9,50.2,18.5-16.3,22.7"></path>
									<path class="st0" d="M34.7,37.1c66.5-4.2-4.8-31.6,16.3-22.7c42.1,17.9,6.9,30.3-16.4,32.5h1.7c-66.2,4.4,4.8,31.6-16.3,22.7           c-42.1-17.9-6.9-30.3,16.4-32.5"></path>
								</g>
							</svg> Books</a></div><div class="jr-rhead f1 flexh"><div class="head"><a href="/books/n/handbook/ch24/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a></div><div class="body"><div class="t">Glossary</div><div class="j">The NCBI Handbook [Internet]</div></div><div class="tail"></div></div><div id="jr-tb2"><a id="jr-bkhelp-sw" class="btn wsprkl hidden" title="Help with NLM PubReader">?</a><a id="jr-help-sw" class="btn wsprkl hidden" title="Settings and typography in NLM PubReader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" preserveAspectRatio="none"><path d="M462,283.742v-55.485l-29.981-10.662c-11.431-4.065-20.628-12.794-25.274-24.001  c-0.002-0.004-0.004-0.009-0.006-0.013c-4.659-11.235-4.333-23.918,0.889-34.903l13.653-28.724l-39.234-39.234l-28.72,13.652  c-10.979,5.219-23.68,5.546-34.908,0.889c-0.005-0.002-0.01-0.003-0.014-0.005c-11.215-4.65-19.933-13.834-24-25.273L283.741,50  h-55.484l-10.662,29.981c-4.065,11.431-12.794,20.627-24.001,25.274c-0.005,0.002-0.009,0.004-0.014,0.005  c-11.235,4.66-23.919,4.333-34.905-0.889l-28.723-13.653l-39.234,39.234l13.653,28.721c5.219,10.979,5.545,23.681,0.889,34.91  c-0.002,0.004-0.004,0.009-0.006,0.013c-4.649,11.214-13.834,19.931-25.271,23.998L50,228.257v55.485l29.98,10.661  c11.431,4.065,20.627,12.794,25.274,24c0.002,0.005,0.003,0.01,0.005,0.014c4.66,11.236,4.334,23.921-0.888,34.906l-13.654,28.723  l39.234,39.234l28.721-13.652c10.979-5.219,23.681-5.546,34.909-0.889c0.005,0.002,0.01,0.004,0.014,0.006  c11.214,4.649,19.93,13.833,23.998,25.271L228.257,462h55.484l10.595-29.79c4.103-11.538,12.908-20.824,24.216-25.525  c0.005-0.002,0.009-0.004,0.014-0.006c11.127-4.628,23.694-4.311,34.578,0.863l28.902,13.738l39.234-39.234l-13.66-28.737  
c-5.214-10.969-5.539-23.659-0.886-34.877c0.002-0.005,0.004-0.009,0.006-0.014c4.654-11.225,13.848-19.949,25.297-24.021  L462,283.742z M256,331.546c-41.724,0-75.548-33.823-75.548-75.546s33.824-75.547,75.548-75.547  c41.723,0,75.546,33.824,75.546,75.547S297.723,331.546,256,331.546z"></path></svg></a><a id="jr-fip-sw" class="btn wsprkl hidden" title="Find"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 550 600" preserveAspectRatio="none"><path fill="none" stroke="#000" stroke-width="36" stroke-linecap="round" style="fill:#FFF" d="m320,350a153,153 0 1,0-2,2l170,170m-91-117 110,110-26,26-110-110"></path></svg></a><a id="jr-rtoc-sw" class="btn wsprkl hidden" title="Table of Contents"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M20,20h10v8H20V20zM36,20h44v8H36V20zM20,37.33h10v8H20V37.33zM36,37.33h44v8H36V37.33zM20,54.66h10v8H20V54.66zM36,54.66h44v8H36V54.66zM20,72h10v8 H20V72zM36,72h44v8H36V72z"></path></svg></a></div></div></nav><!-- Dashboard: page indicator, figures/tables strip and progress bar. A single nav keeps the id unique. --><nav id="jr-dash" class="noselect"><div id="jr-pi" class="hidden"><a id="jr-pi-prev" class="hidden" title="Previous page"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a><div class="pginfo">Page <i class="jr-pg-pn">0</i> of <i class="jr-pg-lp">0</i></div><a id="jr-pi-next" class="hidden" title="Next page"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M25,30c80,60 80,0 0,60 c30,-60 30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Next</text></svg></a></div><div id="jr-is-tb"><a id="jr-is-sw" class="btn wsprkl hidden" title="Switch between Figures/Tables strip and Progress bar"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><rect x="10" y="40" width="20" 
height="20"></rect><rect x="40" y="40" width="20" height="20"></rect><rect x="70" y="40" width="20" height="20"></rect></svg></a></div><nav id="jr-istrip" class="istrip hidden"><a id="jr-is-prev" href="#" class="hidden" title="Previous"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M80,40 60,65 80,90 70,90 50,65 70,40z M50,40 30,65 50,90 40,90 20,65 40,40z"></path><text x="35" y="25" textLength="60" style="font-size:25px">Prev</text></svg></a><a id="jr-is-next" href="#" class="hidden" title="Next"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M20,40 40,65 20,90 30,90 50,65 30,40z M50,40 70,65 50,90 60,90 80,65 60,40z"></path><text x="15" y="25" textLength="60" style="font-size:25px">Next</text></svg></a></nav><nav id="jr-progress"></nav></nav><aside id="jr-links-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">NCBI Bookshelf</div></div><div class="cnt lol f1"><a href="/books/">Home</a><a href="/books/browse/">Browse All Titles</a><a class="btn share" target="_blank" rel="noopener noreferrer" href="https://www.facebook.com/sharer/sharer.php?u=https://www.ncbi.nlm.nih.gov/books/NBK21106/"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 33 33" style="vertical-align:middle" width="24" height="24" preserveAspectRatio="none"><g><path d="M 17.996,32L 12,32 L 12,16 l-4,0 l0-5.514 l 4-0.002l-0.006-3.248C 11.993,2.737, 13.213,0, 18.512,0l 4.412,0 l0,5.515 l-2.757,0 c-2.063,0-2.163,0.77-2.163,2.209l-0.008,2.76l 4.959,0 l-0.585,5.514L 18,16L 17.996,32z"></path></g></svg> Share on Facebook</a><a class="btn share" target="_blank" rel="noopener noreferrer" href="https://twitter.com/intent/tweet?url=https://www.ncbi.nlm.nih.gov/books/NBK21106/&amp;text=Glossary"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 33 33" style="vertical-align:middle" width="24" height="24"><g><path 
d="M 32,6.076c-1.177,0.522-2.443,0.875-3.771,1.034c 1.355-0.813, 2.396-2.099, 2.887-3.632 c-1.269,0.752-2.674,1.299-4.169,1.593c-1.198-1.276-2.904-2.073-4.792-2.073c-3.626,0-6.565,2.939-6.565,6.565 c0,0.515, 0.058,1.016, 0.17,1.496c-5.456-0.274-10.294-2.888-13.532-6.86c-0.565,0.97-0.889,2.097-0.889,3.301 c0,2.278, 1.159,4.287, 2.921,5.465c-1.076-0.034-2.088-0.329-2.974-0.821c-0.001,0.027-0.001,0.055-0.001,0.083 c0,3.181, 2.263,5.834, 5.266,6.438c-0.551,0.15-1.131,0.23-1.73,0.23c-0.423,0-0.834-0.041-1.235-0.118 c 0.836,2.608, 3.26,4.506, 6.133,4.559c-2.247,1.761-5.078,2.81-8.154,2.81c-0.53,0-1.052-0.031-1.566-0.092 c 2.905,1.863, 6.356,2.95, 10.064,2.95c 12.076,0, 18.679-10.004, 18.679-18.68c0-0.285-0.006-0.568-0.019-0.849 C 30.007,8.548, 31.12,7.392, 32,6.076z"></path></g></svg> Share on Twitter</a></div></aside><aside id="jr-rtoc-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Table of Content</div></div><div class="cnt lol f1"><a href="/books/n/handbook/?report=reader">Title Information</a><a href="/books/n/handbook/toc/?report=reader">Table of Contents Page</a></div></aside><aside id="jr-help-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Settings</div></div><div class="cnt f1"><div id="jr-typo-p" class="typo"><div><a class="sf btn wsprkl">A-</a><a class="lf btn wsprkl">A+</a></div><div><a class="bcol-auto btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 200 100" preserveAspectRatio="none"><text x="10" y="70" style="font-size:60px;font-family: Trebuchet MS, ArialMT, Arial, sans-serif" textLength="180">AUTO</text></svg></a><a class="bcol-1 btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M15,25 85,25zM15,40 85,40zM15,55 85,55zM15,70 85,70z"></path></svg></a><a class="bcol-2 btn wsprkl"><svg 
xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M5,25 45,25z M55,25 95,25zM5,40 45,40z M55,40 95,40zM5,55 45,55z M55,55 95,55zM5,70 45,70z M55,70 95,70z"></path></svg></a></div></div><div class="lol"><a class="" href="/books/NBK21106/?report=classic">Switch to classic view</a><a href="/books/NBK21106/pdf/Bookshelf_NBK21106.pdf">PDF (464K)</a><a href="/books/n/handbook/pdf/">PDF (7.2M)</a><a href="/books/NBK21106/?report=printable">Print View</a></div></div></aside><aside id="jr-bkhelp-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Help</div></div><div class="cnt f1 lol"><a id="jr-helpobj-sw" data-path="/corehtml/pmc/jatsreader/ptpmc_3.22/" data-href="/corehtml/pmc/jatsreader/ptpmc_3.22/img/bookshelf/help.xml" href="">Help</a><a href="mailto:info@ncbi.nlm.nih.gov?subject=PubReader%20feedback%20%2F%20NBK21106%20%2F%20sid%3ACE8B5AF87C7FFCB1_0191SID%20%2F%20phid%3ACE8EB1E97D5C5A210000000000670056.4">Send us feedback</a><a id="jr-about-sw" data-path="/corehtml/pmc/jatsreader/ptpmc_3.22/" data-href="/corehtml/pmc/jatsreader/ptpmc_3.22/img/bookshelf/about.xml" href="">About PubReader</a></div></aside><aside id="jr-objectbox" class="thidden hidden"><div class="jr-objectbox-close wsprkl">&#10008;</div><div class="jr-objectbox-inner cnt"><div class="jr-objectbox-drawer"></div></div></aside><nav id="jr-pm-left" class="hidden"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 800" preserveAspectRatio="none"><text font-stretch="ultra-condensed" x="800" y="-15" text-anchor="end" transform="rotate(90)" font-size="18" letter-spacing=".1em">Previous Page</text></svg></nav><nav id="jr-pm-right" class="hidden"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 800" preserveAspectRatio="none"><text font-stretch="ultra-condensed" x="800" y="-15" text-anchor="end" transform="rotate(90)" font-size="18" letter-spacing=".1em">Next 
Page</text></svg></nav><nav id="jr-fip" class="hidden"><nav id="jr-fip-term-p"><input type="search" placeholder="search this page" id="jr-fip-term" autocorrect="off" autocomplete="off" /><a id="jr-fip-mg" class="wsprkl btn" title="Find"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 550 600" preserveAspectRatio="none"><path fill="none" stroke="#000" stroke-width="36" stroke-linecap="round" style="fill:#FFF" d="m320,350a153,153 0 1,0-2,2l170,170m-91-117 110,110-26,26-110-110"></path></svg></a><a id="jr-fip-done" class="wsprkl btn" title="Dismiss find">&#10008;</a></nav><nav id="jr-fip-info-p"><a id="jr-fip-prev" class="wsprkl btn" title="Jump to previous match">&#9664;</a><button id="jr-fip-matches">no matches yet</button><a id="jr-fip-next" class="wsprkl btn" title="Jump to next match">&#9654;</a></nav></nav></div><div id="jr-epub-interstitial" class="hidden"></div><div id="jr-content"><article data-type="main"><p class="vip-notice"><strong><a href="/books/n/handbook2e/?report=reader">See "The NCBI Handbook, 2nd Edition"</a></strong></p><p class="vip-notice retraction"><strong>This publication is provided for historical reference only and the information may be out of date.</strong></p><div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><div class="fm-sec"><h1 id="_NBK21106_"><span class="title" itemprop="name">Glossary</span></h1><p class="fm-aai"><a href="#_NBK21106_pubdet_">Publication Details</a></p></div></div><div class="body-content whole_rhythm" itemprop="text"><dl><dt id="app1">3-D or 3D</dt><dd><p>Three-dimensional.</p></dd><dt id="app208">Accession number</dt><dd><p>An Accession number is a unique identifier given to a sequence when it is submitted to
one of the DNA repositories (GenBank, EMBL, DDBJ). The initial deposition of a sequence
record is referred to as version 1. If the sequence is updated, the version number is
incremented, but the Accession number will remain constant.</p></dd><dt id="app2">
<i>Alu</i>
</dt><dd><p>The <i>Alu</i> repeat family comprises short interspersed elements (SINEs)
present in multiple copies in the genomes of humans and other primates. The <i>Alu</i> sequence is approximately 300 bp in length and is found commonly in
<a class="def" href="/books/NBK21106/def-item/app86/">intron</a>s, 3&#x02032; untranslated
regions of genes, and intergenic genomic regions. They are mobile elements and are
present in the human genome in extremely high copy number. Almost 1 million copies of
the <i>Alu</i> sequence are estimated to be present, making it the most abundant
mobile element. The <i>Alu</i> sequence is so named because of the presence of a
recognition site for the <i>Alu</i>I endonuclease in the middle of the <i>Alu</i> sequence. Because of the widespread occurrence of the <i>Alu</i>
repeat in the genome, the <i>Alu</i> sequence is used as a universal primer for
PCR in animal cell lines; it binds in both forward and reverse directions. The <i>Alu</i> universal primer sequence is as follows: 5&#x02032;-GTG GAT CAC CTG AGG
TCA GGA GTT TC-3&#x02032;
(26-mer).
</p></dd><dt id="app3">allele</dt><dd><p>One of the variant forms of a gene at a particular <a class="def" href="/books/NBK21106/def-item/app95/">locus</a> on a chromosome. Different alleles produce variation in inherited
characteristics such as hair color or blood type. In an individual, one form of the
allele (the dominant one) may be expressed more than another form (the recessive one).
When &#x0201c;genes&#x0201d; are considered simply as segments of a nucleotide
sequence, allele refers to each of the possible alternative nucleotides at a specific
position in the sequence. For example, a CT polymorphism such as CCT[C/T]CCAT would have
two alleles: C and T.</p></dd><dt id="app4">API</dt><dd><p>Application Programming Interface. An API is a set of routines that an application uses
to request and carry out lower-level services performed by a computer's operating
system. For computers running a graphical user interface, an API manages an
application's windows, icons, menus, and dialog boxes.</p></dd><dt id="app5">ASN.1</dt><dd><p>Abstract Syntax Notation 1 is an international standard data-representation format used
to achieve interoperability between computer platforms. It allows for the reliable
exchange of data in terms of structure and content by computer and software systems of
all types.</p></dd><dt id="app6">BAC</dt><dd><p>Bacterial Artificial Chromosome. A BAC is a large segment of DNA
(100,000&#x02013;200,000 bp) from another species cloned into bacteria. Once the
foreign DNA has been cloned into the host bacteria, many copies of it can be made.</p></dd><dt id="app7">BankIt</dt><dd><p>BankIt is a tool for the online submission of one or a few sequences into <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a> and is designed to make the submission
process quick and easy. (BankIt also automatically uses <a href="/VecScreen/VecScreen.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">VecScreen</a> to
identify segments of nucleic acid sequence that may be of vector, adapter, or linker
origin to combat the problem of vector contamination in GenBank.)</p></dd><dt id="app8">bit score</dt><dd><p>The value S&#x02032; is derived from the raw alignment score S in which the
statistical properties of the scoring system used have been taken into account. By
normalizing a raw score using the formula: <div class="graphic"><img src="/books/NBK21106/bin/glossfig1.jpg" alt="S&#x02032; = (&#x003bb;S &#x02212; ln K) / ln 2" /></div> a &#x0201c;bit score&#x0201d; S&#x02032; is attained, which has a standard set of units, and where
K and <i>lambda</i> are the statistical parameters of the scoring system.
Because bit scores have been normalized with respect to the scoring system, they can be
used to compare alignment scores from different searches.</p></dd><dt id="app9">BLAST</dt><dd><p>Basic Local Alignment Search Tool (<a href="/pubmed?term=2231712" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=term&amp;targettype=pubmed">Altschul et
al., J Mol Biol 215:403-410; 1990</a>). A sequence comparison <a href="/Education/BLASTinfo/BLAST_algorithm.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">algorithm</a> that is optimized for speed and used to search sequence databases
for optimal local alignments to a query. See the <a href="/books/n/handbook/ch16/?report=reader">BLAST
chapter</a> (Chapter 15) or the <a href="/Education/BLASTinfo/tut1.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">tutorial</a>
or the narrative <a href="/Education/BLASTinfo/guide.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">guide</a> to
BLAST.</p></dd><dt id="app10">blastn</dt><dd><p>nucleotide&#x02013;nucleotide BLAST. blastn takes nucleotide sequences in <a class="def" href="/books/NBK21106/def-item/app53/">FASTA</a> format, <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a> Accession numbers, or <a class="def" href="/books/NBK21106/def-item/app67/">GI</a> numbers and compares them against the NCBI <a href="/blast/html/blastcgihelp.html#nucleotide_databases" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Nucleotide databases</a>.
</p></dd><dt id="app11">blastp</dt><dd><p>protein&#x02013;protein BLAST. blastp takes protein sequences in <a class="def" href="/books/NBK21106/def-item/app53/">FASTA</a> format, <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a> Accession numbers, or <a class="def" href="/books/NBK21106/def-item/app67/">GI</a> numbers and compares them against the NCBI <a href="/blast/html/blastcgihelp.html#protein_databases" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein databases</a>.
</p></dd><dt id="app12">BLAT</dt><dd><p>A DNA/Protein sequence analysis program to quickly find sequences of 95% and
greater similarity of length 40 bases or more. It may miss more divergent or shorter
sequence alignments. BLAT on proteins finds sequences of 80% and greater
similarity of length 20 amino acids or more. BLAT is not BLAST. (See the <a href="http://genome.ucsc.edu/cgi-bin/hgBlat?command=start" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLAT web
page</a>.)</p></dd><dt id="app13">BLink</dt><dd><p>BLAST Link. BLink displays the results of <a class="def" href="/books/NBK21106/def-item/app9/">BLAST</a> searches that have been done for every protein sequence in the Entrez
Protein data domain. It can be accessed by following the BLink link displayed beside any
hit in the results of an Entrez Protein search. In contrast to Entrez's <b>Related
Sequences</b> feature, which lists the titles of similar sequences, BLink displays
the graphical output of precomputed <a class="def" href="/books/NBK21106/def-item/app11/">blastp</a>
results against the non-redundant (nr) protein database. The output includes the
positions of up to 200 BLAST hits on the query sequence, scores, and alignments. BLink
offers a variety of display options, including the distribution of hits by taxonomic
grouping, the best hit to each organism, the protein domains in the query sequence,
similar sequences that have known 3D structures, and more. Additional options allow you
to specify which taxa you would like to exclude, to increase or decrease the BLAST
cutoff score, or to filter the BLAST hits to show only those from a specific source
database, such as <a class="def" href="/books/NBK21106/def-item/app155/">RefSeq</a> or <a class="def" href="/books/NBK21106/def-item/app175/">SWISS-PROT</a>. See the <a href="/sutils/blink.cgi?mode=help" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLink help
document</a> for additional information.</p></dd><dt id="app14">BLOB</dt><dd><p>Binary Large Object (or binary data object). BLOB refers to a large piece of data, such
as a bitmap. A BLOB is characterized by large field values, an unpredictable table size,
and data that are formless from the perspective of a program. It is also a keyword
designating the BLOB structure, which contains information about a block of data.</p></dd><dt id="app15">BLOSUM 62</dt><dd><p>Blocks Substitution Matrix. A substitution matrix in which scores for each position are
derived from observations of the frequencies of substitutions in blocks of local
alignments in related proteins. Each matrix is tailored to a particular evolutionary
distance. In the <a href="/Education/BLASTinfo/Scoring2.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLOSUM 62
matrix</a>, for example, the alignment from which scores were derived was
created using sequences sharing no more than 62% identity. Sequences more
identical than 62% are represented by a single sequence in the alignment to
avoid overweighting closely related family members (<a href="/pubmed?term=1438297" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=term&amp;targettype=pubmed">Henikoff and Henikoff, Proc Natl Acad Sci U S A 89:10915-10919;
1992</a>).</p></dd><dt id="app16">Boolean</dt><dd><p>This term refers to binary algebra that uses the logical operators AND, OR, XOR, and
NOT; the outcomes consist of logical values (either TRUE or FALSE). The keyword boolean
indicates that the expression or constant expression associated with the identifier
takes the value TRUE or FALSE. The logical-AND (&#x00026;&#x00026;)
operator produces the value 1 if both operands have nonzero values; otherwise, it
produces the value 0. The logical-OR (&#x0007c;&#x0007c;) operator produces the value 1 if
either of its operands has a nonzero value. The logical-NOT (!) operator
produces the value 0 if its operand is true (nonzero) and the value 1 if its operand is
FALSE (0). The exclusive OR (XOR) operator yields TRUE only if one of its operands is
TRUE and the other is FALSE. If both operands are the same (either TRUE or FALSE), the
operation yields FALSE.</p></dd><dt id="app209">build</dt><dd><p>A run of the genome assembly and annotation process or the set of products generated by
that run.</p></dd><dt id="app17">CCAP</dt><dd><p>Cancer Chromosome Aberration Project. CCAP was designed to expedite the definition and
detailed characterization of the distinct chromosomal alterations that are associated
with malignant transformation. The project is a collaboration among the <a class="def" href="/books/NBK21106/def-item/app118/">NCI</a>, the <a class="def" href="/books/NBK21106/def-item/app116/">NCBI</a>, and numerous research labs.
</p></dd><dt id="app18">CD</dt><dd><p>Conserved Domain. CD refers to a domain (a distinct functional and/or structural unit
of a protein) that has been conserved during evolution. During evolution, changes at
specific positions of an amino acid sequence in the protein have occurred in a way that
preserves the physico-chemical properties of the original residues, and hence the
structural and/or functional properties of that region of the protein.</p></dd><dt id="app19">CDART</dt><dd><p>Conserved Domain Architecture Retrieval Tool. When given a protein query sequence,
CDART displays the functional domains that make up the protein and lists proteins with
similar domain architectures. The functional domains for a sequence are found by
comparing the protein sequence to a database of conserved domain alignments, <a class="def" href="/books/NBK21106/def-item/app20/">CDD</a> using <a class="def" href="/books/NBK21106/def-item/app159/">RPS-BLAST</a>.</p></dd><dt id="app20">CDD</dt><dd><p>Conserved Domain Database. This database is a collection of sequence alignments and
profiles representing protein domains conserved during molecular evolution.</p></dd><dt id="app21">cDNA</dt><dd><p>complementary DNA. A <a class="def" href="/books/NBK21106/def-item/app37/">DNA</a> sequence
obtained by reverse transcription of a messenger RNA (<a class="def" href="/books/NBK21106/def-item/app114/">mRNA</a>) sequence.</p></dd><dt id="app22">CDS</dt><dd><p>coding region, coding sequence. CDS refers to the portion of a genomic DNA sequence
that is translated, from the start codon to the stop codon, inclusively, if complete. A
partial CDS lacks part of the complete CDS (it may lack either or both the start and
stop codons). Successful translation of a CDS results in the synthesis of a
protein.</p></dd><dt id="app216">CEPH</dt><dd><p>
<a href="http://www.cephb.fr/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Centre d'Etude du Polymorphisme
Humain</a>
</p></dd><dt id="app23">CGAP</dt><dd><p>Cancer Genome Anatomy Project. CGAP is an interdisciplinary program to identify the
human genes expressed in different cancerous states, based on cDNA (<a class="def" href="/books/NBK21106/def-item/app46/">EST</a>) libraries, and to determine the molecular
profiles of normal, precancerous, and malignant cells. The project is a collaboration
among the <a class="def" href="/books/NBK21106/def-item/app118/">NCI</a>, the <a class="def" href="/books/NBK21106/def-item/app116/">NCBI</a>, and numerous research labs.</p></dd><dt id="app24">CGH</dt><dd><p>Comparative Genomic Hybridization. CGH is a fluorescent molecular cytogenetic technique
that identifies chromosomal aberrations and maps these changes to metaphase chromosomes.
CGH can be used to generate a map of DNA copy number changes in tumor genomes. CGH is
based on quantitative two-color fluorescence <i>in situ</i> hybridization
(<a class="def" href="/books/NBK21106/def-item/app56/">FISH</a>). DNA extracted from tumor cells
is labeled in one color (e.g., green) and mixed in a 1:1 ratio with DNA from normal
cells, which is labeled in a different color (e.g., red). The mixture is then applied to
normal metaphase chromosomes. Portions of the genome that are equally represented in
normal and tumor cells will appear orange, regions that are deleted in the tumor sample
relative to the normal sample will appear red, and regions that are present in higher
copy number in the tumor sample (because of amplification) will appear green. Special
image analysis tools are necessary to quantitate the ratio of green-to-red fluorescence
to determine whether a given region is more highly represented in the normal or in the
tumor sample.</p></dd><dt id="app25">CGI</dt><dd><p>Common Gateway Interface. A mechanism that allows a Web server to run a program or
script on the server and send the output to a Web browser.</p></dd><dt id="app26">cluster</dt><dd><p>A group that is created based on certain criteria. For example, a gene cluster may
include a set of genes whose expression profiles are found to be similar
according to certain criteria, or a cluster may refer to a group of clones that are
related to each other by homology.
</p></dd><dt id="app27">Cn3D</dt><dd><p>&#x0201c;See in 3-D&#x0201d; is a structure and sequence alignment viewer for NCBI
databases. It allows viewing of 3-D structures and sequence&#x02013;structure or
structure&#x02013;structure alignments. Cn3D can work as a helper application to the
browser or as a client&#x02013;server application that retrieves structure records
from the Molecular Modeling Database (MMDB, see below) directly from the internet. The
<a href="/Structure/CN3D/cn3d.shtml" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Cn3D homepage</a> provides access to information on how to install the program,
a tutorial to get started, and a comprehensive help document.</p></dd><dt id="app206">codon</dt><dd><p>Sequence of three nucleotides in DNA or mRNA that specifies a particular amino acid
during protein synthesis; also called a triplet. Of the 64 possible codons, 3 are stop
codons, which do not specify amino acids.</p></dd><dt id="app28">COGs</dt><dd><p>Clusters of Orthologous Groups (of proteins) were delineated by comparing protein
sequences from completely sequenced genomes. Each COG consists of individual proteins or
groups of paralogs from at least three lineages and thus corresponds to an ancient
conserved domain.</p></dd><dt id="app29">consensus sequence</dt><dd><p>The nucleotides or amino acids found most commonly at each position in the sequences of
homologous DNAs, RNAs, or proteins.</p></dd><dt id="app30">contig</dt><dd><p>A contiguous segment of the genome made by joining overlapping clones or sequences. A
clone contig consists of a group of cloned (copied) pieces of DNA representing
overlapping regions of a particular chromosome. A sequence contig is an extended
sequence created by merging primary sequences that overlap. A contig map shows the
regions of a chromosome where contiguous DNA segments overlap. Contig maps provide the
ability to study a complete and often large segment of the genome by examining a series
of overlapping clones, which then provide an unbroken succession of information about
that region.</p></dd><dt id="app217">Coriell</dt><dd><p>
<a href="http://locus.umdnj.edu/nia/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Coriell Institute of Aging Cell
Repository</a>
</p></dd><dt id="app31">CPU</dt><dd><p>Central Processing Unit. The CPU is the computational and control unit of a computer,
the device that interprets and executes instructions.</p></dd><dt id="app32">CSS</dt><dd><p>Cascading Style Sheets. CSS specify the formatting details that control the
presentation and layout of <a class="def" href="/books/NBK21106/def-item/app75/">HTML</a> and
<a class="def" href="/books/NBK21106/def-item/app198/">XML</a> elements. CSS can be used for
describing the formatting behavior and text decoration of simply structured XML
documents but cannot display structure that varies from the structure of the source
data.</p></dd><dt id="app33">Cubby</dt><dd><p>A tool of <a class="def" href="/books/NBK21106/def-item/app45/">Entrez</a>, the <a href="/entrez/login.fcgi?call=so.SignOn..Login" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Cubby</a> stores search strategies that may be updated at any time, stores LinkOut
preferences to specify which LinkOut providers have to be displayed in PubMed, and
changes the default document delivery service.</p></dd><dt id="app34">DCMS</dt><dd><p>Data Creation and Maintenance System</p></dd><dt id="app35">DDBJ</dt><dd><p>
<a href="http://www.ddbj.nig.ac.jp/Welcome-e.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">DNA Data Bank of
Japan</a>
</p></dd><dt id="app36">definition line</dt><dd><p>A sequence in FASTA format begins with a single-line description, followed by lines of
sequence data. The definition line or description line is distinguished from the
sequence data by a &#x0201c;greater than&#x0201d; (&#x0003e;) symbol in
the first column (see <a href="/BLAST/fasta.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">example</a>); also
DEFLINE, as in a flatfile. </p></dd><dt id="app37">DNA</dt><dd><p>Deoxyribonucleic acid is the chemical inside the nucleus of a cell that carries the
genetic instructions for making living organisms. DNA is composed of two anti-parallel
strands, each a linear polymer of nucleotides. Each nucleotide has a phosphate group
linked by a phosphoester bond to a pentose (a five-carbon sugar molecule, deoxyribose),
that in turn is linked to one of four organic bases, adenine, guanine, cytosine, or
thymine, abbreviated A, G, C, and T, respectively. The bases are of two types: purines,
which have two rings and are slightly larger (A and G); and pyrimidines, which have only
one ring (C and T). Each nucleotide is joined to the next nucleotide in the chain by a
covalent phosphodiester bond between the 5&#x02032; carbon of one deoxyribose group
and the 3&#x02032; carbon of the next. DNA is a helical molecule with the
sugar&#x02013;phosphate backbone on the outside and the nucleotides extending toward
the central axis. There is specific base-pairing between the bases on opposite strands
in such a way that A always pairs with T and G always pairs with C.</p></dd><dt id="app38">domain</dt><dd><p>A &#x0201c;domain&#x0201d; refers to a discrete portion of a protein assumed to
fold independently of the rest of the protein and which possesses its own
function.</p></dd><dt id="app39">draft sequence</dt><dd><p>Draft sequence refers to DNA sequence that is not yet finished but is generally of high
quality (i.e., an accuracy of greater than 90%). Draft sequence data are mostly
in the form of 10,000 base pair-sized fragments, the approximate chromosomal locations
of which are known. The following keywords are associated with draft sequence: phase 0,
light-pass coverage of a clone, generally only 1&#x000d7; coverage; phase
1, 4&#x02013;10&#x000d7; coverage of a <a class="def" href="/books/NBK21106/def-item/app6/">BAC</a> clone
(order and orientation of the fragments are unknown); and phase 2, 4&#x02013;10&#x000d7;
coverage of a BAC clone (order and orientation of the fragments are known). Phase 3
refers to the completely <a class="def" href="/books/NBK21106/def-item/app55/">finished
sequence</a>.</p></dd><dt id="app40">DTD</dt><dd><p>Document Type Definition. The DTD is an optional part of the prolog of an XML document
that defines the rules of the document. It sets constraints for an XML document by
specifying which elements are present in the document and the relationships between
elements, e.g., which tags can contain other tags, the number and sequence of the tags,
and attributes of the tags. The DTD helps to validate the data when the receiving
application does not have a built-in description of the incoming data.</p></dd><dt id="app41">DUST</dt><dd><p>A program for filtering low-complexity regions from nucleic acid sequences.</p></dd><dt id="app42">E-value</dt><dd><p>Expect value. The E-value is a parameter that describes the number of hits one can
&#x0201c;expect&#x0201d; to see by chance when searching a database of a particular
size. It decreases exponentially with the score (S) that is assigned to a match between
two sequences. Essentially, the E-value describes the random background noise that
exists for matches between sequences. For example, an E-value of 1 assigned to a hit can
be interpreted as meaning that in a database of the current size, one might expect to
see one match with a similar score simply by chance. This means that the lower the
E-value, or the closer it is to &#x0201c;0&#x0201d;, the higher is the
&#x0201c;significance&#x0201d; of the match. However, it is important to note that
matches found with short query sequences can be virtually identical and still have a relatively high
E-value. This is because the calculation of the E-value also takes into account the
length of the query sequence; shorter sequences have a high probability
of occurring in the database purely by chance. For more information, see the following
<a href="http://www.ncbi.nih.gov/BLAST/tutorial/Altschul-1.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">tutorial</a>.</p></dd><dt id="app43">EC number</dt><dd><p>A number assigned to a type of enzyme according to a scheme of standardized enzyme
nomenclature developed by the Enzyme Commission of the Nomenclature Committee of the
International Union of Biochemistry and Molecular Biology (IUBMB). EC numbers may be
found in <a href="http://us.expasy.org/enzyme/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">ENZYME</a>, the
Enzyme nomenclature database, maintained at the <a class="def" href="/books/NBK21106/def-item/app52/">ExPASy</a> molecular biology server.
</p></dd><dt id="app44">EMBL</dt><dd><p>
<a href="http://www1.embl-heidelberg.de/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">European Molecular Biology
Laboratory</a>
</p></dd><dt id="app45">Entrez</dt><dd><p>Entrez is a retrieval system for searching several linked databases. It provides access
to the following NCBI databases: <a class="def" href="/books/NBK21106/def-item/app150/">PubMed</a>,
<a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a>, Protein, Structure, Genome,
PopSet, <a class="def" href="/books/NBK21106/def-item/app124/">OMIM</a>, Taxonomy, Books, ProbeSet,
3D Domains, <a class="def" href="/books/NBK21106/def-item/app188/">UniSTS</a>, SNP, and <a class="def" href="/books/NBK21106/def-item/app20/">CDD</a>. (See the <a href="/books/n/handbook/ch15/?report=reader">Entrez chapter</a> or the <a href="/Entrez/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez web page</a>.)</p></dd><dt id="app97">Entrez Gene </dt><dd><p>(formerly known as LocusLink). Entrez Gene provides tracked, unique identifiers for
genes (<a class="def" href="/books/NBK21106/def-item/app96/">GeneID</a>s) and reports information
associated with those identifiers for unrestricted public use. (See the Entrez Gene
<a href="/books/n/handbook/ch19/?report=reader">chapter</a> or <a href="http://www.ncbi.nih.gov/entrez/query.fcgi?db=gene" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">web page</a>.)</p></dd><dt id="app46">EST</dt><dd><p>Expressed Sequence Tag. ESTs are short (usually approximately 300&#x02013;500 base
pairs), single-pass sequence reads from <a class="def" href="/books/NBK21106/def-item/app21/">cDNA</a>. Typically, they are produced in large batches. They represent the
genes expressed in a given tissue and/or at a given developmental stage. They are tags
(some coding, others not) of expression for a given cDNA library. They are useful in
identifying full-length genes and in mapping.
</p></dd><dt id="app47">e-PCR</dt><dd><p>Electronic <a class="def" href="/books/NBK21106/def-item/app129/">PCR</a> is used to compare a
query sequence to mapped sequence-tagged sites (<a class="def" href="/books/NBK21106/def-item/app173/">STS</a>s) to find a possible map location for the query sequence. e-PCR finds
STSs in DNA sequences by searching for subsequences that closely match the PCR primers
present in mapped markers. The subsequences must have the correct order, orientation,
and spacing such that they could plausibly prime the amplification of a PCR product of the
correct molecular weight.</p></dd><dt id="app48">epub citation</dt><dd><p>&#x0201c;Ahead-of-print&#x0201d; citation. <a class="def" href="/books/NBK21106/def-item/app150/">PubMed</a> now accepts citations from publishers for articles that have been
published electronically ahead of the printed issue. PubMed displays the category
&#x0201c;[epub ahead of print]&#x0201d; in the part of the citation where the volume
and pagination would ordinarily display. For example: Proc Natl Acad Sci U S A. 2000 May
2 [epub ahead of print].</p></dd><dt id="app49">ExoFish</dt><dd><p>Exon Finding by Sequence Homology. Exofish is a tool based on homology searches for the
rapid and reliable identification of human genes. It relies on the sequence of another
vertebrate, the pufferfish <i>Tetraodon nigroviridis</i> (similar to Fugu), to
detect conserved sequences with a very low background. The genome of <i>T.
nigroviridis</i> is eight times more compact than the human genome and has been
used in the comparative identification of human genes from the rough draft of the human
genome (<a href="/pubmed?term=10835645" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=term&amp;targettype=pubmed">Roest Crollius et al., Nat Genet
25:235-238; 2000</a>).</p></dd><dt id="app50">exon</dt><dd><p>Refers to the portion of a gene that encodes for a part of that gene's mRNA. A gene may
comprise many exons, some of which may include only protein-coding sequence; however, an
exon may also include 5' or 3' untranslated sequence. Each exon codes for a specific
portion of the complete protein. In some species (including humans), a gene's exons are
separated by long regions of DNA (called <a class="def" href="/books/NBK21106/def-item/app86/">intron</a>s or sometimes &#x0201c;junk DNA&#x0201d;) that often have no
apparent function but have been shown to encode small untranslated RNAs or regulatory
information. (See also <a class="def" href="/books/NBK21106/def-item/app170/">splice
sites</a>.)</p></dd><dt id="app51">exon-trapped</dt><dd><p>Exon trapping is a technique for cloning exon sequences from genomic DNA by selecting
for functional splice sites, relying on the cellular splicing machinery. The genomic DNA
containing the putative exon(s) is cloned into an exon-trap vector, which has a
promoter, polyadenylation signals, and splice sites, and then transfected into a cell
line. If there are functional splice sites in the genomic DNA fragment, the segments of
DNA between the splice sites will be removed. Total RNA is isolated and
reverse-transcribed. After <a class="def" href="/books/NBK21106/def-item/app21/">cDNA</a> synthesis
and <a class="def" href="/books/NBK21106/def-item/app129/">PCR</a> amplification, the exon of
interest is cloned.</p></dd><dt id="app52">ExPASy</dt><dd><p>
<a href="http://www.expasy.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Expert Protein Analysis System</a>
is a proteomics server of the Swiss Bioinformatics Institute (SIB).</p></dd><dt id="app53">FASTA</dt><dd><p>The first widely used algorithm for similarity searching of protein and DNA sequence
databases. The program looks for optimal local alignments by scanning the sequence for
small matches called &#x0201c;words&#x0201d;. Initially, the scores of segments in
which there are multiple word hits are calculated (&#x0201c;init1&#x0201d;). Later,
the scores of several segments may be summed to generate an &#x0201c;initn&#x0201d;
score. An optimized alignment that includes gaps is shown in the output as
&#x0201c;opt&#x0201d;. The sensitivity and speed of the search are inversely related
and controlled by the &#x0201c;k-tup&#x0201d; variable, which specifies the size of
a &#x0201c;word&#x0201d; (<a href="/pubmed?term=3162770" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=term&amp;targettype=pubmed">Pearson and
Lipman</a>). Also refers to a <a href="/BLAST/fasta.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">format</a> for a nucleic
acid or protein
sequence.
</p></dd><dt id="app54">fingerprint</dt><dd><p>The pattern of bands on a gel produced by a clone when restricted by a particular
enzyme, such as <i>Hin</i>dIII.</p></dd><dt id="app55">finished sequence</dt><dd><p>High-quality, low-error DNA sequence that is free of gaps. To qualify as a finished sequence, only a single error out of every 10,000 bases (i.e., an accuracy of
99.99%) is allowed.</p></dd><dt id="app56">FISH</dt><dd><p>Fluorescence <i>in situ</i> hybridization. In this technique, fluorescent
molecules are used to label a <a class="def" href="/books/NBK21106/def-item/app37/">DNA</a> probe,
which can then hybridize to a specific DNA sequence in a chromosome spread so that the
site becomes visible through a microscope. FISH has been used to highlight the locations
of genes, subchromosome regions, entire chromosomes, or specific DNA sequences. It has
been used for mapping and the detection of genomic rearrangements, as well as studies on
DNA replication.
</p></dd><dt id="app57">flatfile or flat file</dt><dd><p>A flat file is a data file that contains records (each corresponding to a row in a
table); however, these records have no structured relationships. To interpret these
files, the format properties of the file should be known. For example, a database
management system may allow the user to export data to a comma-delimited file. Such a
file is called a flat file because it has no inherent information about the data, and
interpretation requires additional information. Files in a database management system
have more complex storage structures.
</p></dd><dt id="app210">freeze</dt><dd><p>To copy changing data so as to preserve the dataset as it existed at a particular point
in time. Also used to refer to the resulting set of frozen data.</p></dd><dt id="app58">FTP</dt><dd><p>File Transfer Protocol. A method of retrieving files over a network directly to the
user's computer or to his/her home directory using a set of protocols that govern how
the data are to be transported.</p></dd><dt id="app59">gap</dt><dd><p>A gap is a space introduced into an alignment to compensate for insertions and
deletions in one sequence relative to another. To prevent the accumulation of too many
gaps in an alignment, introduction of a gap causes the deduction of a fixed amount (the
gap score) from the alignment score. Extension of the gap to encompass additional
nucleotides or amino acids is also penalized in the scoring of an alignment. (See the
<a href="/Education/BLASTinfo/Alignment_Scores2.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">figure</a> for more information.)</p></dd><dt id="app60">GB</dt><dd><p>gigabytes</p></dd><dt id="app61">GBFF</dt><dd><p>
<a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a> Flat File. Refers to a format
.gbff.
</p></dd><dt id="app62">GenBank</dt><dd><p>GenBank is a database of nucleotide sequences from more than 100,000 organisms. Records
that are annotated with coding region features also include amino acid translations.
GenBank belongs to an international collaboration of sequence databases that also
includes <a class="def" href="/books/NBK21106/def-item/app44/">EMBL</a> and <a class="def" href="/books/NBK21106/def-item/app35/">DDBJ</a>. [See the <a href="/books/n/handbook/ch1/?report=reader">GenBank</a> chapter (Chapter 1) or the <a href="/Genbank/submit.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GenBank web
page</a>.]</p></dd><dt id="app96">GeneID</dt><dd><p>GeneID is a unique identifier that is assigned to a gene record in <a class="def" href="/books/NBK21106/def-item/app97/">Entrez Gene</a>. It is an integer and is species
specific. In other words, the integer assigned to dystrophin in human is different from
that in any other species. For genomes that had been represented in LocusLink, the
GeneID is the same as the LocusID. The GeneID is reported in RefSeq records as a
'db_xref' (e.g. /db_xref="GeneID:856646", in GenBank format). </p></dd><dt id="app63">genetic code</dt><dd><p>The instructions in a gene that tell the cell how to make a specific protein. A, T, G,
and C are the &#x0201c;letters&#x0201d; of the <a class="def" href="/books/NBK21106/def-item/app37/">DNA</a> code; they stand for the chemicals adenine, thymine, guanine, and
cytosine, respectively, that make up the nucleotide bases of DNA. Each gene's code
combines the four chemicals in various ways to spell out three-letter
&#x0201c;words&#x0201d; that specify which amino acid is needed at every position
for making a protein.
</p></dd><dt id="app64">GenomeScan</dt><dd><p>A gene identification algorithm that is used to identify exon&#x02013;intron
structures in genomic DNA sequence.
</p></dd><dt id="app65">genotype</dt><dd><p>The genetic identity of an individual that does not show as outward characteristics.
The genotype refers to the pair of alleles for a given region of the genome that an
individual carries.</p></dd><dt id="app66">GEO</dt><dd><p>Gene Expression Omnibus. GEO is a gene expression data repository and online resource
for the retrieval of gene expression data from any organism or artificial source. Many
types of gene expression data from platform types, such as spotted microarray,
high-density oligonucleotide array, hybridization filter, and serial analysis of gene
expression (<a class="def" href="/books/NBK21106/def-item/app160/">SAGE</a>) data, are accepted,
accessioned, and archived as a public dataset. [See the <a href="/books/n/handbook/ch6/?report=reader">GEO chapter</a> (Chapter 6) or the <a href="/geo/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">GEO web page</a>.]</p></dd><dt id="app67">GI</dt><dd><p>The GenInfo Identifier is a sequence identification number for a nucleotide sequence.
If a nucleotide sequence changes in any way, a new GI number will be assigned. A
separate GI number is also assigned to each protein translation within a nucleotide
sequence record, and a new GI is assigned if the protein translation changes in any way.
GI sequence identifiers run parallel to the new accession.version system of sequence
identifiers (see the description of <a href="/Sitemap/samplerecord.html#VersionB" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Version</a>).</p></dd><dt id="app68">GSS</dt><dd><p>Genome Survey Sequences are analogous to <a class="def" href="/books/NBK21106/def-item/app46/">EST</a>s except that the sequences are genomic in origin, rather than cDNA
(mRNA). The GSS division of <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a>
contains (but is not limited to) the following types of data: random
&#x0201c;single-pass read&#x0201d; genome survey sequences, cosmid/<a class="def" href="/books/NBK21106/def-item/app6/">BAC</a>/<a class="def" href="/books/NBK21106/def-item/app201/">YAC</a> end sequences, <a class="def" href="/books/NBK21106/def-item/app51/">exon-trapped</a> genomic sequences, and <a class="def" href="/books/NBK21106/def-item/app2/"><i>Alu</i></a>-PCR sequences.</p></dd><dt id="app69">heterozygosity</dt><dd><p>The probability that a diploid individual will have two different alleles at a
particular genome locus. These individuals are defined as heterozygous, whereas
individuals who have two identical alleles at the locus are defined as homozygous. The
probability can be estimated by sampling a representative number of individuals from the
population and dividing the number of heterozygotes by the total number sampled.</p></dd><dt id="app70">HIV</dt><dd><p>Human Immunodeficiency Virus. HIV-1 is a retrovirus that is recognized as the causative
agent of AIDS (Acquired Immunodeficiency Syndrome).
</p></dd><dt id="app71">HNPCC</dt><dd><p>Hereditary nonpolyposis colon cancer</p></dd><dt id="app72">homogeneously staining region</dt><dd><p>A region of the chromosome identified cytologically by DNA staining or the <a class="def" href="/books/NBK21106/def-item/app56/">FISH</a> technique because of the presence of
multiple copies of a subchromosomal region resulting from amplification.
</p></dd><dt id="app73">homologous</dt><dd><p>The term refers to similarity attributable to descent from a common ancestor.
Homologous chromosomes are members of a pair of essentially identical chromosomes, each
derived from one parent. They have the same or allelic genes with genetic loci arranged
in the same order. Homologous chromosomes synapse during
meiosis.
</p></dd><dt id="app74">HTGS</dt><dd><p>High-Throughput Genomic Sequences. The source of HTGS are large-scale genome sequencing
centers; <a class="def" href="/books/NBK21106/def-item/app186/">unfinished sequence</a>s are in
phases 0, 1, and 2, and <a class="def" href="/books/NBK21106/def-item/app55/">finished sequence</a>s
are in phase 3.</p></dd><dt id="app211">HTGS_CANCELLED</dt><dd><p>A keyword added to GenBank entries by sequencing centers to indicate that work has
stopped on a clone and that the existing sequence will not be finished. Sequencing
centers may stop work because the clone is redundant or for various other reasons.</p></dd><dt id="app212">HTGS_PHASE0, HTGS_PHASE1, HTGS_PHASE2, HTGS_PHASE3</dt><dd><p>Keywords added to GenBank entries by sequencing centers to indicate the status (phase)
of the sequence (see phase definitions described under <a class="def" href="/books/NBK21106/def-item/app39/">draft sequence</a>).</p></dd><dt id="app75">HTML</dt><dd><p>Hypertext Markup Language. HTML is derived from <a class="def" href="/books/NBK21106/def-item/app163/">SGML</a>. It is a text-based mark-up language and is used primarily to
display information using a web browser and to link pieces of information via
hyperlinks. The tags used in an HTML document provide information only on how the
content is to be displayed but do not provide information about the content they
encompass.</p></dd><dt id="app76">HUP</dt><dd><p>Hold Until Published. HUP refers to the category for data that is electronically
submitted for when it should be released to the public.
</p></dd><dt id="app77">ICBN</dt><dd><p>International Code of Botanical Nomenclature</p></dd><dt id="app78">ICD</dt><dd><p>International Classification of
Diseases
</p></dd><dt id="app79">ICD-O-3</dt><dd><p>
<a href="http://training.seer.cancer.gov/module_icdo3/icdo3_home.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">International Classification of Diseases for Oncology, 3rd edition</a>
</p></dd><dt id="app80">ICNB</dt><dd><p>International Code of Nomenclature of Bacteria</p></dd><dt id="app81">ICNCP</dt><dd><p>International Code of Nomenclature for Cultivated Plants</p></dd><dt id="app82">ICTV</dt><dd><p>
<a href="/ICTV/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">International Committee on
Taxonomy of Viruses</a>
</p></dd><dt id="app83">ICVCN</dt><dd><p>
<a href="/ICTV/rules.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">International
Code of Virus Classification and Nomenclature</a>
</p></dd><dt id="app84">ICZN</dt><dd><p>
<a href="http://www.iczn.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">International Code of Zoological
Nomenclature</a>
</p></dd><dt id="app203">ideogram</dt><dd><p>A diagrammatic representation of the karyotype of an organism.</p></dd><dt id="app85">IMAGE Consortium</dt><dd><p>Integrated Molecular Analysis of Genomes and their Expression. A consortium of academic
groups that share high-quality, arrayed cDNA libraries and place sequence, map, and
expression data of the clones in these arrays into the public domain. With the use of
this information, unique clones can be rearrayed to form a &#x0201c;master
array&#x0201d;, with the aim of ultimately having a representative cDNA from every
gene in the genome under study. To date, human, mouse, rat, zebrafish, and <i>Xenopus laevis</i> genomes have been
studied.
</p></dd><dt id="app86">intron</dt><dd><p>Refers to that portion of the DNA sequence that is present in the primary transcript
and that is removed by splicing during RNA processing and is not included in the mature,
functional <a class="def" href="/books/NBK21106/def-item/app114/">mRNA</a>, rRNA, or tRNA. Also
called an intervening sequence. (See also <a class="def" href="/books/NBK21106/def-item/app170/">splice
sites</a>.)</p></dd><dt id="app87">ISAM</dt><dd><p>Indexed Sequential-Access Method. ISAM is a database access method. It allows data
records in a database to be accessed either sequentially (in the order in which they
were entered) or randomly (using an index). In the index, each record has a unique key
that enables its rapid location. The key is the field used to reference the
record.</p></dd><dt id="app88">ISCN</dt><dd><p>International System for Human Cytogenetic Nomenclature
</p></dd><dt id="app89">ISO</dt><dd><p>
<a href="http://www.iso.ch/iso/en/ISOOnline.openerpage" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">International
Organization for Standardization</a>
</p></dd><dt id="app90">ISSN</dt><dd><p>
<a href="http://www.issn.org:8080/English/pub/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">International Standard
Serial Number</a>. The ISSN is an eight-digit number that identifies periodical
publications, including electronic serials.</p></dd><dt id="app91">karyotype</dt><dd><p>The particular chromosome complement of an individual or a related group of
individuals, as defined by both the number and morphology of the chromosomes, usually in
mitotic metaphase, and arranged by pairs according to the standard classification.</p></dd><dt id="app92">LANL</dt><dd><p>
<a href="http://www.lanl.gov/worldview/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Los Alamos National
Lab</a>
</p></dd><dt id="app93">LIMS</dt><dd><p>Laboratory Information Management Systems. LIMS comprise software that helps biological
and chemical laboratories handle data generation, information management, and data
archiving.</p></dd><dt id="app94">LinkOut</dt><dd><p>A registry service to create links from specific articles, journals, or biological data
in <a class="def" href="/books/NBK21106/def-item/app45/">Entrez</a> to resources on external web
sites. Third parties can provide a URL, resource name, brief description of their web
sites, and specification of the NCBI data from which they would like to establish links.
The specification can be written as a valid Boolean query to Entrez or as a list of
identifiers for specific articles or sequences. Entrez PubMed users can then select
which external links are visible in their searches through the NCBI Cubby service (see
above). (See the <a href="/books/n/handbook/ch17/?report=reader">LinkOut</a> chapter or <a href="/entrez/linkout/doc/linkoutoverview.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">web
page</a>.)</p></dd><dt id="app95">locus</dt><dd><p>In a genomic context, locus refers to a position on a chromosome. It may, therefore,
refer to a marker, a gene, or any other landmark that can be described.</p></dd><dt id="app98">MACAW</dt><dd><p>Multiple Alignment Construction and Analysis Workbench. MACAW is a program for
locating, analyzing, and editing blocks of localized sequence similarity among multiple
sequences and linking them into a composite multiple alignment.</p></dd><dt id="app99">Map Viewer</dt><dd><p>The Map Viewer is a software component of <a href="/entrez/query.fcgi?db=Genome" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez
Genomes</a> that provides special browsing capabilities for a subset of organisms.
It allows one to view and search an organism's complete genome, display chromosome maps,
and zoom into progressively greater levels of detail, down to the sequence data for a
region of interest. If multiple maps are available for a chromosome, it displays them
aligned to each other based on shared marker and gene names and, for the sequence maps,
based on a common sequence coordinate system. The organisms currently represented in the
Map Viewer are listed in the <a href="/PMGifs/Genomes/MapViewerHelp.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Entrez Map
Viewer help document</a>, which provides general information on how to use that
tool. The number and types of available maps vary by organism and are described in the
&#x0201c;data and search tips&#x0201d; file provided for each organism.</p></dd><dt id="app100">MB</dt><dd><p>megabytes</p></dd><dt id="app102">MEDLINE</dt><dd><p>MEDLINE is <a class="def" href="/books/NBK21106/def-item/app121/">NLM</a>'s database of indexed
journal citations and abstracts in the fields of biomedicine and healthcare. It
encompasses nearly 4,500 journals published in the United States and more than 70 other
countries. (For more information, see the <a href="http://www.nlm.nih.gov/pubs/factsheets/dif_med_pub.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Fact
Sheet</a>.)</p></dd><dt id="app103">MegaBLAST</dt><dd><p>MegaBLAST is a program for aligning sequences that differ slightly as a result of
sequencing or other similar &#x0201c;errors&#x0201d;. When larger word size is used,
it is up to 10 times faster than more common sequence-similarity programs. MegaBLAST is
also able to efficiently handle much longer DNA sequences than the <a class="def" href="/books/NBK21106/def-item/app10/">blastn</a> program of the traditional BLAST algorithm. It uses
the GREEDY algorithm for a nucleotide sequence alignment search.</p></dd><dt id="app104">MeSH</dt><dd><p>Medical Subject Headings. MeSH refers to the controlled vocabulary of <a class="def" href="/books/NBK21106/def-item/app121/">NLM</a> used for indexing articles in PubMed. MeSH
terminology provides a consistent way to retrieve information that may use different
terminology for the same concepts. (See the <a href="http://www.nlm.nih.gov/mesh/meshhome.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">MeSH homepage</a>.)</p></dd><dt id="app105">Metathesaurus</dt><dd><p>
<a href="http://ncievs.nci.nih.gov/indexMetaphrase.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Metathesaurus</a> is a National Cancer Institute browser containing different
biomedical vocabularies, including the International Classification of Diseases for
Oncology <a class="def" href="/books/NBK21106/def-item/app79/">ICD-O-3</a>.</p></dd><dt id="app106">mFASTA</dt><dd><p>Multi-FASTA format.
</p></dd><dt id="app107">MGC</dt><dd><p>Mammalian Gene Collection. <a href="http://mgc.nci.nih.gov/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">MGC</a> is a project of the <a class="def" href="/books/NBK21106/def-item/app120/">NIH</a> to
provide a complete set of full-length (open reading frame) sequences and cDNA clones of
expressed genes for human and mouse.</p></dd><dt id="app108">MGD</dt><dd><p>
<a href="http://www.informatics.jax.org/mgihome/MGD/aboutMGD.shtml" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Mouse Genome Database</a>. MGD contains information on mouse genetic markers,
molecular segments, phenotypes, comparative mapping data, experimental mapping data, and
graphical displays for genetic, physical, and cytogenetic maps.</p></dd><dt id="app109">MGI</dt><dd><p>
<a href="http://www.informatics.jax.org/mgihome/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Mouse Genome
Informatics</a>. MGI houses a database that provides integrated access to data
on the genetics, genomics, and biology of the laboratory mouse.</p></dd><dt id="app110">microsatellite</dt><dd><p>Repetitive stretches of short sequences of DNA used as genetic markers to track
inheritance in families (e.g., CC[TATATATA]CCCT). Also known as short tandem repeats
(STRs).</p></dd><dt id="app111">MIM</dt><dd><p>Mendelian Inheritance in Man. First published in 1966, <i><a href="http://www.press.jhu.edu/press/books/titles/f97/f97mcme.htm" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Mendelian
Inheritance in Man (MIM)</a></i> is a genetic knowledge base that serves clinical medicine and biomedical research,
including the Human Genome Project.</p></dd><dt id="app213">minimal tiling path</dt><dd><p>An ordered list or map that defines the minimal set of overlapping clones needed to
provide complete coverage of a chromosome or other extended segment of DNA (compare with
<a class="def" href="/books/NBK21106/def-item/app182/">tiling path</a>).</p></dd><dt id="app112">MMDB</dt><dd><p>Molecular Modeling Database. MMDB is a database of three-dimensional biomolecular
structures derived from X-ray crystallography and nuclear magnetic resonance (NMR)
spectroscopy.</p></dd><dt id="app113">MMDB-ID</dt><dd><p>Molecular Modeling Database Accession number.</p></dd><dt id="app114">mRNA</dt><dd><p>messenger RNA. mRNA describes the section of a genomic DNA sequence that is
transcribed, and can include the 5' untranslated region (5'UTR), <a class="def" href="/books/NBK21106/def-item/app22/">CDS</a>, and 3' untranslated region (3'UTR). Successful
translation of the CDS section of an mRNA results in the synthesis of a protein.</p></dd><dt id="app115">mutation</dt><dd><p>A permanent structural alteration in DNA. In most cases, DNA changes have either no
effect or cause harm, but occasionally a mutation can improve an organism's chance of
surviving, and the beneficial change is passed on to the organism's descendants.
Typically, mutations are more rare than polymorphisms in population samples because
natural selection recognizes their lower fitness and removes them from the
population.</p></dd><dt id="app116">NCBI</dt><dd><p>
<a href="http://www.ncbi.nlm.nih.gov/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">National Center for
Biotechnology Information</a>
</p></dd><dt id="app117">NCBI Toolkit</dt><dd><p>Contains supported software tools from the Information Engineering Branch (IEB) of the
NCBI. The NCBI Toolkit describes the three components of the ToolBox: data model, data
encoding, and programming libraries. Provides access to documentation for the DataModel,
C Toolkit, C++ Toolkit, NCBI C Toolkit Source Browser, XML Demo Program, XML DTDs, and
the <a href="ftp://ftp.ncbi.nih.gov/toolbox/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">FTP site</a>.</p></dd><dt id="app118">NCI</dt><dd><p>
<a href="http://www.nci.nih.gov/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">National Cancer Institute</a>
</p></dd><dt id="app119">NEXUS</dt><dd><p>NEXUS refers to a file format designed to contain data for processing by computer
programs. NEXUS files should end with .nxs or .nex for purposes of clarity (<a href="/pubmed?term=11975335" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=term&amp;targettype=pubmed">Maddison et al., Syst Biol 46:590-621;
1997</a>).</p></dd><dt id="app120">NIH</dt><dd><p>
<a href="http://www.nih.gov/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">National Institutes of Health</a>
</p></dd><dt id="app121">NLM</dt><dd><p>
<a href="http://www.nlm.nih.gov/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">National Library of Medicine</a>
</p></dd><dt id="app122">NMR</dt><dd><p>Nuclear Magnetic Resonance. NMR is a spectroscopic technique used for the determination
of protein structure.</p></dd><dt id="app123">nr-PDB</dt><dd><p>non-redundant Protein Data Bank</p></dd><dt id="app124">OMIM</dt><dd><p>Online Mendelian Inheritance in Man. OMIM is a directory of human genes and genetic
disorders, with links to literature references, sequence records, maps, and related
databases.</p></dd><dt id="app125">ortholog</dt><dd><p>Orthologs are genes in different species that derive from a single ancestral gene
in the last common ancestor of the respective species.</p></dd><dt id="app126">orthology</dt><dd><p>Orthology describes genes in different species that derive from a common ancestor,
i.e., they are direct evolutionary counterparts.</p></dd><dt id="app127">paralog</dt><dd><p>A paralog is one of a set of homologous genes that have diverged from each other as a
consequence of gene duplication. For example, the mouse &#x003b1;-<i>globin</i> and &#x003b2;-<i>globin</i> genes are paralogs. The relationship
between mouse &#x003b1;-<i>globin</i> and chick &#x003b2;-<i>globin</i> is also considered paralogous.</p></dd><dt id="app128">paralogy</dt><dd><p>Paralogy describes the relationship of homologous genes that arose by gene
duplication.</p></dd><dt id="app129">PCR</dt><dd><p>Polymerase Chain Reaction. A technique for amplifying a specific DNA segment in a
complex mixture. Also present in the DNA mixture are short oligonucleotide primers to
the DNA segment of interest and reagents for DNA synthesis. PCR relies on the ability of
DNA to separate into its two complementary strands at high temperature (a process called
denaturation) and for the two strands to anneal at an optimal lower temperature
(annealing). The annealing phase is followed by a DNA synthesis step at an optimal
temperature for a heat-stable DNA polymerase. After multiple rounds of denaturation,
annealing, and DNA synthesis, the DNA sequence specified by the oligonucleotide primers
is amplified.</p></dd><dt id="app130">PDB</dt><dd><p>
<a href="http://www.rcsb.org/pdb/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein Data Bank</a>. The PDB
is a database for 3D macromolecular structure data.</p></dd><dt id="app131">Pfam</dt><dd><p>
<a href="http://pfam.wustl.edu/index.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Pfam</a> is a database
housing a large collection of multiple sequence alignments and hidden Markov models
covering many common protein domains.</p></dd><dt id="app132">phenotype</dt><dd><p>The observable traits or characteristics of an organism, e.g., hair color, weight, or
the presence or absence of a disease. Phenotypic traits are not necessarily
genetic.</p></dd><dt id="app133">PHRAP</dt><dd><p>A computer program that assembles raw sequence into sequence contigs (see above) and
assigns to each position in the sequence an associated &#x0201c;quality
score&#x0201d;, on the basis of the <a class="def" href="/books/NBK21106/def-item/app134/">PHRED</a> scores of the raw sequence reads. A PHRAP quality score of <i>X</i> corresponds to an error probability of approximately 10<sup>-<i>X</i>/10</sup>.
Thus, a PHRAP quality score of 30 corresponds to 99.9% accuracy
for a base in the assembled sequence.
</p></dd><dt id="app134">PHRED</dt><dd><p>A computer program that analyses raw sequence to produce a &#x0201c;base
call&#x0201d; with an associated &#x0201c;quality score&#x0201d; for each position
in the sequence. A PHRED quality score of <i>X</i> corresponds to an error
probability of approximately 10<sup>-<i>X</i>/10</sup>.
Thus, a PHRED quality score of 30 corresponds to
99.9% accuracy for the base call in the raw
read.
</p></dd><dt id="app135">phyletic pattern</dt><dd><p>Pattern of presence&#x02013;absence of a cluster of orthologs (COG) in different
species.</p></dd><dt id="app136">PHYLIP</dt><dd><p>
<a href="http://evolution.genetics.washington.edu/phylip.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">PHYLogeny Inference Package</a>. A package of programs for various computer
platforms to infer phylogenies or evolutionary trees, freely available from the
Web.</p></dd><dt id="app137">PIR</dt><dd><p>
<a href="http://pir.georgetown.edu/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein Information
Resource</a>
</p></dd><dt id="app138">PMC</dt><dd><p>
<a href="http://www.pubmedcentral.nih.gov/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">PubMed Central</a>.
NLM's digital archive of life sciences journal literature.</p></dd><dt id="app139">PMID</dt><dd><p>PubMed ID number</p></dd><dt id="app140">PNG</dt><dd><p>Portable Network Graphics. An extensible file format for the lossless, well-compressed
storage of raster images (images that are composed of horizontal lines of pixels, such
as those created by a computer screen). Compression of image, media, and application
files is necessary to reduce the transmission time across the web. The technique of
lossless compression reduces the size of the file without sacrificing any original data,
and the image after expansion is exactly as it was before compression. PNG overcomes the
patent issues of GIF (Graphic Interchange Format) and can replace many common uses of
TIFF (Tagged Image File Format). Several features such as indexed color, grayscale, and
truecolor are supported, as well as an optional alpha-channel. PNG is designed to work
well in online viewing applications and is supported as an image standard by the
<a class="def" href="/books/NBK21106/def-item/app197/">WWW</a>.</p></dd><dt id="app141">poly A</dt><dd><p>A string of adenylic acid residues that are added to the 3&#x02032; end of the
primary <a class="def" href="/books/NBK21106/def-item/app114/">mRNA</a> transcript. Poly(A)
polymerase is the enzyme that adds the poly A tail, which is between 100 and 250 bases
long.</p></dd><dt id="app142">polymorphism</dt><dd><p>A common variation in the sequence of <a class="def" href="/books/NBK21106/def-item/app37/">DNA</a>
among individuals. Genetic variations occurring in more than 1% of the
population would be considered useful polymorphisms for genetic linkage analysis.</p></dd><dt id="app204">polypeptide</dt><dd><p>Linear polymer of amino acids connected by peptide bonds. Proteins are large
polypeptides, and the two terms are commonly used interchangeably.</p></dd><dt id="app143">PRF</dt><dd><p>
<a href="http://www.prf.or.jp/en/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein Research Foundation</a>
</p></dd><dt id="app144">private polymorphism</dt><dd><p>Variations that are only common in specific populations. Usually such populations are
reproductively isolated from other, larger groups. These variations may be completely
absent in other groups.</p></dd><dt id="app145">ProtEST</dt><dd><p>A database of protein sequences from eight organisms: human (<i>Homo
sapiens</i>), mouse (<i>Mus musculus</i>), rat (<i>Rattus
norvegicus</i>), fruitfly (<i>Drosophila melanogaster</i>), worm (<i>Caenorhabditis elegans</i>), yeast (<i>Saccharomyces cerevisiae</i>),
plant (<i>Arabidopsis thaliana</i>), and bacteria (<i>Escherichia
coli</i>). (See the <a href="/UniGene/ProtEST/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">ProtEST web
page</a>.)</p></dd><dt id="app146">PROW</dt><dd><p>
<a href="/PROW/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Protein Reviews On the
Web</a>. An online resource that features PROW Guides&#x02014;authoritative,
short, structured reviews on proteins and protein families. The Guides provide
approximately 20 standardized categories of information (abstract, biochemical function,
ligands, references, etc.) for each protein.</p></dd><dt id="app147">pseudogene</dt><dd><p>A sequence of DNA that is very similar to a normal gene but that has been altered
slightly so that it is not expressed. Such genes were probably once functional but, over
time, acquired one or more mutations that rendered them incapable of producing a protein
product.</p></dd><dt id="app148">PSI-BLAST</dt><dd><p>Position-Specific Iterated BLAST. PSI-BLAST (<a href="/pubmed?term=9254694" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=term&amp;targettype=pubmed">Altschul et al., Nucleic Acids Res 25:3389-3402; 1997</a>) is used for iterative
protein&#x02013;sequence similarity searches using a position-specific score matrix
(<a class="def" href="/books/NBK21106/def-item/app149/">PSSM</a>). It is a program for
searching protein databases using protein queries to find other members of the same
protein family. All statistically significant alignments found by <a class="def" href="/books/NBK21106/def-item/app9/">BLAST</a> are combined into a multiple alignment, from which a
PSSM is constructed. This matrix is used to search the database for additional
significant alignments, and the process may be iterated until no new alignments are
found.</p></dd><dt id="app149">PSSM</dt><dd><p>Position-Specific Score Matrix. The PSSM gives the log-odds score for finding a
particular matching amino acid in a target sequence.</p></dd><dt id="app150">PubMed</dt><dd><p>A retrieval system containing citations, abstracts, and indexing terms for journal
articles in the biomedical sciences. It includes literature citations supplied directly
to NCBI by publishers as well as <a class="def" href="/books/NBK21106/def-item/app190/">URL</a>s to
full text articles on the publishers' web sites. PubMed contains the complete contents
of the <a class="def" href="/books/NBK21106/def-item/app102/">MEDLINE</a> and PREMEDLINE databases.
It also contains some articles and journals considered out of scope for MEDLINE, based
on either content or on a period of time when the journal was not indexed and,
therefore, is a superset of
MEDLINE.
</p></dd><dt id="app151">PXML</dt><dd><p>PubMed Central XML file</p></dd><dt id="app152">QBLAST</dt><dd><p>A queuing system to BLAST that allows users to retrieve their results at their
convenience and format their results multiple times with different formatting
options.</p></dd><dt id="app153">QTL</dt><dd><p>Quantitative Trait Locus. A QTL is a hypothesis that a certain region of the chromosome
contains genes that contribute significantly to the expression of a complex trait. QTLs
are generally identified by comparing the linkage of polymorphic molecular markers and
phenotypic trait measurements. The density of the linkage map is important in the
accurate and precise location of QTLs; the higher the map density, the more precise the
location of the putative QTL, although there is increased likelihood that false
positives will be detected. Once QTLs have been mapped to a relatively small chromosomal
region, other molecular methods can be used to isolate specific genes.</p></dd><dt id="app154">RCSB</dt><dd><p>
<a href="http://www.rcsb.org/index.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Research Collaboratory for
Structural Bioinformatics</a>. RCSB is a nonprofit consortium that works toward
the elucidation of biological, macromolecular, 3-D structures.</p></dd><dt id="app215">Reciprocal best hits</dt><dd><p>Reciprocal best hits are proteins from different organisms that are each other's top
BLAST hit, when the proteomes from those organisms are compared to each other. For
example, proteins A&#x02013;Z in organism 1 are compared against proteins
AA&#x02013;ZZ in organism 2. If protein A has a best hit to protein RR, and RR's best
hit, when it is compared to all the proteins in organism 1, also turns out to be protein A,
then A and RR are reciprocal best hits. However, if RR's best hit is to B rather than to
A, then A and RR are not reciprocal best hits.</p></dd><dt id="app155">RefSeq</dt><dd><p>RefSeq is the NCBI database of reference sequences; a curated, non-redundant set
including genomic DNA contigs, mRNAs and proteins for known genes, and entire
chromosomes.</p></dd><dt id="app218">RepeatMasker</dt><dd><p>
<a href="http://ftp.genome.washington.edu/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Program</a> that
screens DNA sequences for interspersed repeats and low-complexity DNA sequences.</p></dd><dt id="app156">RFLP</dt><dd><p>Restriction Fragment Length Polymorphism. Genetic variations at the site where a
restriction enzyme cuts a piece of DNA. Such variations affect the size of the resulting
fragments. These sequences can be used as markers on physical maps and linkage maps.
RFLP is also pronounced &#x0201c;rif lip&#x0201d;.</p></dd><dt id="app157">RH map</dt><dd><p>Radiation Hybrid map. A genome map in which <a class="def" href="/books/NBK21106/def-item/app173/">STS</a>s are positioned relative to one another on the basis of the frequency
with which they are separated by radiation-induced breaks. The frequency is assayed by
analyzing a panel of human&#x02013;hamster hybrid cell lines. These hybrids are
produced by irradiating human cells, which damages the cells and fragments the DNA. The
dying human cells are fused with thymidine kinase negative (TK&#x02212;) live hamster
cells. The fused cells are grown under conditions that select against hamster cells and
favor the growth of hybrid cells that have taken up the human <i>TK</i> gene. In
the RH maps, the unit of distance is centirays (cR), denoting a 1% chance of a
break occurring between two loci.</p></dd><dt id="app158">RNA</dt><dd><p>Ribonucleic Acid. A single-stranded nucleic acid, similar to <a class="def" href="/books/NBK21106/def-item/app37/">DNA</a>, but having a ribose sugar, instead of deoxyribose, and
uracil instead of thymine as one of its bases.</p></dd><dt id="app159">RPS-BLAST</dt><dd><p>Reverse Position-Specific BLAST. A program used to identify conserved domains in a
protein query sequence. It does this by comparing a query protein sequence to
position-specific score matrices (<a class="def" href="/books/NBK21106/def-item/app149/">PSSM</a>s)
that have been prepared from conserved domain alignments. RPS-BLAST is a
&#x0201c;reverse&#x0201d; version of position-specific iterated BLAST (<a class="def" href="/books/NBK21106/def-item/app148/">PSI-BLAST</a>); however, RPS-BLAST compares a
query sequence against a database of profiles prepared from ready-made alignments,
whereas PSI-BLAST builds alignments starting from a single protein sequence.</p></dd><dt id="app160">SAGE</dt><dd><p>Serial Analysis of Gene Expression. An experimental technique designed to
quantitatively measure gene expression.</p></dd><dt id="app161">Sequin</dt><dd><p>Sequin is a stand-alone software tool developed by the <a class="def" href="/books/NBK21106/def-item/app116/">NCBI</a> for submitting and updating entries to the <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a>, <a class="def" href="/books/NBK21106/def-item/app44/">EMBL</a>, or <a class="def" href="/books/NBK21106/def-item/app35/">DDBJ</a> sequence
databases. It is capable of handling simple submissions that contain a single, short
mRNA sequence and complex submissions containing long sequences, multiple annotations,
segmented sets of DNA, or phylogenetic and population
studies.
</p></dd><dt id="app162">SGD</dt><dd><p>Saccharomyces Genome Database. A database for the molecular biology and genetics of <i>Saccharomyces cerevisiae</i>, also known as baker's yeast.</p></dd><dt id="app163">SGML</dt><dd><p>Standard Generalized Markup Language. The international standard for specifying the
structure and content of electronic documents. SGML is used for the markup of data in a
way that is self-describing. SGML is not a language but a way of defining languages that
are developed along its general principles. A subset of SGML called <a class="def" href="/books/NBK21106/def-item/app198/">XML</a> is more widely used for the markup of
data. <a class="def" href="/books/NBK21106/def-item/app75/">HTML</a> (Hypertext Markup Language) is
based on SGML and uses some of its concepts to provide a universal markup language for
the display of information and the linking of different pieces of that
information.</p></dd><dt id="app164">SKY</dt><dd><p>Spectral Karyotyping. SKY is a technique that allows for the visualization of all of an
organism's chromosomes together, each labeled with a different color. This is achieved
by using chromosome-specific, single-stranded DNA probes (each labeled with a different
fluorophore) to hybridize or bind to the chromosomes of a cell; resulting in each
chromosome being painted a different color. This technique is useful for identifying
chromosome abnormalities because it is easy to spot instances where a chromosome painted
in one color has a small piece of another chromosome, painted in a different color,
attached to it. (Also see <a class="def" href="/books/NBK21106/def-item/app56/">FISH</a>, <a class="def" href="/books/NBK21106/def-item/app24/">CGH</a>.)</p></dd><dt id="app165">SKYGRAM</dt><dd><p>1. A software tool to automatically convert the short-form karyotype into an image
representation of a cell or clone, with each chromosome displayed in a different color,
with band overlay. The program will also incorporate the number of cells for each
structural abnormality, which is displayed in brackets. 2. The full ideogram of a cell
or clone, with each chromosome displayed in a different color, with band overlay.</p></dd><dt id="app166">SMART</dt><dd><p>Simple Modular Architecture Research Tool. A tool to allow automatic identification and
annotation of domains in user-supplied protein sequences. For example, the <a class="def" href="/books/NBK21106/def-item/app175/">SWISS-PROT</a> database is an extensively
annotated and nonredundant collection of protein sequences. SWISS-PROT annotations have
been mined for SMART-derived annotations of alignments.</p></dd><dt id="app167">SMD</dt><dd><p>
<a href="http://genome-www5.stanford.edu/MicroArray/SMD/index.shtml" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Stanford Microarray Database</a>. SMD stores raw and normalized data from
microarray experiments, as well as their corresponding image files. In addition, the SMD
provides interfaces for data retrieval, analysis, and visualization. Data are released
to the public at the researcher's discretion or upon publication.</p></dd><dt id="app168">SNP</dt><dd><p>Single Nucleotide Polymorphism. Common, but minute, variations that occur in human DNA at a frequency of 1 every 1,000
bases. An SNP is a single base-pair site within the genome at which more than one of the
four possible base pairs is commonly found in natural populations. Several hundred
thousand SNP sites are being identified and mapped on the sequence of the genome,
providing the densest possible map of genetic differences. SNP is pronounced
&#x0201c;snip&#x0201d;.</p></dd><dt id="app169">SOFT</dt><dd><p>Simple Omnibus Format in Text. SOFT is an ASCII text format that was designed to be a
machine-readable representation of data retrieved from, or submitted to, the Gene
Expression Omnibus (<a class="def" href="/books/NBK21106/def-item/app66/">GEO</a>). SOFT is also a
line-based format, making it easy to parse, using commonly available text processing and
formatting languages. (For examples of SOFT, see the <a href="/geo/info/soft.cgi" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">guide</a>.)</p></dd><dt id="app170">splice sites</dt><dd><p>Refers to the location of the exon-intron junctions in a pre-mRNA (i.e., the primary
transcript that must undergo additional processing to become a mature RNA for
translation into a protein). Splice sites can be determined by comparing the sequence of
genomic DNA with that of the <a class="def" href="/books/NBK21106/def-item/app21/">cDNA</a>
sequence. In mRNA, introns (non-protein coding regions) are removed by the splicing
machinery; however, exons can also be removed. Depending on which exons (or parts of
exons) are removed, different proteins can be made from the same initial RNA or gene.
Different proteins created in this way are &#x0201c;splice variants&#x0201d; or
&#x0201c;alternatively spliced&#x0201d;.</p></dd><dt id="app171">SSAHA</dt><dd><p>Sequence Search and Alignment by Hashing Algorithm. SSAHA is a software tool for very
fast matching and alignment of DNA sequences and is used for searching databases
containing large amounts (gigabases) of genome sequence. It achieves its fast search
speed by converting sequence information into a &#x0201c;hash table&#x0201d; data
structure, which can then be searched very rapidly for matches (<a href="/pubmed?term=11591649" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=term&amp;targettype=pubmed">Ning et al., Genome Res 11:1725-1729; 2001</a>).</p></dd><dt id="app172">SSLP</dt><dd><p>Simple Sequence Length Polymorphisms. SSLPs are markers based on the variation in the
number of short tandem repeats in DNA.</p></dd><dt id="app173">STS</dt><dd><p>Sequence-Tagged Site. A short DNA segment that occurs only once in the human genome, the exact location and
order of bases of which are known. Because each is unique, STSs are helpful for
chromosome placement of mapping and sequencing data from many different laboratories.
STSs serve as landmarks on the physical map of the human
genome.
</p></dd><dt id="app174">substitution matrix</dt><dd><p>A matrix containing values proportional to the probability that amino acid
i mutates into amino acid j for all pairs of amino acids. Such matrices are constructed
by assembling a large and diverse sample of verified pairwise alignments of amino acids.
If the sample is large enough to be statistically significant, the resulting matrices
should reflect the true probabilities of mutations occurring through a period of
evolution. (See also <a class="def" href="/books/NBK21106/def-item/app15/">BLOSUM 62</a>.)</p></dd><dt id="app175">SWISS-PROT</dt><dd><p>
<a href="http://www.ebi.ac.uk/swissprot/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">SWISS-PROT</a> is a
curated protein sequence database that provides a high level of annotation (such as the
description of protein function, domain structures, post-translational modifications,
variants, etc.), a minimal level of redundancy, and high level of integration with other
databases.</p></dd><dt id="app176">Sybase</dt><dd><p>A trademarked family of products that include databases, development tools, integration
middleware, enterprise portals, and mobile and wireless servers.</p></dd><dt id="app177">synteny</dt><dd><p>On the same strand. The phrase &#x0201c;conserved synteny&#x0201d; refers to
conserved gene order on chromosomes of different, related species.</p></dd><dt id="app178">Tax BLAST</dt><dd><p>BLAST Taxonomy Reports page. Tax BLAST groups BLAST hits by source organism, according
to information in <a class="def" href="/books/NBK21106/def-item/app116/">NCBI</a>'s Taxonomy
database. Species are listed in order of sequence similarity with the query sequence,
the strongest match listed first.
</p></dd><dt id="app179">taxID</dt><dd><p>Taxonomy Identifier. The taxID is a stable unique identifier for each taxon (for a
species, a family, an order, or any other group in the taxonomy database). The taxID is
seen in the <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a> records as a
&#x0201c;source&#x0201d; feature table entry; for example,
/db_xref=&#x0201c;taxon:&#x0003c;9606&#x0003e;&#x0201d; is the taxID for <i>Homo sapiens</i>, and the line is therefore found in all recent human sequence
records.
</p></dd><dt id="app180">taxid</dt><dd><p>See <a class="def" href="/books/NBK21106/def-item/app179/">taxID</a>.</p></dd><dt id="app205">termination codon or stop codon</dt><dd><p>One of three codons that do not specify any amino acid and hence causes translation of
mRNA into protein to be terminated. These codons mark the end of a protein coding
sequence.</p></dd><dt id="app181">TIGR</dt><dd><p>
<a href="http://www.tigr.org" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">The Institute for Genomic
Research</a>
</p></dd><dt id="app182">tiling path</dt><dd><p>An ordered list or map that defines a set of overlapping clones that covers a
chromosome or other extended segment of DNA.</p></dd><dt id="app183">TPA</dt><dd><p>Third-Party Annotation
</p></dd><dt id="app214">TPF</dt><dd><p>Tiling Path Format. A table format used to specify the set of clones that will provide
the best possible sequence coverage for a particular chromosome, the order of the clones
along the chromosome, and the location of any gaps in the clone tiling path. Also used
to refer to a file (Tiling Path File) in which the <a class="def" href="/books/NBK21106/def-item/app213/">minimal tiling path</a> of clones covering a chromosome is specified in
Tiling Path Format or to the minimal tiling path of clones so defined.</p></dd><dt id="app207">translation start site</dt><dd><p>The position within an mRNA at which synthesis of a protein begins. The translation start site is usually an AUG codon, but occasionally, GUG or CUG codons are used to
initiate protein synthesis.</p></dd><dt id="app184">UID</dt><dd><p>Unique Identifier
</p></dd><dt id="app185">UMLS</dt><dd><p>
<a href="http://www.nlm.nih.gov/research/umls/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Unified Medical
Language System</a>. A project of the National Library of Medicine for the
development and distribution of multipurpose, electronic &#x0201c;Knowledge
Sources&#x0201d;, and associated lexical programs. The purpose of the UMLS is to aid
the development of systems that help health professionals and researchers retrieve and
integrate electronic biomedical information from a variety of sources and to make it
easy for users to link disparate information systems, including computer-based patient
records, bibliographic databases, factual databases, and expert systems.</p></dd><dt id="app186">unfinished sequence</dt><dd><p>See <a class="def" href="/books/NBK21106/def-item/app39/">draft sequence</a>.</p></dd><dt id="app187">UniGene cluster</dt><dd><p>
<a class="def" href="/books/NBK21106/def-item/app46/">EST</a>s and full-length mRNA sequences
organized into clusters such that each represents a unique known or putative gene within
the organism from which the sequences were obtained. UniGene clusters are annotated with
mapping and expression information when possible (e.g., for human) and include
cross-references to other resources. Sequence data can be downloaded by cluster through
the UniGene web pages, or the complete dataset can be downloaded from the <a href="ftp://ftp.ncbi.nih.gov/repository/UniGene/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">repository/UniGene
directory</a> of the FTP site.</p></dd><dt id="app188">UniSTS</dt><dd><p>
<a href="/entrez/query.fcgi?db=unists" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">UniSTS</a> presents a unified, non-redundant view of sequence-tagged sites
(<a class="def" href="/books/NBK21106/def-item/app173/">STS</a>s). UniSTS integrates marker and
mapping data from a variety of public resources. If two or more markers have different
names but the same primer pair, a single STS record is presented for the primer pair,
and all the marker names are shown.</p></dd><dt id="app189">UNIX</dt><dd><p>UNIX is an operating system that was developed by Dennis Ritchie and Kenneth Thompson
at Bell Labs more than 30 years ago. It allows multitasking and multiuser capabilities
and offers portability with other operating systems. It comes with hundreds of programs
that are of two types: integral utilities, such as the command line interpreter; and
tools such as email, which are not necessary for the operation of UNIX but provide
additional capabilities to the user. It is functionally organized at three levels: the
kernel, which schedules tasks and manages storage; the shell, which connects and
interprets users' commands, calls programs from memory, and executes them; and tools and
applications, which offer additional functionality to the operating system, such as word
processing and business applications. UNIX<sup>&#x000ae;</sup> was registered by <a href="http://www.lucent.com/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Bell Laboratories</a> as a trademark
for computer operating systems. Today, this mark is owned by <a href="http://www.opengroup.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">The Open Group</a>.</p></dd><dt id="app190">URL</dt><dd><p>Uniform Resource Locator. The address of a resource on the Internet. URL syntax is in
the form of protocol://host/localinfo, where &#x0201c;protocol&#x0201d; specifies
the means of fetching the object (such as HTTP, used by <a class="def" href="/books/NBK21106/def-item/app197/">WWW</a> browsers and servers to exchange information, or
<a class="def" href="/books/NBK21106/def-item/app58/">FTP</a>), &#x0201c;host&#x0201d;
specifies the remote location where the object resides, and
&#x0201c;localinfo&#x0201d; is a string (often a file name) passed to the protocol
handler at the remote location. Also called Uniform Resource Identifier (URI).</p></dd><dt id="app191">UTF-8</dt><dd><p>UCS (Universal Character Set) Transformation Format. An ASCII-preserving encoding
method for Unicode (a standard to provide a unique number for every character
irrespective of the platform, program, or language).</p></dd><dt id="app192">UTR</dt><dd><p>Untranslated Region. The 3&#x02032; UTR is that portion of an <a class="def" href="/books/NBK21106/def-item/app114/">mRNA</a> from the position of the last codon that is used in
translation to the 3&#x02032; end. The 5&#x02032; UTR is that portion of an mRNA
from the 5&#x02032; end to the position of the first codon used in translation.</p></dd><dt id="app193">VAST</dt><dd><p>
<a href="/Structure/VAST/vast.shtml" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Vector
Alignment Search Tool</a>. A computer algorithm used to identify similar protein
3D structures.</p></dd><dt id="app194">weight</dt><dd><p>An assignment of importance to a term in a search query. If a term in a search query is
found to match a word in a document, that word is given a &#x0201c;weight&#x0201d;.
The exact weight of the word will depend on the emphasis given to the word by the author
or its position in the document. For example, a word that occurs in a chapter title will
have a higher weight than the same word if it occurs in the body of the chapter.
Similarly, words that occur in data collections are also assigned weights, depending on
how frequently the terms occur in the collection.

</p></dd><dt id="app195">WGS sequence</dt><dd><p>Whole Genome Shotgun sequence. In this semi-automated sequencing technique,
high-molecular-weight DNA is sheared into random fragments, size selected (usually 2,
10, 50, and 150 kb), and cloned into an appropriate vector. The clones are then
sequenced from both ends. The two ends of the same clone are referred to as mate pairs.
The distance between two mate pairs can be inferred if the library size is known and has
a narrow window of deviation. The sequences are aligned using sequence assembly
software. Proponents of this approach argue that it is possible to sequence the whole
genome at once using large arrays of sequencers, which makes the whole process much more
efficient than the traditional approaches.</p></dd><dt id="app196">WHO</dt><dd><p>
<a href="http://www.who.int/en/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">World Health Organization</a>
</p></dd><dt id="app197">WWW</dt><dd><p>World Wide Web. A <a href="http://www.w3.org/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">consortium</a>
(W3C) that develops technologies such as specifications, guidelines, software, and tools
for the internet.</p></dd><dt id="app198">XML</dt><dd><p>Extensible Markup Language. XML describes a class of data objects called XML documents
and partially describes the behavior of computer programs that process them. XML is a
subset of SGML, and XML documents are conforming SGML documents. XML documents are made
up of storage units called entities, which contain either parsed or unparsed data.
Parsed data is made up of characters (a unit of text), some of which form character
data, and some of which form markup. Markup includes tags that provide information about
the data, i.e., a description of the structure and content of the document. Character
data comprises all the text that is not markup. XML provides a mechanism to impose
constraints on the storage layout and logical structure.</p></dd><dt id="app199">XSL</dt><dd><p>Extensible Stylesheet Language. XSL is used for the transformation of XML-based data
into HTML or other presentation formats, for display in a web browser. This is a
two-part process. First, the structure of the input XML tree must be transformed into a
new tree (e.g., HTML), allowing reordering of the elements, addition of text, and
calculations&#x02014;all without modification to the source document. This process is described
by <a class="def" href="/books/NBK21106/def-item/app200/">XSLT</a>. Second, XSL-FO (XSL Formatting
Objects, an XML vocabulary for formatting) is used for formatting the output, defining
areas of the display page and their properties. In this way, the source XML document can
be maintained from the perspective of &#x0201c;pure content&#x0201d; and can be
separated from the presentation. An XML document can be delivered in different formats
to different target audiences by simply switching style sheets.</p></dd><dt id="app200">XSLT</dt><dd><p>Extensible Stylesheet Language: Transformations. XSLT is a language for transforming
the structure of an XML document. XSLT is designed for use as part of <a class="def" href="/books/NBK21106/def-item/app199/">XSL</a>, the stylesheet language for XML. A
transformation expressed in XSLT describes a sequence of template rules for transforming
a source tree into a result tree; elements from the source tree can be filtered and
reordered, and a different structure can be added. A template rule has two parts: a
pattern that is matched against nodes in the source tree; and a template that can be
instantiated to form part of the result tree. This makes XSLT a declarative language
because it is possible to specify what output should be produced when specific patterns
occur in the input, which distinguishes it from procedural programming languages, where
it is necessary to specify what tasks have to be performed in what order. XSLT makes use
of the expression language defined by XPath (a language for addressing the parts of an
XML document) for selecting elements for processing, for conditional processing, and for
generating text.</p></dd><dt id="app201">YAC</dt><dd><p>Yeast Artificial Chromosome. Extremely large segments of DNA from another species
spliced into the DNA of yeast. YACs are used to clone up to one million bases of foreign
DNA into a host cell, where the DNA is propagated along with the other chromosomes of
the yeast cell.</p></dd><dt id="app202">ZFIN</dt><dd><p>Zebrafish Information Network. <a href="http://zdb.wehi.edu.au:8282//" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">ZFIN</a> is a database for the zebrafish model organism that holds information
on wild-type stocks, mutants, genes, gene expression data, and map markers.</p></dd></dl><div id="bk_toc_contnr"></div></div></div><div class="fm-sec"><h2 id="_NBK21106_pubdet_">Publication Details</h2><h3>Copyright</h3><div><div class="half_rhythm"><a href="/books/about/copyright/">Copyright Notice</a></div></div><h3>Publisher</h3><p><a href="https://www.ncbi.nlm.nih.gov/" ref="pagearea=page-banner&amp;targetsite=external&amp;targetcat=link&amp;targettype=publisher">National Center for Biotechnology Information (US)</a>, Bethesda (MD)</p><h3>NLM Citation</h3><p>McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-.  Glossary.<span class="bk_cite_avail"></span></p></div><div class="small-screen-prev"><a href="/books/n/handbook/ch24/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a></div><div class="small-screen-next"></div></article></div><div id="jr-scripts"><script src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/libs.min.js"> </script><script src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/jr.min.js"> </script></div></div>


        <!-- Book content -->

        <script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js"> </script>


<!-- CE8B5AF87C7FFCB1_0191SID /projects/books/PBooks@9.11 portal107 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>

<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3968615.js" snapshot="books"></script></body>
</html>