883 lines
107 KiB
Text
883 lines
107 KiB
Text
<!DOCTYPE html>
|
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" class="no-js no-jr">
|
|
<head>
|
|
<!-- For pinger, set start time and add meta elements. -->
|
|
<script type="text/javascript">var ncbi_startTime = new Date();</script>
|
|
|
|
<!-- Logger begin -->
|
|
<meta name="ncbi_db" content="books">
|
|
<meta name="ncbi_pdid" content="book-part">
|
|
<meta name="ncbi_acc" content="NBK21106">
|
|
<meta name="ncbi_domain" content="handbook">
|
|
<meta name="ncbi_report" content="reader">
|
|
<meta name="ncbi_type" content="fulltext">
|
|
<meta name="ncbi_objectid" content="">
|
|
<meta name="ncbi_pcid" content="/NBK21106/?report=reader">
|
|
<meta name="ncbi_pagename" content="Glossary - The NCBI Handbook - NCBI Bookshelf">
|
|
<meta name="ncbi_bookparttype" content="glossary">
|
|
<meta name="ncbi_app" content="bookshelf">
|
|
<!-- Logger end -->
|
|
|
|
<!--component id="Page" label="meta"/-->
|
|
<script type="text/javascript" src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/jr.boots.min.js"> </script><title>Glossary - The NCBI Handbook - NCBI Bookshelf</title>
|
|
<meta charset="utf-8">
|
|
<meta name="apple-mobile-web-app-capable" content="no">
|
|
<meta name="viewport" content="initial-scale=1,minimum-scale=1,maximum-scale=1,user-scalable=no">
|
|
<meta name="jr-col-layout" content="auto">
|
|
<meta name="jr-prev-unit" content="/books/n/handbook/ch24/?report=reader">
|
|
<meta name="bk-toc-url" content="/books/n/handbook/?report=toc">
|
|
<meta name="robots" content="NOINDEX,NOFOLLOW,NOARCHIVE,NOIMAGEINDEX">
|
|
<meta name="citation_inbook_title" content="The NCBI Handbook [Internet]">
|
|
<meta name="citation_title" content="Glossary">
|
|
<meta name="citation_publisher" content="National Center for Biotechnology Information (US)">
|
|
<meta name="citation_date" content="2002">
|
|
<meta name="citation_author" content="Jo McEntyre">
|
|
<meta name="citation_author" content="Jim Ostell">
|
|
<meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK21106/">
|
|
<link rel="schema.DC" href="http://purl.org/DC/elements/1.0/">
|
|
<meta name="DC.Title" content="Glossary">
|
|
<meta name="DC.Type" content="Text">
|
|
<meta name="DC.Publisher" content="National Center for Biotechnology Information (US)">
|
|
<meta name="DC.Contributor" content="Jo McEntyre">
|
|
<meta name="DC.Contributor" content="Jim Ostell">
|
|
<meta name="DC.Date" content="2002">
|
|
<meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK21106/">
|
|
<meta name="og:title" content="Glossary">
|
|
<meta name="og:type" content="book">
|
|
<meta name="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK21106/">
|
|
<meta name="og:site_name" content="NCBI Bookshelf">
|
|
<meta name="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png">
|
|
<meta name="twitter:card" content="summary">
|
|
<meta name="twitter:site" content="@ncbibooks">
|
|
<meta name="warning" content="This publication is provided for historical reference only and the information may be out of date.">
|
|
<meta name="bk-non-canon-loc" content="/books/n/handbook/A1237/?report=reader">
|
|
<link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK21106/">
|
|
<link href="https://fonts.googleapis.com/css?family=Archivo+Narrow:400,700,400italic,700italic&subset=latin" rel="stylesheet" type="text/css">
|
|
<link rel="stylesheet" href="/corehtml/pmc/jatsreader/ptpmc_3.22/css/libs.min.css">
|
|
<link rel="stylesheet" href="/corehtml/pmc/jatsreader/ptpmc_3.22/css/jr.min.css">
|
|
<meta name="format-detection" content="telephone=no">
|
|
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css" type="text/css">
|
|
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css//books_print.min.css" type="text/css" media="print">
|
|
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_reader.min.css" type="text/css">
|
|
<style type="text/css">.main-content {background:transparent repeat-y top left;background-image:url(/corehtml/pmc/css/bookshelf/2.26/img/archive.png);background-size: auto, contain; padding:0 0 0 3em }</style>
|
|
<style type="text/css">p a.figpopup{display:inline !important} .bk_tt {font-family: monospace} .first-line-outdent .bk_ref {display: inline} .body-content h2, .body-content .h2 {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a {text-decoration:none;border:0 !important} .temp-labeled-list .graphic {display:inline-block !important} .temp-labeled-list img{width:100%}</style>
|
|
|
|
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico">
|
|
<meta name="ncbi_phid" content="CE8EB1E97D5C5A210000000000670056.m_5">
|
|
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3849091.css"></head>
|
|
<body>
|
|
<!-- Book content! -->
|
|
|
|
|
|
<div id="jr" data-jr-path="/corehtml/pmc/jatsreader/ptpmc_3.22/"><div class="jr-unsupported"><table class="modal"><tr><td><span class="attn inline-block"></span><br />Your browser does not support the NLM PubReader view.<br />Go to <a href="/pmc/about/pr-browsers/">this page</a> to see a list of supported browsers<br />or return to the <br /><a href="/books/NBK21106/?report=classic">regular view</a>.</td></tr></table></div><div id="jr-ui" class="hidden"><nav id="jr-head"><div class="flexh tb"><div id="jr-tb1"><a id="jr-links-sw" class="hidden" title="Links"><svg xmlns="http://www.w3.org/2000/svg" version="1.1" x="0px" y="0px" viewBox="0 0 70.6 85.3" style="enable-background:new 0 0 70.6 85.3;vertical-align:middle" xml:space="preserve" width="24" height="24">
|
|
<style type="text/css">.st0{fill:#939598;}</style>
|
|
<g>
|
|
<path class="st0" d="M36,0C12.8,2.2-22.4,14.6,19.6,32.5C40.7,41.4-30.6,14,35.9,9.8"></path>
|
|
<path class="st0" d="M34.5,85.3c23.2-2.2,58.4-14.6,16.4-32.5c-21.1-8.9,50.2,18.5-16.3,22.7"></path>
|
|
<path class="st0" d="M34.7,37.1c66.5-4.2-4.8-31.6,16.3-22.7c42.1,17.9,6.9,30.3-16.4,32.5h1.7c-66.2,4.4,4.8,31.6-16.3,22.7 c-42.1-17.9-6.9-30.3,16.4-32.5"></path>
|
|
</g>
|
|
</svg> Books</a></div><div class="jr-rhead f1 flexh"><div class="head"><a href="/books/n/handbook/ch24/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a></div><div class="body"><div class="t">Glossary</div><div class="j">The NCBI Handbook [Internet]</div></div><div class="tail"></div></div><div id="jr-tb2"><a id="jr-bkhelp-sw" class="btn wsprkl hidden" title="Help with NLM PubReader">?</a><a id="jr-help-sw" class="btn wsprkl hidden" title="Settings and typography in NLM PubReader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" preserveAspectRatio="none"><path d="M462,283.742v-55.485l-29.981-10.662c-11.431-4.065-20.628-12.794-25.274-24.001 c-0.002-0.004-0.004-0.009-0.006-0.013c-4.659-11.235-4.333-23.918,0.889-34.903l13.653-28.724l-39.234-39.234l-28.72,13.652 c-10.979,5.219-23.68,5.546-34.908,0.889c-0.005-0.002-0.01-0.003-0.014-0.005c-11.215-4.65-19.933-13.834-24-25.273L283.741,50 h-55.484l-10.662,29.981c-4.065,11.431-12.794,20.627-24.001,25.274c-0.005,0.002-0.009,0.004-0.014,0.005 c-11.235,4.66-23.919,4.333-34.905-0.889l-28.723-13.653l-39.234,39.234l13.653,28.721c5.219,10.979,5.545,23.681,0.889,34.91 c-0.002,0.004-0.004,0.009-0.006,0.013c-4.649,11.214-13.834,19.931-25.271,23.998L50,228.257v55.485l29.98,10.661 c11.431,4.065,20.627,12.794,25.274,24c0.002,0.005,0.003,0.01,0.005,0.014c4.66,11.236,4.334,23.921-0.888,34.906l-13.654,28.723 l39.234,39.234l28.721-13.652c10.979-5.219,23.681-5.546,34.909-0.889c0.005,0.002,0.01,0.004,0.014,0.006 c11.214,4.649,19.93,13.833,23.998,25.271L228.257,462h55.484l10.595-29.79c4.103-11.538,12.908-20.824,24.216-25.525 c0.005-0.002,0.009-0.004,0.014-0.006c11.127-4.628,23.694-4.311,34.578,0.863l28.902,13.738l39.234-39.234l-13.66-28.737 
c-5.214-10.969-5.539-23.659-0.886-34.877c0.002-0.005,0.004-0.009,0.006-0.014c4.654-11.225,13.848-19.949,25.297-24.021 L462,283.742z M256,331.546c-41.724,0-75.548-33.823-75.548-75.546s33.824-75.547,75.548-75.547 c41.723,0,75.546,33.824,75.546,75.547S297.723,331.546,256,331.546z"></path></svg></a><a id="jr-fip-sw" class="btn wsprkl hidden" title="Find"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 550 600" preserveAspectRatio="none"><path fill="none" stroke="#000" stroke-width="36" stroke-linecap="round" style="fill:#FFF" d="m320,350a153,153 0 1,0-2,2l170,170m-91-117 110,110-26,26-110-110"></path></svg></a><a id="jr-rtoc-sw" class="btn wsprkl hidden" title="Table of Contents"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M20,20h10v8H20V20zM36,20h44v8H36V20zM20,37.33h10v8H20V37.33zM36,37.33h44v8H36V37.33zM20,54.66h10v8H20V54.66zM36,54.66h44v8H36V54.66zM20,72h10v8 H20V72zM36,72h44v8H36V72z"></path></svg></a></div></div></nav><nav id="jr-dash" class="noselect"><nav id="jr-dash" class="noselect"><div id="jr-pi" class="hidden"><a id="jr-pi-prev" class="hidden" title="Previous page"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a><div class="pginfo">Page <i class="jr-pg-pn">0</i> of <i class="jr-pg-lp">0</i></div><a id="jr-pi-next" class="hidden" title="Next page"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M25,30c80,60 80,0 0,60 c30,-60 30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Next</text></svg></a></div><div id="jr-is-tb"><a id="jr-is-sw" class="btn wsprkl hidden" title="Switch between Figures/Tables strip and Progress bar"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><rect x="10" y="40" width="20" 
height="20"></rect><rect x="40" y="40" width="20" height="20"></rect><rect x="70" y="40" width="20" height="20"></rect></svg></a></div><nav id="jr-istrip" class="istrip hidden"><a id="jr-is-prev" href="#" class="hidden" title="Previous"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M80,40 60,65 80,90 70,90 50,65 70,40z M50,40 30,65 50,90 40,90 20,65 40,40z"></path><text x="35" y="25" textLength="60" style="font-size:25px">Prev</text></svg></a><a id="jr-is-next" href="#" class="hidden" title="Next"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M20,40 40,65 20,90 30,90 50,65 30,40z M50,40 70,65 50,90 60,90 80,65 60,40z"></path><text x="15" y="25" textLength="60" style="font-size:25px">Next</text></svg></a></nav><nav id="jr-progress"></nav></nav></nav><aside id="jr-links-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">NCBI Bookshelf</div></div><div class="cnt lol f1"><a href="/books/">Home</a><a href="/books/browse/">Browse All Titles</a><a class="btn share" target="_blank" rel="noopener noreferrer" href="https://www.facebook.com/sharer/sharer.php?u=https://www.ncbi.nlm.nih.gov/books/NBK21106/"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 33 33" style="vertical-align:middle" width="24" height="24" preserveAspectRatio="none"><g><path d="M 17.996,32L 12,32 L 12,16 l-4,0 l0-5.514 l 4-0.002l-0.006-3.248C 11.993,2.737, 13.213,0, 18.512,0l 4.412,0 l0,5.515 l-2.757,0 c-2.063,0-2.163,0.77-2.163,2.209l-0.008,2.76l 4.959,0 l-0.585,5.514L 18,16L 17.996,32z"></path></g></svg> Share on Facebook</a><a class="btn share" target="_blank" rel="noopener noreferrer" href="https://twitter.com/intent/tweet?url=https://www.ncbi.nlm.nih.gov/books/NBK21106/&text=Glossary"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 33 33" style="vertical-align:middle" width="24" height="24"><g><path d="M 
32,6.076c-1.177,0.522-2.443,0.875-3.771,1.034c 1.355-0.813, 2.396-2.099, 2.887-3.632 c-1.269,0.752-2.674,1.299-4.169,1.593c-1.198-1.276-2.904-2.073-4.792-2.073c-3.626,0-6.565,2.939-6.565,6.565 c0,0.515, 0.058,1.016, 0.17,1.496c-5.456-0.274-10.294-2.888-13.532-6.86c-0.565,0.97-0.889,2.097-0.889,3.301 c0,2.278, 1.159,4.287, 2.921,5.465c-1.076-0.034-2.088-0.329-2.974-0.821c-0.001,0.027-0.001,0.055-0.001,0.083 c0,3.181, 2.263,5.834, 5.266,6.438c-0.551,0.15-1.131,0.23-1.73,0.23c-0.423,0-0.834-0.041-1.235-0.118 c 0.836,2.608, 3.26,4.506, 6.133,4.559c-2.247,1.761-5.078,2.81-8.154,2.81c-0.53,0-1.052-0.031-1.566-0.092 c 2.905,1.863, 6.356,2.95, 10.064,2.95c 12.076,0, 18.679-10.004, 18.679-18.68c0-0.285-0.006-0.568-0.019-0.849 C 30.007,8.548, 31.12,7.392, 32,6.076z"></path></g></svg> Share on Twitter</a></div></aside><aside id="jr-rtoc-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Table of Contents</div></div><div class="cnt lol f1"><a href="/books/n/handbook/?report=reader">Title Information</a><a href="/books/n/handbook/toc/?report=reader">Table of Contents Page</a></div></aside><aside id="jr-help-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Settings</div></div><div class="cnt f1"><div id="jr-typo-p" class="typo"><div><a class="sf btn wsprkl">A-</a><a class="lf btn wsprkl">A+</a></div><div><a class="bcol-auto btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 200 100" preserveAspectRatio="none"><text x="10" y="70" style="font-size:60px;font-family: Trebuchet MS, ArialMT, Arial, sans-serif" textLength="180">AUTO</text></svg></a><a class="bcol-1 btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M15,25 85,25zM15,40 85,40zM15,55 85,55zM15,70 85,70z"></path></svg></a><a class="bcol-2 btn wsprkl"><svg 
xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M5,25 45,25z M55,25 95,25zM5,40 45,40z M55,40 95,40zM5,55 45,55z M55,55 95,55zM5,70 45,70z M55,70 95,70z"></path></svg></a></div></div><div class="lol"><a class="" href="/books/NBK21106/?report=classic">Switch to classic view</a><a href="/books/NBK21106/pdf/Bookshelf_NBK21106.pdf">PDF (464K)</a><a href="/books/n/handbook/pdf/">PDF (7.2M)</a><a href="/books/NBK21106/?report=printable">Print View</a></div></div></aside><aside id="jr-bkhelp-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Help</div></div><div class="cnt f1 lol"><a id="jr-helpobj-sw" data-path="/corehtml/pmc/jatsreader/ptpmc_3.22/" data-href="/corehtml/pmc/jatsreader/ptpmc_3.22/img/bookshelf/help.xml" href="">Help</a><a href="mailto:info@ncbi.nlm.nih.gov?subject=PubReader%20feedback%20%2F%20NBK21106%20%2F%20sid%3ACE8B5AF87C7FFCB1_0191SID%20%2F%20phid%3ACE8EB1E97D5C5A210000000000670056.4">Send us feedback</a><a id="jr-about-sw" data-path="/corehtml/pmc/jatsreader/ptpmc_3.22/" data-href="/corehtml/pmc/jatsreader/ptpmc_3.22/img/bookshelf/about.xml" href="">About PubReader</a></div></aside><aside id="jr-objectbox" class="thidden hidden"><div class="jr-objectbox-close wsprkl">✘</div><div class="jr-objectbox-inner cnt"><div class="jr-objectbox-drawer"></div></div></aside><nav id="jr-pm-left" class="hidden"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 800" preserveAspectRatio="none"><text font-stretch="ultra-condensed" x="800" y="-15" text-anchor="end" transform="rotate(90)" font-size="18" letter-spacing=".1em">Previous Page</text></svg></nav><nav id="jr-pm-right" class="hidden"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 800" preserveAspectRatio="none"><text font-stretch="ultra-condensed" x="800" y="-15" text-anchor="end" transform="rotate(90)" font-size="18" letter-spacing=".1em">Next 
Page</text></svg></nav><nav id="jr-fip" class="hidden"><nav id="jr-fip-term-p"><input type="search" placeholder="search this page" id="jr-fip-term" autocorrect="off" autocomplete="off" /><a id="jr-fip-mg" class="wsprkl btn" title="Find"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 550 600" preserveAspectRatio="none"><path fill="none" stroke="#000" stroke-width="36" stroke-linecap="round" style="fill:#FFF" d="m320,350a153,153 0 1,0-2,2l170,170m-91-117 110,110-26,26-110-110"></path></svg></a><a id="jr-fip-done" class="wsprkl btn" title="Dismiss find">✘</a></nav><nav id="jr-fip-info-p"><a id="jr-fip-prev" class="wsprkl btn" title="Jump to previous match">◀</a><button id="jr-fip-matches">no matches yet</button><a id="jr-fip-next" class="wsprkl btn" title="Jump to next match">▶</a></nav></nav></div><div id="jr-epub-interstitial" class="hidden"></div><div id="jr-content"><article data-type="main"><p class="vip-notice"><strong><a href="/books/n/handbook2e/?report=reader">See "The NCBI Handbook, 2nd Edition"</a></strong></p><p class="vip-notice retraction"><strong>This publication is provided for historical reference only and the information may be out of date.</strong></p><div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><div class="fm-sec"><h1 id="_NBK21106_"><span class="title" itemprop="name">Glossary</span></h1><p class="fm-aai"><a href="#_NBK21106_pubdet_">Publication Details</a></p></div></div><div class="body-content whole_rhythm" itemprop="text"><dl><dt id="app1">3-D or 3D</dt><dd><p>Three-dimensional.</p></dd><dt id="app208">Accession number</dt><dd><p>An Accession number is a unique identifier given to a sequence when it is submitted to
|
|
one of the DNA repositories (GenBank, EMBL, DDBJ). The initial deposition of a sequence
|
|
record is referred to as version 1. If the sequence is updated, the version number is
|
|
incremented, but the Accession number will remain constant.</p></dd><dt id="app2">
|
|
<i>Alu</i>
|
|
</dt><dd><p>The <i>Alu</i> repeat family comprises short interspersed elements (SINEs)
|
|
present in multiple copies in the genomes of humans and other primates. The <i>Alu</i> sequence is approximately 300 bp in length and is found commonly in
|
|
<a class="def" href="/books/NBK21106/def-item/app86/">intron</a>s, 3′ untranslated
|
|
regions of genes, and intergenic genomic regions. They are mobile elements and are
|
|
present in the human genome in extremely high copy number. Almost 1 million copies of
|
|
the <i>Alu</i> sequence are estimated to be present, making it the most abundant
|
|
mobile element. The <i>Alu</i> sequence is so named because of the presence of a
|
|
recognition site for the <i>Alu</i>I endonuclease in the middle of the <i>Alu</i> sequence. Because of the widespread occurrence of the <i>Alu</i>
|
|
repeat in the genome, the <i>Alu</i> sequence is used as a universal primer for
|
|
PCR in animal cell lines; it binds in both forward and reverse directions. The <i>Alu</i> universal primer sequence is as follows: 5′-GTG GAT CAC CTG AGG
|
|
TCA GGA GTT TC-3′
|
|
(26-mer).
|
|
</p></dd><dt id="app3">allele</dt><dd><p>One of the variant forms of a gene at a particular <a class="def" href="/books/NBK21106/def-item/app95/">locus</a> on a chromosome. Different alleles produce variation in inherited
|
|
characteristics such as hair color or blood type. In an individual, one form of the
|
|
allele (the dominant one) may be expressed more than another form (the recessive one).
|
|
When “genes” are considered simply as segments of a nucleotide
|
|
sequence, allele refers to each of the possible alternative nucleotides at a specific
|
|
position in the sequence. For example, a CT polymorphism such as CCT[C/T]CCAT would have
|
|
two alleles: C and T.</p></dd><dt id="app4">API</dt><dd><p>Application Programming Interface. An API is a set of routines that an application uses
|
|
to request and carry out lower-level services performed by a computer's operating
|
|
system. For computers running a graphical user interface, an API manages an
|
|
application's windows, icons, menus, and dialog boxes.</p></dd><dt id="app5">ASN.1</dt><dd><p>Abstract Syntax Notation 1 is an international standard data-representation format used
|
|
to achieve interoperability between computer platforms. It allows for the reliable
|
|
exchange of data in terms of structure and content by computer and software systems of
|
|
all types.</p></dd><dt id="app6">BAC</dt><dd><p>Bacterial Artificial Chromosome. A BAC is a large segment of DNA
|
|
(100,000–200,000 bp) from another species cloned into bacteria. Once the
|
|
foreign DNA has been cloned into the host bacteria, many copies of it can be made.</p></dd><dt id="app7">BankIt</dt><dd><p>BankIt is a tool for the online submission of one or a few sequences into <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a> and is designed to make the submission
|
|
process quick and easy. (BankIt also automatically uses <a href="/VecScreen/VecScreen.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">VecScreen</a> to
|
|
identify segments of nucleic acid sequence that may be of vector, adapter, or linker
|
|
origin to combat the problem of vector contamination in GenBank.)</p></dd><dt id="app8">bit score</dt><dd><p>The value S′ is derived from the raw alignment score S in which the
|
|
statistical properties of the scoring system used have been taken into account. By
|
|
normalizing a raw score using the formula: <div class="graphic"><img src="/books/NBK21106/bin/glossfig1.jpg" alt="S′ = (λS − ln K) / ln 2" /></div> a “bit score” S′ is attained, which has a standard set of units, and where
|
|
K and <i>lambda</i> are the statistical parameters of the scoring system.
|
|
Because bit scores have been normalized with respect to the scoring system, they can be
|
|
used to compare alignment scores from different searches.</p></dd><dt id="app9">BLAST</dt><dd><p>Basic Local Alignment Search Tool (<a href="/pubmed?term=2231712" ref="pagearea=body&targetsite=entrez&targetcat=term&targettype=pubmed">Altschul et
|
|
al., J Mol Biol 215:403-410; 1990</a>). A sequence comparison <a href="/Education/BLASTinfo/BLAST_algorithm.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">algorithm</a> that is optimized for speed and used to search sequence databases
|
|
for optimal local alignments to a query. See the <a href="/books/n/handbook/ch16/?report=reader">BLAST
|
|
chapter</a> (Chapter 16) or the <a href="/Education/BLASTinfo/tut1.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">tutorial</a>
|
|
or the narrative <a href="/Education/BLASTinfo/guide.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">guide</a> to
|
|
BLAST.</p></dd><dt id="app10">blastn</dt><dd><p>nucleotide–nucleotide BLAST. blastn takes nucleotide sequences in <a class="def" href="/books/NBK21106/def-item/app53/">FASTA</a> format, <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a> Accession numbers, or <a class="def" href="/books/NBK21106/def-item/app67/">GI</a> numbers and compares them against the NCBI <a href="/blast/html/blastcgihelp.html#nucleotide_databases" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Nucleotide databases</a>.
|
|
</p></dd><dt id="app11">blastp</dt><dd><p>protein–protein BLAST. blastp takes protein sequences in <a class="def" href="/books/NBK21106/def-item/app53/">FASTA</a> format, <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a> Accession numbers, or <a class="def" href="/books/NBK21106/def-item/app67/">GI</a> numbers and compares them against the NCBI <a href="/blast/html/blastcgihelp.html#protein_databases" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein databases</a>.
|
|
</p></dd><dt id="app12">BLAT</dt><dd><p>A DNA/Protein sequence analysis program to quickly find sequences of 95% and
|
|
greater similarity of length 40 bases or more. It may miss more divergent or shorter
|
|
sequence alignments. BLAT on proteins finds sequences of 80% and greater
|
|
similarity of length 20 amino acids or more. BLAT is not BLAST. (See the <a href="http://genome.ucsc.edu/cgi-bin/hgBlat?command=start" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BLAT web
|
|
page</a>.)</p></dd><dt id="app13">BLink</dt><dd><p>BLAST Link. BLink displays the results of <a class="def" href="/books/NBK21106/def-item/app9/">BLAST</a> searches that have been done for every protein sequence in the Entrez
|
|
Protein data domain. It can be accessed by following the BLink link displayed beside any
|
|
hit in the results of an Entrez Protein search. In contrast to Entrez's <b>Related
|
|
Sequences</b> feature, which lists the titles of similar sequences, BLink displays
|
|
the graphical output of precomputed <a class="def" href="/books/NBK21106/def-item/app11/">blastp</a>
|
|
results against the non-redundant (nr) protein database. The output includes the
|
|
positions of up to 200 BLAST hits on the query sequence, scores, and alignments. BLink
|
|
offers a variety of display options, including the distribution of hits by taxonomic
|
|
grouping, the best hit to each organism, the protein domains in the query sequence,
|
|
similar sequences that have known 3D structures, and more. Additional options allow you
|
|
to specify which taxa you would like to exclude, to increase or decrease the BLAST
|
|
cutoff score, or to filter the BLAST hits to show only those from a specific source
|
|
database, such as <a class="def" href="/books/NBK21106/def-item/app155/">RefSeq</a> or <a class="def" href="/books/NBK21106/def-item/app175/">SWISS-PROT</a>. See the <a href="/sutils/blink.cgi?mode=help" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BLink help
|
|
document</a> for additional information.</p></dd><dt id="app14">BLOB</dt><dd><p>Binary Large Object (or binary data object). BLOB refers to a large piece of data, such
|
|
as a bitmap. A BLOB is characterized by large field values, an unpredictable table size,
|
|
and data that are formless from the perspective of a program. It is also a keyword
|
|
designating the BLOB structure, which contains information about a block of data.</p></dd><dt id="app15">BLOSUM 62</dt><dd><p>Blocks Substitution Matrix. A substitution matrix in which scores for each position are
|
|
derived from observations of the frequencies of substitutions in blocks of local
|
|
alignments in related proteins. Each matrix is tailored to a particular evolutionary
|
|
distance. In the <a href="/Education/BLASTinfo/Scoring2.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">BLOSUM 62
|
|
matrix</a>, for example, the alignment from which scores were derived was
|
|
created using sequences sharing no more than 62% identity. Sequences more
|
|
identical than 62% are represented by a single sequence in the alignment to
|
|
avoid overweighting closely related family members (<a href="/pubmed?term=1438297" ref="pagearea=body&targetsite=entrez&targetcat=term&targettype=pubmed">Henikoff and Henikoff, Proc Natl Acad Sci U S A 89:10915-10919;
|
|
1992</a>).</p></dd><dt id="app16">Boolean</dt><dd><p>This term refers to binary algebra that uses the logical operators AND, OR, XOR, and
|
|
NOT; the outcomes consist of logical values (either TRUE or FALSE). The keyword boolean
|
|
indicates that the expression or constant expression associated with the identifier
|
|
takes the value TRUE or FALSE. The logical-AND (&&)
|
|
operator produces the value 1 if both operands have nonzero values; otherwise, it
|
|
produces the value 0. The logical-OR (||) operator produces the value 1 if
|
|
either of its operands has a nonzero value. The logical-NOT (!) operator
|
|
produces the value 0 if its operand is true (nonzero) and the value 1 if its operand is
|
|
FALSE (0). The exclusive OR (XOR) operator yields TRUE only if one of its operands is
|
|
TRUE and the other is FALSE. If both operands are the same (either TRUE or FALSE), the
|
|
operation yields FALSE.</p></dd><dt id="app209">build</dt><dd><p>A run of the genome assembly and annotation process or the set of products generated by
|
|
that run.</p></dd><dt id="app17">CCAP</dt><dd><p>Cancer Chromosome Aberration Project. CCAP was designed to expedite the definition and
|
|
detailed characterization of the distinct chromosomal alterations that are associated
|
|
with malignant transformation. The project is a collaboration among the <a class="def" href="/books/NBK21106/def-item/app118/">NCI</a>, the <a class="def" href="/books/NBK21106/def-item/app116/">NCBI</a>, and numerous research labs.
|
|
</p></dd><dt id="app18">CD</dt><dd><p>Conserved Domain. CD refers to a domain (a distinct functional and/or structural unit
|
|
of a protein) that has been conserved during evolution. During evolution, changes at
|
|
specific positions of an amino acid sequence in the protein have occurred in a way that
|
|
preserves the physico-chemical properties of the original residues, and hence the
|
|
structural and/or functional properties of that region of the protein.</p></dd><dt id="app19">CDART</dt><dd><p>Conserved Domain Architecture Retrieval Tool. When given a protein query sequence,
|
|
CDART displays the functional domains that make up the protein and lists proteins with
|
|
similar domain architectures. The functional domains for a sequence are found by
|
|
comparing the protein sequence to a database of conserved domain alignments, <a class="def" href="/books/NBK21106/def-item/app20/">CDD</a>, using <a class="def" href="/books/NBK21106/def-item/app159/">RPS-BLAST</a>.</p></dd><dt id="app20">CDD</dt><dd><p>Conserved Domain Database. This database is a collection of sequence alignments and
|
|
profiles representing protein domains conserved during molecular evolution.</p></dd><dt id="app21">cDNA</dt><dd><p>complementary DNA. A <a class="def" href="/books/NBK21106/def-item/app37/">DNA</a> sequence
|
|
obtained by reverse transcription of a messenger RNA (<a class="def" href="/books/NBK21106/def-item/app114/">mRNA</a>) sequence.</p></dd><dt id="app22">CDS</dt><dd><p>coding region, coding sequence. CDS refers to the portion of a genomic DNA sequence
|
|
that is translated, from the start codon to the stop codon, inclusively, if complete. A
|
|
partial CDS lacks part of the complete CDS (it may lack either or both the start and
|
|
stop codons). Successful translation of a CDS results in the synthesis of a
|
|
protein.</p></dd><dt id="app216">CEPH</dt><dd><p>
|
|
<a href="http://www.cephb.fr/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Centre d'Etude du Polymorphisme
|
|
Humain</a>
|
|
</p></dd><dt id="app23">CGAP</dt><dd><p>Cancer Genome Anatomy Project. CGAP is an interdisciplinary program to identify the
|
|
human genes expressed in different cancerous states, based on cDNA (<a class="def" href="/books/NBK21106/def-item/app46/">EST</a>) libraries, and to determine the molecular
|
|
profiles of normal, precancerous, and malignant cells. The project is a collaboration
|
|
among the <a class="def" href="/books/NBK21106/def-item/app118/">NCI</a>, the <a class="def" href="/books/NBK21106/def-item/app116/">NCBI</a>, and numerous research labs.</p></dd><dt id="app24">CGH</dt><dd><p>Comparative Genomic Hybridization. CGH is a fluorescent molecular cytogenetic technique
|
|
that identifies chromosomal aberrations and maps these changes to metaphase chromosomes.
|
|
CGH can be used to generate a map of DNA copy number changes in tumor genomes. CGH is
|
|
based on quantitative two-color fluorescence <i>in situ</i> hybridization
|
|
(<a class="def" href="/books/NBK21106/def-item/app56/">FISH</a>). DNA extracted from tumor cells
|
|
is labeled in one color (e.g., green) and mixed in a 1:1 ratio with DNA from normal
|
|
cells, which is labeled in a different color (e.g., red). The mixture is then applied to
|
|
normal metaphase chromosomes. Portions of the genome that are equally represented in
|
|
normal and tumor cells will appear orange, regions that are deleted in the tumor sample
|
|
relative to the normal sample will appear red, and regions that are present in higher
|
|
copy number in the tumor sample (because of amplification) will appear green. Special
|
|
image analysis tools are necessary to quantitate the ratio of green-to-red fluorescence
|
|
to determine whether a given region is more highly represented in the normal or in the
|
|
tumor sample.</p></dd><dt id="app25">CGI</dt><dd><p>Common Gateway Interface. A mechanism that allows a Web server to run a program or
|
|
script on the server and send the output to a Web browser.</p></dd><dt id="app26">cluster</dt><dd><p>A group that is created based on certain criteria. For example, a gene cluster may
|
|
include a set of genes whose expression profiles are found to be similar
|
|
according to certain criteria, or a cluster may refer to a group of clones that are
|
|
related to each other by homology.
|
|
</p></dd><dt id="app27">Cn3D</dt><dd><p>“See in 3-D” is a structure and sequence alignment viewer for NCBI
|
|
databases. It allows viewing of 3-D structures and sequence–structure or
|
|
structure–structure alignments. Cn3D can work as a helper application to the
|
|
browser or as a client–server application that retrieves structure records
|
|
from the Molecular Modeling Database (MMDB, see below) directly from the internet. The
|
|
<a href="/Structure/CN3D/cn3d.shtml" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Cn3D homepage</a> provides access to information on how to install the program,
|
|
a tutorial to get started, and a comprehensive help document.</p></dd><dt id="app206">codon</dt><dd><p>Sequence of three nucleotides in DNA or mRNA that specifies a particular amino acid
|
|
during protein synthesis; also called a triplet. Of the 64 possible codons, 3 are stop
|
|
codons, which do not specify amino acids.</p></dd><dt id="app28">COGs</dt><dd><p>Clusters of Orthologous Groups (of proteins) were delineated by comparing protein
|
|
sequences from completely sequenced genomes. Each COG consists of individual proteins or
|
|
groups of paralogs from at least three lineages and thus corresponds to an ancient
|
|
conserved domain.</p></dd><dt id="app29">consensus sequence</dt><dd><p>The nucleotides or amino acids found most commonly at each position in the sequences of
|
|
homologous DNAs, RNAs, or proteins.</p></dd><dt id="app30">contig</dt><dd><p>A contiguous segment of the genome made by joining overlapping clones or sequences. A
|
|
clone contig consists of a group of cloned (copied) pieces of DNA representing
|
|
overlapping regions of a particular chromosome. A sequence contig is an extended
|
|
sequence created by merging primary sequences that overlap. A contig map shows the
|
|
regions of a chromosome where contiguous DNA segments overlap. Contig maps provide the
|
|
ability to study a complete and often large segment of the genome by examining a series
|
|
of overlapping clones, which then provide an unbroken succession of information about
|
|
that region.</p></dd><dt id="app217">Coriell</dt><dd><p>
|
|
<a href="http://locus.umdnj.edu/nia/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Coriell Institute of Aging Cell
|
|
Repository</a>
|
|
</p></dd><dt id="app31">CPU</dt><dd><p>Central Processing Unit. The CPU is the computational and control unit of a computer,
|
|
the device that interprets and executes instructions.</p></dd><dt id="app32">CSS</dt><dd><p>Cascading Style Sheets. CSS specify the formatting details that control the
|
|
presentation and layout of <a class="def" href="/books/NBK21106/def-item/app75/">HTML</a> and
|
|
<a class="def" href="/books/NBK21106/def-item/app198/">XML</a> elements. CSS can be used for
|
|
describing the formatting behavior and text decoration of simply structured XML
|
|
documents but cannot display structure that varies from the structure of the source
|
|
data.</p></dd><dt id="app33">Cubby</dt><dd><p>A tool of <a class="def" href="/books/NBK21106/def-item/app45/">Entrez</a>, the <a href="/entrez/login.fcgi?call=so.SignOn..Login" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Cubby</a> stores search strategies that may be updated at any time, stores LinkOut
|
|
preferences to specify which LinkOut providers have to be displayed in PubMed, and
|
|
changes the default document delivery service.</p></dd><dt id="app34">DCMS</dt><dd><p>Data Creation and Maintenance System</p></dd><dt id="app35">DDBJ</dt><dd><p>
|
|
<a href="http://www.ddbj.nig.ac.jp/Welcome-e.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">DNA Data Bank of
|
|
Japan</a>
|
|
</p></dd><dt id="app36">definition line</dt><dd><p>A sequence in FASTA format begins with a single-line description, followed by lines of
|
|
sequence data. The definition line or description line is distinguished from the
|
|
sequence data by a “greater than” (>) symbol in
|
|
the first column (see <a href="/BLAST/fasta.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">example</a>); also
|
|
DEFLINE, as in a flatfile. </p></dd><dt id="app37">DNA</dt><dd><p>Deoxyribonucleic acid is the chemical inside the nucleus of a cell that carries the
|
|
genetic instructions for making living organisms. DNA is composed of two anti-parallel
|
|
strands, each a linear polymer of nucleotides. Each nucleotide has a phosphate group
|
|
linked by a phosphoester bond to a pentose (a five-carbon sugar molecule, deoxyribose),
|
|
that in turn is linked to one of four organic bases, adenine, guanine, cytosine, or
|
|
thymine, abbreviated A, G, C, and T, respectively. The bases are of two types: purines,
|
|
which have two rings and are slightly larger (A and G); and pyrimidines, which have only
|
|
one ring (C and T). Each nucleotide is joined to the next nucleotide in the chain by a
|
|
covalent phosphodiester bond between the 5′ carbon of one deoxyribose group
|
|
and the 3′ carbon of the next. DNA is a helical molecule with the
|
|
sugar–phosphate backbone on the outside and the nucleotides extending toward
|
|
the central axis. There is specific base-pairing between the bases on opposite strands
|
|
in such a way that A always pairs with T and G always pairs with C.</p></dd><dt id="app38">domain</dt><dd><p>A “domain” refers to a discrete portion of a protein assumed to
|
|
fold independently of the rest of the protein and which possesses its own
|
|
function.</p></dd><dt id="app39">draft sequence</dt><dd><p>Draft sequence refers to DNA sequence that is not yet finished but is generally of high
|
|
quality (i.e., an accuracy of greater than 90%). Draft sequence data are mostly
|
|
in the form of 10,000 base pair-sized fragments, the approximate chromosomal locations
|
|
of which are known. The following keywords are associated with draft sequence: phase 0,
|
|
light-pass coverage of a clone, generally only 1× coverage; phase
|
|
1, 4–10× coverage of a <a class="def" href="/books/NBK21106/def-item/app6/">BAC</a> clone
|
|
(order and orientation of the fragments are unknown); and phase 2, 4–10×
|
|
coverage of a BAC clone (order and orientation of the fragments are known). Phase 3
|
|
refers to the completely <a class="def" href="/books/NBK21106/def-item/app55/">finished
|
|
sequence</a>.</p></dd><dt id="app40">DTD</dt><dd><p>Document Type Definition. The DTD is an optional part of the prolog of an XML document
|
|
that defines the rules of the document. It sets constraints for an XML document by
|
|
specifying which elements are present in the document and the relationships between
|
|
elements, e.g., which tags can contain other tags, the number and sequence of the tags,
|
|
and attributes of the tags. The DTD helps to validate the data when the receiving
|
|
application does not have a built-in description of the incoming data.</p></dd><dt id="app41">DUST</dt><dd><p>A program for filtering low-complexity regions from nucleic acid sequences.</p></dd><dt id="app42">E-value</dt><dd><p>Expect value. The E-value is a parameter that describes the number of hits one can
|
|
“expect” to see by chance when searching a database of a particular
|
|
size. It decreases exponentially with the score (S) that is assigned to a match between
|
|
two sequences. Essentially, the E-value describes the random background noise that
|
|
exists for matches between sequences. For example, an E-value of 1 assigned to a hit can
|
|
be interpreted as meaning that in a database of the current size, one might expect to
|
|
see one match with a similar score simply by chance. This means that the lower the
|
|
E-value, or the closer it is to “0”, the higher is the
|
|
“significance” of the match. However, it is important to note that
|
|
searches with short sequences can be virtually identical and have relatively high
|
|
E-value. This is because the calculation of the E-value also takes into account the
|
|
length of the query sequence, since shorter sequences have a high probability
|
|
of occurring in the database purely by chance. For more information, see the following
|
|
<a href="http://www.ncbi.nih.gov/BLAST/tutorial/Altschul-1.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">tutorial</a>.</p></dd><dt id="app43">EC number</dt><dd><p>A number assigned to a type of enzyme according to a scheme of standardized enzyme
|
|
nomenclature developed by the Enzyme Commission of the Nomenclature Committee of the
|
|
International Union of Biochemistry and Molecular Biology (IUBMB). EC numbers may be
|
|
found in <a href="http://us.expasy.org/enzyme/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">ENZYME</a>, the
|
|
Enzyme nomenclature database, maintained at the <a class="def" href="/books/NBK21106/def-item/app52/">ExPASy</a> molecular biology server.
|
|
</p></dd><dt id="app44">EMBL</dt><dd><p>
|
|
<a href="http://www1.embl-heidelberg.de/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">European Molecular Biology
|
|
Laboratory</a>
|
|
</p></dd><dt id="app45">Entrez</dt><dd><p>Entrez is a retrieval system for searching several linked databases. It provides access
|
|
to the following NCBI databases: <a class="def" href="/books/NBK21106/def-item/app150/">PubMed</a>,
|
|
<a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a>, Protein, Structure, Genome,
|
|
PopSet, <a class="def" href="/books/NBK21106/def-item/app124/">OMIM</a>, Taxonomy, Books, ProbeSet,
|
|
3D Domains, <a class="def" href="/books/NBK21106/def-item/app188/">UniSTS</a>, SNP, and <a class="def" href="/books/NBK21106/def-item/app20/">CDD</a>. (See the <a href="/books/n/handbook/ch15/?report=reader">Entrez chapter</a> or the <a href="/Entrez/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Entrez web page</a>.)</p></dd><dt id="app97">Entrez Gene </dt><dd><p>(formerly known as LocusLink). Entrez Gene provides tracked, unique identifiers for
|
|
genes (<a class="def" href="/books/NBK21106/def-item/app96/">GeneID</a>s) and reports information
|
|
associated with those identifiers for unrestricted public use. (See the Entrez Gene
|
|
<a href="/books/n/handbook/ch19/?report=reader">chapter</a> or <a href="http://www.ncbi.nih.gov/entrez/query.fcgi?db=gene" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">web page</a>.)</p></dd><dt id="app46">EST</dt><dd><p>Expressed Sequence Tag. ESTs are short (usually approximately 300–500 base
|
|
pairs), single-pass sequence reads from <a class="def" href="/books/NBK21106/def-item/app21/">cDNA</a>. Typically, they are produced in large batches. They represent the
|
|
genes expressed in a given tissue and/or at a given developmental stage. They are tags
|
|
(some coding, others not) of expression for a given cDNA library. They are useful in
|
|
identifying full-length genes and in mapping.
|
|
</p></dd><dt id="app47">e-PCR</dt><dd><p>Electronic <a class="def" href="/books/NBK21106/def-item/app129/">PCR</a> is used to compare a
|
|
query sequence to mapped sequence-tagged sites (<a class="def" href="/books/NBK21106/def-item/app173/">STS</a>s) to find a possible map location for the query sequence. e-PCR finds
|
|
STSs in DNA sequences by searching for subsequences that closely match the PCR primers
|
|
present in mapped markers. The subsequences must have the correct order, orientation,
|
|
and spacing such that they could plausibly prime the amplification of a PCR product of the
|
|
correct molecular weight.</p></dd><dt id="app48">epub citation</dt><dd><p>“Ahead-of-print” citation. <a class="def" href="/books/NBK21106/def-item/app150/">PubMed</a> now accepts citations from publishers for articles that have been
|
|
published electronically ahead of the printed issue. PubMed displays the category
|
|
“[epub ahead of print]” in the part of the citation where the volume
|
|
and pagination would ordinarily display. For example: Proc Natl Acad Sci U S A. 2000 May
|
|
2 [epub ahead of print].</p></dd><dt id="app49">ExoFish</dt><dd><p>Exon Finding by Sequence Homology. Exofish is a tool based on homology searches for the
|
|
rapid and reliable identification of human genes. It relies on the sequence of another
|
|
vertebrate, the pufferfish <i>Tetraodon nigroviridis</i> (similar to Fugu), to
|
|
detect conserved sequences with a very low background. The genome of <i>T.
|
|
nigroviridis</i> is eight times more compact than the human genome and has been
|
|
used in the comparative identification of human genes from the rough draft of the human
|
|
genome (<a href="/pubmed?term=10835645" ref="pagearea=body&targetsite=entrez&targetcat=term&targettype=pubmed">Roest Crollius et al., Nat Genet
|
|
25:235-238; 2000</a>).</p></dd><dt id="app50">exon</dt><dd><p>Refers to the portion of a gene that encodes a part of that gene's mRNA. A gene may
|
|
comprise many exons, some of which may include only protein-coding sequence; however, an
|
|
exon may also include 5' or 3' untranslated sequence. Each exon codes for a specific
|
|
portion of the complete protein. In some species (including humans), a gene's exons are
|
|
separated by long regions of DNA (called <a class="def" href="/books/NBK21106/def-item/app86/">intron</a>s or sometimes “junk DNA”) that often have no
|
|
apparent function but have been shown to encode small untranslated RNAs or regulatory
|
|
information. (See also <a class="def" href="/books/NBK21106/def-item/app170/">splice
|
|
sites</a>.)</p></dd><dt id="app51">exon-trapped</dt><dd><p>Exon trapping is a technique for cloning exon sequences from genomic DNA by selecting
|
|
for functional splice sites, relying on the cellular splicing machinery. The genomic DNA
|
|
containing the putative exon(s) is cloned into an exon-trap vector, which has a
|
|
promoter, polyadenylation signals, and splice sites, and then transfected into a cell
|
|
line. If there are functional splice sites in the genomic DNA fragment, the segments of
|
|
DNA between the splice sites will be removed. Total RNA is isolated and
|
|
reverse-transcribed. After <a class="def" href="/books/NBK21106/def-item/app21/">cDNA</a> synthesis
|
|
and <a class="def" href="/books/NBK21106/def-item/app129/">PCR</a> amplification, the exon of
|
|
interest is cloned.</p></dd><dt id="app52">ExPASy</dt><dd><p>
|
|
<a href="http://www.expasy.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Expert Protein Analysis System</a>
|
|
is a proteomics server of the Swiss Institute of Bioinformatics (SIB).</p></dd><dt id="app53">FASTA</dt><dd><p>The first widely used algorithm for similarity searching of protein and DNA sequence
|
|
databases. The program looks for optimal local alignments by scanning the sequence for
|
|
small matches called “words”. Initially, the scores of segments in
|
|
which there are multiple word hits are calculated (“init1”). Later,
|
|
the scores of several segments may be summed to generate an “initn”
|
|
score. An optimized alignment that includes gaps is shown in the output as
|
|
“opt”. The sensitivity and speed of the search are inversely related
|
|
and controlled by the “k-tup” variable, which specifies the size of
|
|
a “word” (<a href="/pubmed?term=3162770" ref="pagearea=body&targetsite=entrez&targetcat=term&targettype=pubmed">Pearson and
|
|
Lipman</a>). Also refers to a <a href="/BLAST/fasta.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">format</a> for a nucleic
|
|
acid or protein
|
|
sequence.
|
|
</p></dd><dt id="app54">fingerprint</dt><dd><p>The pattern of bands on a gel produced by a clone when restricted by a particular
|
|
enzyme, such as <i>Hin</i>dIII.</p></dd><dt id="app55">finished sequence</dt><dd><p>High-quality, low-error DNA sequence that is free of gaps. To qualify as a finished sequence, only a single error out of every 10,000 bases (i.e., an accuracy of
|
|
99.99%) is allowed.</p></dd><dt id="app56">FISH</dt><dd><p>Fluorescence <i>in situ</i> hybridization. In this technique, fluorescent
|
|
molecules are used to label a <a class="def" href="/books/NBK21106/def-item/app37/">DNA</a> probe,
|
|
which can then hybridize to a specific DNA sequence in a chromosome spread so that the
|
|
site becomes visible through a microscope. FISH has been used to highlight the locations
|
|
of genes, subchromosome regions, entire chromosomes, or specific DNA sequences. It has
|
|
been used for mapping and the detection of genomic rearrangements, as well as studies on
|
|
DNA replication.
|
|
</p></dd><dt id="app57">flatfile or flat file</dt><dd><p>A flat file is a data file that contains records (each corresponding to a row in a
|
|
table); however, these records have no structured relationships. To interpret these
|
|
files, the format properties of the file should be known. For example, a database
|
|
management system may allow the user to export data to a comma-delimited file. Such a
|
|
file is called a flat file because it has no inherent information about the data, and
|
|
interpretation requires additional information. Files in a database management system
|
|
have more complex storage structures.
|
|
</p></dd><dt id="app210">freeze</dt><dd><p>To copy changing data so as to preserve the dataset as it existed at a particular point
|
|
in time. Also used to refer to the resulting set of frozen data.</p></dd><dt id="app58">FTP</dt><dd><p>File Transfer Protocol. A method of retrieving files over a network directly to the
|
|
user's computer or to his/her home directory using a set of protocols that govern how
|
|
the data are to be transported.</p></dd><dt id="app59">gap</dt><dd><p>A gap is a space introduced into an alignment to compensate for insertions and
|
|
deletions in one sequence relative to another. To prevent the accumulation of too many
|
|
gaps in an alignment, introduction of a gap causes the deduction of a fixed amount (the
|
|
gap score) from the alignment score. Extension of the gap to encompass additional
|
|
nucleotides or amino acids is also penalized in the scoring of an alignment. (See the
|
|
<a href="/Education/BLASTinfo/Alignment_Scores2.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">figure</a> for more information.)</p></dd><dt id="app60">GB</dt><dd><p>gigabytes</p></dd><dt id="app61">GBFF</dt><dd><p>
|
|
<a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a> Flat File. Refers to a format
|
|
.gbff.
|
|
</p></dd><dt id="app62">GenBank</dt><dd><p>GenBank is a database of nucleotide sequences from more than 100,000 organisms. Records
|
|
that are annotated with coding region features also include amino acid translations.
|
|
GenBank belongs to an international collaboration of sequence databases that also
|
|
includes <a class="def" href="/books/NBK21106/def-item/app44/">EMBL</a> and <a class="def" href="/books/NBK21106/def-item/app35/">DDBJ</a>. [See the <a href="/books/n/handbook/ch1/?report=reader">GenBank</a> chapter (Chapter 1) or the <a href="/Genbank/submit.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GenBank web
|
|
page</a>.]</p></dd><dt id="app96">GeneID</dt><dd><p>GeneID is a unique identifier that is assigned to a gene record in <a class="def" href="/books/NBK21106/def-item/app97/">Entrez Gene</a>. It is an integer and is species
|
|
specific. In other words, the integer assigned to dystrophin in human is different from
|
|
that in any other species. For genomes that had been represented in LocusLink, the
|
|
GeneID is the same as the LocusID. The GeneID is reported in RefSeq records as a
|
|
'db_xref' (e.g. /db_xref="GeneID:856646", in GenBank format). </p></dd><dt id="app63">genetic code</dt><dd><p>The instructions in a gene that tell the cell how to make a specific protein. A, T, G,
|
|
and C are the “letters” of the <a class="def" href="/books/NBK21106/def-item/app37/">DNA</a> code; they stand for the chemicals adenine, thymine, guanine, and
|
|
cytosine, respectively, that make up the nucleotide bases of DNA. Each gene's code
|
|
combines the four chemicals in various ways to spell out three-letter
|
|
“words” that specify which amino acid is needed at every position
|
|
for making a protein.
|
|
</p></dd><dt id="app64">GenomeScan</dt><dd><p>A gene identification algorithm that is used to identify exon–intron
|
|
structures in genomic DNA sequence.
|
|
</p></dd><dt id="app65">genotype</dt><dd><p>The genetic identity of an individual that does not show as outward characteristics.
|
|
The genotype refers to the pair of alleles for a given region of the genome that an
|
|
individual carries.</p></dd><dt id="app66">GEO</dt><dd><p>Gene Expression Omnibus. GEO is a gene expression data repository and online resource
|
|
for the retrieval of gene expression data from any organism or artificial source. Many
|
|
types of gene expression data from platform types, such as spotted microarray,
|
|
high-density oligonucleotide array, hybridization filter, and serial analysis of gene
|
|
expression (<a class="def" href="/books/NBK21106/def-item/app160/">SAGE</a>) data, are accepted,
|
|
accessioned, and archived as a public dataset. [See the <a href="/books/n/handbook/ch6/?report=reader">GEO chapter</a> (Chapter 6) or the <a href="/geo/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">GEO web page</a>.]</p></dd><dt id="app67">GI</dt><dd><p>The GenInfo Identifier is a sequence identification number for a nucleotide sequence.
|
|
If a nucleotide sequence changes in any way, a new GI number will be assigned. A
|
|
separate GI number is also assigned to each protein translation within a nucleotide
|
|
sequence record, and a new GI is assigned if the protein translation changes in any way.
|
|
GI sequence identifiers run parallel to the new accession.version system of sequence
|
|
identifiers (see the description of <a href="/Sitemap/samplerecord.html#VersionB" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Version</a>).</p></dd><dt id="app68">GSS</dt><dd><p>Genome Survey Sequences are analogous to <a class="def" href="/books/NBK21106/def-item/app46/">EST</a>s except that the sequences are genomic in origin, rather than cDNA
|
|
(mRNA). The GSS division of <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a>
|
|
contains (but is not limited to) the following types of data: random
|
|
“single-pass read” genome survey sequences, cosmid/<a class="def" href="/books/NBK21106/def-item/app6/">BAC</a>/<a class="def" href="/books/NBK21106/def-item/app201/">YAC</a> end sequences, <a class="def" href="/books/NBK21106/def-item/app51/">exon-trapped</a> genomic sequences, and <a class="def" href="/books/NBK21106/def-item/app2/"><i>Alu</i></a>-PCR sequences.</p></dd><dt id="app69">heterozygosity</dt><dd><p>The probability that a diploid individual will have two different alleles at a
|
|
particular genome locus. These individuals are defined as heterozygous, whereas
|
|
individuals who have two identical alleles at the locus are defined as homozygous. The
|
|
probability can be estimated by sampling a representative number of individuals from the
|
|
population and dividing the number of heterozygotes by the total number sampled.</p></dd><dt id="app70">HIV</dt><dd><p>Human Immunodeficiency Virus. HIV-1 is a retrovirus that is recognized as the causative
|
|
agent of AIDS (Acquired Immunodeficiency Syndrome).
|
|
</p></dd><dt id="app71">HNPCC</dt><dd><p>Hereditary nonpolyposis colon cancer</p></dd><dt id="app72">homogeneously staining region</dt><dd><p>A region of the chromosome identified cytologically by DNA staining or the <a class="def" href="/books/NBK21106/def-item/app56/">FISH</a> technique because of the presence of
|
|
multiple copies of a subchromosomal region resulting from amplification.
|
|
</p></dd><dt id="app73">homologous</dt><dd><p>The term refers to similarity attributable to descent from a common ancestor.
|
|
Homologous chromosomes are members of a pair of essentially identical chromosomes, each
|
|
derived from one parent. They have the same or allelic genes with genetic loci arranged
|
|
in the same order. Homologous chromosomes synapse during
|
|
meiosis.
|
|
</p></dd><dt id="app74">HTGS</dt><dd><p>High-Throughput Genomic Sequences. The source of HTGS are large-scale genome sequencing
|
|
centers; <a class="def" href="/books/NBK21106/def-item/app186/">unfinished sequence</a>s are in
|
|
phases 0, 1, and 2, and <a class="def" href="/books/NBK21106/def-item/app55/">finished sequence</a>s
|
|
are in phase 3.</p></dd><dt id="app211">HTGS_CANCELLED</dt><dd><p>A keyword added to GenBank entries by sequencing centers to indicate that work has
|
|
stopped on a clone and that the existing sequence will not be finished. Sequencing
|
|
centers may stop work because the clone is redundant or for various other reasons.</p></dd><dt id="app212">HTGS_PHASE0, HTGS_PHASE1, HTGS_PHASE2, HTGS_PHASE3</dt><dd><p>Keywords added to GenBank entries by sequencing centers to indicate the status (phase)
|
|
of the sequence (see phase definitions described under <a class="def" href="/books/NBK21106/def-item/app39/">draft sequence</a>).</p></dd><dt id="app75">HTML</dt><dd><p>Hypertext Markup Language. HTML is derived from <a class="def" href="/books/NBK21106/def-item/app163/">SGML</a>. It is a text-based mark-up language and is used primarily to
|
|
display information using a web browser and to link pieces of information via
|
|
hyperlinks. The tags used in an HTML document provide information only on how the
|
|
content is to be displayed but do not provide information about the content they
|
|
encompass.</p></dd><dt id="app76">HUP</dt><dd><p>Hold Until Published. HUP refers to the category for data that is electronically
|
|
submitted for when it should be released to the public.
|
|
</p></dd><dt id="app77">ICBN</dt><dd><p>International Code of Botanical Nomenclature</p></dd><dt id="app78">ICD</dt><dd><p>International Classification of
|
|
Diseases
|
|
</p></dd><dt id="app79">ICD-O-3</dt><dd><p>
|
|
<a href="http://training.seer.cancer.gov/module_icdo3/icdo3_home.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">International Classification of Diseases for Oncology, 3rd edition</a>
|
|
</p></dd><dt id="app80">ICNB</dt><dd><p>International Code of Nomenclature of Bacteria</p></dd><dt id="app81">ICNCP</dt><dd><p>International Code of Nomenclature for Cultivated Plants</p></dd><dt id="app82">ICTV</dt><dd><p>
|
|
<a href="/ICTV/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">International Committee on
|
|
Taxonomy of Viruses</a>
|
|
</p></dd><dt id="app83">ICVCN</dt><dd><p>
|
|
<a href="/ICTV/rules.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">International
|
|
Code of Virus Classification and Nomenclature</a>
|
|
</p></dd><dt id="app84">ICZN</dt><dd><p>
|
|
<a href="http://www.iczn.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">International Code of Zoological
|
|
Nomenclature</a>
|
|
</p></dd><dt id="app203">ideogram</dt><dd><p>A diagrammatic representation of the karyotype of an organism.</p></dd><dt id="app85">IMAGE Consortium</dt><dd><p>Integrated Molecular Analysis of Genomes and their Expression. A consortium of academic
|
|
groups that share high-quality, arrayed cDNA libraries and place sequence, map, and
|
|
expression data of the clones in these arrays into the public domain. With the use of
|
|
this information, unique clones can be rearrayed to form a “master
|
|
array”, with the aim of ultimately having a representative cDNA from every
|
|
gene in the genome under study. To date, human, mouse, rat, zebrafish, and <i>Xenopus laevis</i> genomes have been
|
|
studied.
|
|
</p></dd><dt id="app86">intron</dt><dd><p>Refers to that portion of the DNA sequence that is present in the primary transcript
|
|
and that is removed by splicing during RNA processing and is not included in the mature,
|
|
functional <a class="def" href="/books/NBK21106/def-item/app114/">mRNA</a>, rRNA, or tRNA. Also
|
|
called an intervening sequence. (See also <a class="def" href="/books/NBK21106/def-item/app170/">splice
|
|
sites</a>.)</p></dd><dt id="app87">ISAM</dt><dd><p>Indexed Sequential-Access Method. ISAM is a database access method. It allows data
|
|
records in a database to be accessed either sequentially (in the order in which they
|
|
were entered) or randomly (using an index). In the index, each record has a unique key
|
|
that enables its rapid location. The key is the field used to reference the
|
|
record.</p></dd><dt id="app88">ISCN</dt><dd><p>International System for Human Cytogenetic Nomenclature
|
|
</p></dd><dt id="app89">ISO</dt><dd><p>
|
|
<a href="http://www.iso.ch/iso/en/ISOOnline.openerpage" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">International
|
|
Organization for Standardization</a>
|
|
</p></dd><dt id="app90">ISSN</dt><dd><p>
|
|
<a href="http://www.issn.org:8080/English/pub/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">International Standard
|
|
Serial Number</a>. The ISSN is an eight-digit number that identifies periodical
|
|
publications, including electronic serials.</p></dd><dt id="app91">karyotype</dt><dd><p>The particular chromosome complement of an individual or a related group of
|
|
individuals, as defined by both the number and morphology of the chromosomes, usually in
|
|
mitotic metaphase, and arranged by pairs according to the standard classification.</p></dd><dt id="app92">LANL</dt><dd><p>
|
|
<a href="http://www.lanl.gov/worldview/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Los Alamos National
|
|
Lab</a>
|
|
</p></dd><dt id="app93">LIMS</dt><dd><p>Laboratory Information Management Systems. LIMS comprise software that helps biological
|
|
and chemical laboratories handle data generation, information management, and data
|
|
archiving.</p></dd><dt id="app94">LinkOut</dt><dd><p>A registry service to create links from specific articles, journals, or biological data
|
|
in <a class="def" href="/books/NBK21106/def-item/app45/">Entrez</a> to resources on external web
|
|
sites. Third parties can provide a URL, resource name, brief description of their web
|
|
sites, and specification of the NCBI data from which they would like to establish links.
|
|
The specification can be written as a valid Boolean query to Entrez or as a list of
|
|
identifiers for specific articles or sequences. Entrez PubMed users can then select
|
|
which external links are visible in their searches through the NCBI Cubby service (see
|
|
above). (See the <a href="/books/n/handbook/ch17/?report=reader">LinkOut</a> chapter or <a href="/entrez/linkout/doc/linkoutoverview.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">web
|
|
page</a>.)</p></dd><dt id="app95">locus</dt><dd><p>In a genomic context, locus refers to a position on a chromosome. It may, therefore,
|
|
refer to a marker, a gene, or any other landmark that can be described.</p></dd><dt id="app98">MACAW</dt><dd><p>Multiple Alignment Construction and Analysis Workbench. MACAW is a program for
|
|
locating, analyzing, and editing blocks of localized sequence similarity among multiple
|
|
sequences and linking them into a composite multiple alignment.</p></dd><dt id="app99">Map Viewer</dt><dd><p>The Map Viewer is a software component of <a href="/entrez/query.fcgi?db=Genome" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Entrez
|
|
Genomes</a> that provides special browsing capabilities for a subset of organisms.
|
|
It allows one to view and search an organism's complete genome, display chromosome maps,
|
|
and zoom into progressively greater levels of detail, down to the sequence data for a
|
|
region of interest. If multiple maps are available for a chromosome, it displays them
|
|
aligned to each other based on shared marker and gene names and, for the sequence maps,
|
|
based on a common sequence coordinate system. The organisms currently represented in the
|
|
Map Viewer are listed in the <a href="/PMGifs/Genomes/MapViewerHelp.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Entrez Map
|
|
Viewer help document</a>, which provides general information on how to use that
|
|
tool. The number and types of available maps vary by organism and are described in the
|
|
“data and search tips” file provided for each organism.</p></dd><dt id="app100">MB</dt><dd><p>megabytes</p></dd><dt id="app102">MEDLINE</dt><dd><p>MEDLINE is <a class="def" href="/books/NBK21106/def-item/app121/">NLM</a>'s database of indexed
|
|
journal citations and abstracts in the fields of biomedicine and healthcare. It
|
|
encompasses nearly 4,500 journals published in the United States and more than 70 other
|
|
countries. (For more information, see the <a href="http://www.nlm.nih.gov/pubs/factsheets/dif_med_pub.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Fact
|
|
Sheet</a>.)</p></dd><dt id="app103">MegaBLAST</dt><dd><p>MegaBLAST is a program for aligning sequences that differ slightly as a result of
|
|
sequencing or other similar “errors”. When a larger word size is used,
|
|
it is up to 10 times faster than more common sequence-similarity programs. MegaBLAST is
|
|
also able to efficiently handle much longer DNA sequences than the <a class="def" href="/books/NBK21106/def-item/app10/">blastn</a> program of the traditional BLAST algorithm. It uses
|
|
the GREEDY algorithm for a nucleotide sequence alignment search.</p></dd><dt id="app104">MeSH</dt><dd><p>Medical Subject Headings. MeSH refers to the controlled vocabulary of <a class="def" href="/books/NBK21106/def-item/app121/">NLM</a> used for indexing articles in PubMed. MeSH
|
|
terminology provides a consistent way to retrieve information that may use different
|
|
terminology for the same concepts. (See the <a href="http://www.nlm.nih.gov/mesh/meshhome.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">MeSH homepage</a>.)</p></dd><dt id="app105">Metathesaurus</dt><dd><p>
|
|
<a href="http://ncievs.nci.nih.gov/indexMetaphrase.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Metathesaurus</a> is a National Cancer Institute browser containing different
|
|
biomedical vocabularies, including the International Classification of Diseases for
|
|
Oncology <a class="def" href="/books/NBK21106/def-item/app79/">ICD-O-3</a>.</p></dd><dt id="app106">mFASTA</dt><dd><p>Multi-FASTA format.
|
|
</p></dd><dt id="app107">MGC</dt><dd><p>Mammalian Gene Collection. <a href="http://mgc.nci.nih.gov/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">MGC</a> is a project of the <a class="def" href="/books/NBK21106/def-item/app120/">NIH</a> to
|
|
provide a complete set of full-length (open reading frame) sequences and cDNA clones of
|
|
expressed genes for human and mouse.</p></dd><dt id="app108">MGD</dt><dd><p>
|
|
<a href="http://www.informatics.jax.org/mgihome/MGD/aboutMGD.shtml" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Mouse Genome Database</a>. MGD contains information on mouse genetic markers,
|
|
molecular segments, phenotypes, comparative mapping data, experimental mapping data, and
|
|
graphical displays for genetic, physical, and cytogenetic maps.</p></dd><dt id="app109">MGI</dt><dd><p>
|
|
<a href="http://www.informatics.jax.org/mgihome/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Mouse Genome
|
|
Informatics</a>. MGI houses a database that provides integrated access to data
|
|
on the genetics, genomics, and biology of the laboratory mouse.</p></dd><dt id="app110">microsatellite</dt><dd><p>Repetitive stretches of short sequences of DNA used as genetic markers to track
|
|
inheritance in families (e.g., CC[TATATATA]CCCT). Also known as short tandem repeats
|
|
(STRs).</p></dd><dt id="app111">MIM</dt><dd><p>Mendelian Inheritance in Man. First published in 1966, <i><a href="http://www.press.jhu.edu/press/books/titles/f97/f97mcme.htm" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Mendelian
|
|
Inheritance in Man (MIM)</a></i> is a genetic knowledge base that serves clinical medicine and biomedical research,
|
|
including the Human Genome Project.</p></dd><dt id="app213">minimal tiling path</dt><dd><p>An ordered list or map that defines the minimal set of overlapping clones needed to
|
|
provide complete coverage of a chromosome or other extended segment of DNA (compare with
|
|
<a class="def" href="/books/NBK21106/def-item/app182/">tiling path</a>).</p></dd><dt id="app112">MMDB</dt><dd><p>Molecular Modeling Database. MMDB is a database of three-dimensional biomolecular
|
|
structures derived from X-ray crystallography and nuclear magnetic resonance (NMR)
|
|
spectroscopy.</p></dd><dt id="app113">MMDB-ID</dt><dd><p>Molecular Modeling Database Accession number.</p></dd><dt id="app114">mRNA</dt><dd><p>messenger RNA. mRNA describes the section of a genomic DNA sequence that is
|
|
transcribed, and can include the 5' untranslated region (5'UTR), <a class="def" href="/books/NBK21106/def-item/app22/">CDS</a>, and 3' untranslated region (3'UTR). Successful
|
|
translation of the CDS section of an mRNA results in the synthesis of a protein.</p></dd><dt id="app115">mutation</dt><dd><p>A permanent structural alteration in DNA. In most cases, DNA changes have either no
|
|
effect or cause harm, but occasionally a mutation can improve an organism's chance of
|
|
surviving, and the beneficial change is passed on to the organism's descendants.
|
|
Typically, mutations are more rare than polymorphisms in population samples because
|
|
natural selection recognizes their lower fitness and removes them from the
|
|
population.</p></dd><dt id="app116">NCBI</dt><dd><p>
|
|
<a href="http://www.ncbi.nlm.nih.gov/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">National Center for
|
|
Biotechnology Information</a>
|
|
</p></dd><dt id="app117">NCBI Toolkit</dt><dd><p>Contains supported software tools from the Information Engineering Branch (IEB) of the
|
|
NCBI. The NCBI Toolkit describes the three components of the ToolBox: data model, data
|
|
encoding, and programming libraries. Provides access to documentation for the DataModel,
|
|
C Toolkit, C++ Toolkit, NCBI C Toolkit Source Browser, XML Demo Program, XML DTDs, and
|
|
the <a href="ftp://ftp.ncbi.nih.gov/toolbox/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">FTP site</a>.</p></dd><dt id="app118">NCI</dt><dd><p>
|
|
<a href="http://www.nci.nih.gov/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">National Cancer Institute</a>
|
|
</p></dd><dt id="app119">NEXUS</dt><dd><p>NEXUS refers to a file format designed to contain data for processing by computer
|
|
programs. NEXUS files should end with .nxs or .nex for purposes of clarity (<a href="/pubmed?term=11975335" ref="pagearea=body&targetsite=entrez&targetcat=term&targettype=pubmed">Maddison et al., Syst Biol 46:590-621;
|
|
1997</a>).</p></dd><dt id="app120">NIH</dt><dd><p>
|
|
<a href="http://www.nih.gov/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">National Institutes of Health</a>
|
|
</p></dd><dt id="app121">NLM</dt><dd><p>
|
|
<a href="http://www.nlm.nih.gov/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">National Library of Medicine</a>
|
|
</p></dd><dt id="app122">NMR</dt><dd><p>Nuclear Magnetic Resonance. NMR is a spectroscopic technique used for the determination
|
|
of protein structure.</p></dd><dt id="app123">nr-PDB</dt><dd><p>non-redundant Protein Data Bank</p></dd><dt id="app124">OMIM</dt><dd><p>Online Mendelian Inheritance in Man. OMIM is a directory of human genes and genetic
|
|
disorders, with links to literature references, sequence records, maps, and related
|
|
databases.</p></dd><dt id="app125">ortholog</dt><dd><p>Orthologs are genes in different species that derive from a single ancestral gene
|
|
in the last common ancestor of the respective species.</p></dd><dt id="app126">orthology</dt><dd><p>Orthology describes genes in different species that derive from a common ancestor,
|
|
i.e., they are direct evolutionary counterparts.</p></dd><dt id="app127">paralog</dt><dd><p>A paralog is one of a set of homologous genes that have diverged from each other as a
|
|
consequence of gene duplication. For example, the mouse α-<i>globin</i> and β-<i>globin</i> genes are paralogs. The relationship
|
|
between mouse α-<i>globin</i> and chick β-<i>globin</i> is also considered paralogous.</p></dd><dt id="app128">paralogy</dt><dd><p>Paralogy describes the relationship of homologous genes that arose by gene
|
|
duplication.</p></dd><dt id="app129">PCR</dt><dd><p>Polymerase Chain Reaction. A technique for amplifying a specific DNA segment in a
|
|
complex mixture. Also present in the DNA mixture are short oligonucleotide primers to
|
|
the DNA segment of interest and reagents for DNA synthesis. PCR relies on the ability of
|
|
DNA to separate into its two complementary strands at high temperature (a process called
|
|
denaturation) and for the two strands to anneal at an optimal lower temperature
|
|
(annealing). The annealing phase is followed by a DNA synthesis step at an optimal
|
|
temperature for a heat-stable DNA polymerase. After multiple rounds of denaturation,
|
|
annealing, and DNA synthesis, the DNA sequence specified by the oligonucleotide primers
|
|
is amplified.</p></dd><dt id="app130">PDB</dt><dd><p>
|
|
<a href="http://www.rcsb.org/pdb/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein Data Bank</a>. The PDB
|
|
is a database for 3D macromolecular structure data.</p></dd><dt id="app131">Pfam</dt><dd><p>
|
|
<a href="http://pfam.wustl.edu/index.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Pfam</a> is a database
|
|
housing a large collection of multiple sequence alignments and hidden Markov models
|
|
covering many common protein domains.</p></dd><dt id="app132">phenotype</dt><dd><p>The observable traits or characteristics of an organism, e.g., hair color, weight, or
|
|
the presence or absence of a disease. Phenotypic traits are not necessarily
|
|
genetic.</p></dd><dt id="app133">PHRAP</dt><dd><p>A computer program that assembles raw sequence into sequence contigs (see above) and
|
|
assigns to each position in the sequence an associated “quality
|
|
score”, on the basis of the <a class="def" href="/books/NBK21106/def-item/app134/">PHRED</a> scores of the raw sequence reads. A PHRAP quality score of <i>X</i> corresponds to an error probability of approximately 10<sup>-<i>X</i>/10</sup>
|
|
. Thus, a PHRAP quality score of 30 corresponds to 99.9% accuracy
|
|
for a base in the assembled sequence.
|
|
</p></dd><dt id="app134">PHRED</dt><dd><p>A computer program that analyses raw sequence to produce a “base
|
|
call” with an associated “quality score” for each position
|
|
in the sequence. A PHRED quality score of <i>X</i> corresponds to an error
|
|
probability of approximately 10<sup>-<i>X</i>/10</sup>
|
|
. Thus, a PHRED quality score of 30 corresponds to
|
|
99.9% accuracy for the base call in the raw
|
|
read.
|
|
</p></dd><dt id="app135">phyletic pattern</dt><dd><p>Pattern of presence–absence of a cluster of orthologs (COG) in different
|
|
species.</p></dd><dt id="app136">PHYLIP</dt><dd><p>
|
|
<a href="http://evolution.genetics.washington.edu/phylip.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">PHYLogeny Inference Package</a>. A package of programs for various computer
|
|
platforms to infer phylogenies or evolutionary trees, freely available from the
|
|
Web.</p></dd><dt id="app137">PIR</dt><dd><p>
|
|
<a href="http://pir.georgetown.edu/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein Information
|
|
Resource</a>
|
|
</p></dd><dt id="app138">PMC</dt><dd><p>
|
|
<a href="http://www.pubmedcentral.nih.gov/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">PubMed Central</a>.
|
|
NLM's digital archive of life sciences journal literature.</p></dd><dt id="app139">PMID</dt><dd><p>PubMed ID number</p></dd><dt id="app140">PNG</dt><dd><p>Portable Network Graphics. An extensible file format for the lossless, well-compressed
|
|
storage of raster images (images that are composed of horizontal lines of pixels, such
|
|
as those created by a computer screen). Compression of image, media, and application
|
|
files is necessary to reduce the transmission time across the web. The technique of
|
|
lossless compression reduces the size of the file without sacrificing any original data,
|
|
and the image after expansion is exactly as it was before compression. PNG overcomes the
|
|
patent issues of GIF (Graphic Interchange Format) and can replace many common uses of
|
|
TIFF (Tagged Image File Format). Several features such as indexed color, grayscale, and
|
|
truecolor are supported, as well as an optional alpha-channel. PNG is designed to work
|
|
well in online viewing applications and is supported as an image standard by the
|
|
<a class="def" href="/books/NBK21106/def-item/app197/">WWW</a>.</p></dd><dt id="app141">poly A</dt><dd><p>A string of adenylic acid residues that are added to the 3′ end of the
|
|
primary <a class="def" href="/books/NBK21106/def-item/app114/">mRNA</a> transcript. Poly(A)
|
|
polymerase is the enzyme that adds the poly A tail, which is between 100 and 250 bases
|
|
long.</p></dd><dt id="app142">polymorphism</dt><dd><p>A common variation in the sequence of <a class="def" href="/books/NBK21106/def-item/app37/">DNA</a>
|
|
among individuals. Genetic variations occurring in more than 1% of the
|
|
population would be considered useful polymorphisms for genetic linkage analysis.</p></dd><dt id="app204">polypeptide</dt><dd><p>Linear polymer of amino acids connected by peptide bonds. Proteins are large
|
|
polypeptides, and the two terms are commonly used interchangeably.</p></dd><dt id="app143">PRF</dt><dd><p>
|
|
<a href="http://www.prf.or.jp/en/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein Research Foundation</a>
|
|
</p></dd><dt id="app144">private polymorphism</dt><dd><p>Variations that are only common in specific populations. Usually such populations are
|
|
reproductively isolated from other, larger groups. These variations may be completely
|
|
absent in other groups.</p></dd><dt id="app145">ProtEST</dt><dd><p>A database of protein sequences from eight organisms: human (<i>Homo
|
|
sapiens</i>), mouse (<i>Mus musculus</i>), rat (<i>Rattus
|
|
norvegicus</i>), fruitfly (<i>Drosophila melanogaster</i>), worm (<i>Caenorhabditis elegans</i>), yeast (<i>Saccharomyces cerevisiae</i>),
|
|
plant (<i>Arabidopsis thaliana</i>), and bacteria (<i>Escherichia
|
|
coli</i>). (See the <a href="/UniGene/ProtEST/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">ProtEST web
|
|
page</a>.)</p></dd><dt id="app146">PROW</dt><dd><p>
|
|
<a href="/PROW/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Protein Reviews On the
|
|
Web</a>. An online resource that features PROW Guides—authoritative,
|
|
short, structured reviews on proteins and protein families. The Guides provide
|
|
approximately 20 standardized categories of information (abstract, biochemical function,
|
|
ligands, references, etc.) for each protein.</p></dd><dt id="app147">pseudogene</dt><dd><p>A sequence of DNA that is very similar to a normal gene but that has been altered
|
|
slightly so that it is not expressed. Such genes were probably once functional but, over
|
|
time, acquired one or more mutations that rendered them incapable of producing a protein
|
|
product.</p></dd><dt id="app148">PSI-BLAST</dt><dd><p>Position-Specific Iterated BLAST. PSI-BLAST (<a href="/pubmed?term=9254694" ref="pagearea=body&targetsite=entrez&targetcat=term&targettype=pubmed">Altschul et al., Nucleic Acids Res 25:3389-3402; 1997</a>) is used for iterative
|
|
protein–sequence similarity searches using a position-specific score matrix
|
|
(<a class="def" href="/books/NBK21106/def-item/app149/">PSSM</a>). It is a program for
|
|
searching protein databases using protein queries to find other members of the same
|
|
protein family. All statistically significant alignments found by <a class="def" href="/books/NBK21106/def-item/app9/">BLAST</a> are combined into a multiple alignment, from which a
|
|
PSSM is constructed. This matrix is used to search the database for additional
|
|
significant alignments, and the process may be iterated until no new alignments are
|
|
found.</p></dd><dt id="app149">PSSM</dt><dd><p>Position-Specific Score Matrix. The PSSM gives the log-odds score for finding a
|
|
particular matching amino acid in a target sequence.</p></dd><dt id="app150">PubMed</dt><dd><p>A retrieval system containing citations, abstracts, and indexing terms for journal
|
|
articles in the biomedical sciences. It includes literature citations supplied directly
|
|
to NCBI by publishers as well as <a class="def" href="/books/NBK21106/def-item/app190/">URL</a>s to
|
|
full text articles on the publishers' web sites. PubMed contains the complete contents
|
|
of the <a class="def" href="/books/NBK21106/def-item/app102/">MEDLINE</a> and PREMEDLINE databases.
|
|
It also contains some articles and journals considered out of scope for MEDLINE, based
|
|
on either content or on a period of time when the journal was not indexed and,
|
|
therefore, is a superset of
|
|
MEDLINE.
|
|
</p></dd><dt id="app151">PXML</dt><dd><p>PubMed Central XML file</p></dd><dt id="app152">QBLAST</dt><dd><p>A queuing system to BLAST that allows users to retrieve their results at their
|
|
convenience and format their results multiple times with different formatting
|
|
options.</p></dd><dt id="app153">QTL</dt><dd><p>Quantitative Trait Locus. A QTL is a hypothesis that a certain region of the chromosome
|
|
contains genes that contribute significantly to the expression of a complex trait. QTLs
|
|
are generally identified by comparing the linkage of polymorphic molecular markers and
|
|
phenotypic trait measurements. The density of the linkage map is important in the
|
|
accurate and precise location of QTLs; the higher the map density, the more precise the
|
|
location of the putative QTL, although there is increased likelihood that false
|
|
positives will be detected. Once QTLs have been mapped to a relatively small chromosomal
|
|
region, other molecular methods can be used to isolate specific genes.</p></dd><dt id="app154">RCSB</dt><dd><p>
|
|
<a href="http://www.rcsb.org/index.html" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Research Collaboratory for
|
|
Structural Bioinformatics</a>. RCSB is a nonprofit consortium that works toward
|
|
the elucidation of biological, macromolecular, 3-D structures.</p></dd><dt id="app215">Reciprocal best hits</dt><dd><p>Reciprocal best hits are proteins from different organisms that are each other's top
|
|
BLAST hit, when the proteomes from those organisms are compared to each other. For
|
|
example, proteins A–Z in organism 1 are compared against proteins
|
|
AA–ZZ in organism 2. If protein A has a best hit to protein RR, and RR's best
|
|
hit, when it is compared to all the proteins in organism 1, also turns out to be protein A,
|
|
then A and RR are reciprocal best hits. However, if RR's best hit is to B rather than to
|
|
A, then A and RR are not reciprocal best hits.</p></dd><dt id="app155">RefSeq</dt><dd><p>RefSeq is the NCBI database of reference sequences; a curated, non-redundant set
|
|
including genomic DNA contigs, mRNAs and proteins for known genes, and entire
|
|
chromosomes.</p></dd><dt id="app218">RepeatMasker</dt><dd><p>
|
|
<a href="http://ftp.genome.washington.edu/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Program</a> that
|
|
screens DNA sequences for interspersed repeats and low-complexity DNA sequences.</p></dd><dt id="app156">RFLP</dt><dd><p>Restriction Fragment Length Polymorphism. Genetic variations at the site where a
|
|
restriction enzyme cuts a piece of DNA. Such variations affect the size of the resulting
|
|
fragments. These sequences can be used as markers on physical maps and linkage maps.
|
|
RFLP is also pronounced “rif lip”.</p></dd><dt id="app157">RH map</dt><dd><p>Radiation Hybrid map. A genome map in which <a class="def" href="/books/NBK21106/def-item/app173/">STS</a>s are positioned relative to one another on the basis of the frequency
|
|
with which they are separated by radiation-induced breaks. The frequency is assayed by
|
|
analyzing a panel of human–hamster hybrid cell lines. These hybrids are
|
|
produced by irradiating human cells, which damages the cells and fragments the DNA. The
|
|
dying human cells are fused with thymidine kinase negative (TK−) live hamster
|
|
cells. The fused cells are grown under conditions that select against hamster cells and
|
|
favor the growth of hybrid cells that have taken up the human <i>TK</i> gene. In
|
|
the RH maps, the unit of distance is centirays (cR), denoting a 1% chance of a
|
|
break occurring between two loci.</p></dd><dt id="app158">RNA</dt><dd><p>Ribonucleic Acid. A single-stranded nucleic acid, similar to <a class="def" href="/books/NBK21106/def-item/app37/">DNA</a>, but having a ribose sugar, instead of deoxyribose, and
|
|
uracil instead of thymine as one of its bases.</p></dd><dt id="app159">RPS-BLAST</dt><dd><p>Reverse Position-Specific BLAST. A program used to identify conserved domains in a
|
|
protein query sequence. It does this by comparing a query protein sequence to
|
|
position-specific score matrices (<a class="def" href="/books/NBK21106/def-item/app149/">PSSM</a>s)
|
|
that have been prepared from conserved domain alignments. RPS-BLAST is a
|
|
“reverse” version of position-specific iterated BLAST (<a class="def" href="/books/NBK21106/def-item/app148/">PSI-BLAST</a>); however, RPS-BLAST compares a
|
|
query sequence against a database of profiles prepared from ready-made alignments,
|
|
whereas PSI-BLAST builds alignments starting from a single protein sequence.</p></dd><dt id="app160">SAGE</dt><dd><p>Serial Analysis of Gene Expression. An experimental technique designed to
|
|
quantitatively measure gene expression.</p></dd><dt id="app161">Sequin</dt><dd><p>Sequin is a stand-alone software tool developed by the <a class="def" href="/books/NBK21106/def-item/app116/">NCBI</a> for submitting and updating entries to the <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a>, <a class="def" href="/books/NBK21106/def-item/app44/">EMBL</a>, or <a class="def" href="/books/NBK21106/def-item/app35/">DDBJ</a> sequence
|
|
databases. It is capable of handling simple submissions that contain a single, short
|
|
mRNA sequence and complex submissions containing long sequences, multiple annotations,
|
|
segmented sets of DNA, or phylogenetic and population
|
|
studies.
|
|
</p></dd><dt id="app162">SGD</dt><dd><p>Saccharomyces Genome Database. A database for the molecular biology and genetics of <i>Saccharomyces cerevisiae</i>, also known as baker's yeast.</p></dd><dt id="app163">SGML</dt><dd><p>Standard Generalized Markup Language. The international standard for specifying the
|
|
structure and content of electronic documents. SGML is used for the markup of data in a
|
|
way that is self-describing. SGML is not a language but a way of defining languages that
|
|
are developed along its general principles. A subset of SGML called <a class="def" href="/books/NBK21106/def-item/app198/">XML</a> is more widely used for the markup of
|
|
data. <a class="def" href="/books/NBK21106/def-item/app75/">HTML</a> (Hypertext Markup Language) is
|
|
based on SGML and uses some of its concepts to provide a universal markup language for
|
|
the display of information and the linking of different pieces of that
|
|
information.</p></dd><dt id="app164">SKY</dt><dd><p>Spectral Karyotyping. SKY is a technique that allows for the visualization of all of an
|
|
organism's chromosomes together, each labeled with a different color. This is achieved
|
|
by using chromosome-specific, single-stranded DNA probes (each labeled with a different
|
|
fluorophore) to hybridize or bind to the chromosomes of a cell; resulting in each
|
|
chromosome being painted a different color. This technique is useful for identifying
|
|
chromosome abnormalities because it is easy to spot instances where a chromosome painted
|
|
in one color has a small piece of another chromosome, painted in a different color,
|
|
attached to it. (Also see <a class="def" href="/books/NBK21106/def-item/app56/">FISH</a>, <a class="def" href="/books/NBK21106/def-item/app24/">CGH</a>.)</p></dd><dt id="app165">SKYGRAM</dt><dd><p>1. A software tool to automatically convert the short-form karyotype into an image
|
|
representation of a cell or clone, with each chromosome displayed in a different color,
|
|
with band overlay. The program will also incorporate the number of cells for each
|
|
structural abnormality, which is displayed in brackets. 2. The full ideogram of a cell
|
|
or clone, with each chromosome displayed in a different color, with band overlay.</p></dd><dt id="app166">SMART</dt><dd><p>Simple Modular Architecture Research Tool. A tool to allow automatic identification and
|
|
annotation of domains in user-supplied protein sequences. For example, the <a class="def" href="/books/NBK21106/def-item/app175/">SWISS-PROT</a> database is an extensively
|
|
annotated and nonredundant collection of protein sequences. SWISS-PROT annotations have
|
|
been mined for SMART-derived annotations of alignments.</p></dd><dt id="app167">SMD</dt><dd><p>
|
|
<a href="http://genome-www5.stanford.edu/MicroArray/SMD/index.shtml" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Stanford Microarray Database</a>. SMD stores raw and normalized data from
|
|
microarray experiments, as well as their corresponding image files. In addition, the SMD
|
|
provides interfaces for data retrieval, analysis, and visualization. Data are released
|
|
to the public at the researcher's discretion or upon publication.</p></dd><dt id="app168">SNP</dt><dd><p>Common, but minute, variations that occur in human DNA at a frequency of 1 every 1,000
|
|
bases. An SNP is a single base-pair site within the genome at which more than one of the
|
|
four possible base pairs is commonly found in natural populations. Several hundred
|
|
thousand SNP sites are being identified and mapped on the sequence of the genome,
|
|
providing the densest possible map of genetic differences. SNP is pronounced
|
|
“snip”.</p></dd><dt id="app169">SOFT</dt><dd><p>Simple Omnibus Format in Text. SOFT is an ASCII text format that was designed to be a
|
|
machine-readable representation of data retrieved from, or submitted to, the Gene
|
|
Expression Omnibus (<a class="def" href="/books/NBK21106/def-item/app66/">GEO</a>). SOFT is also a
|
|
line-based format, making it easy to parse, using commonly available text processing and
|
|
formatting languages. (For examples of SOFT, see the <a href="/geo/info/soft.cgi" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">guide</a>.)</p></dd><dt id="app170">splice sites</dt><dd><p>Refers to the location of the exon-intron junctions in a pre-mRNA (i.e., the primary
|
|
transcript that must undergo additional processing to become a mature RNA for
|
|
translation into a protein). Splice sites can be determined by comparing the sequence of
|
|
genomic DNA with that of the <a class="def" href="/books/NBK21106/def-item/app21/">cDNA</a>
|
|
sequence. In mRNA, introns (non-protein coding regions) are removed by the splicing
|
|
machinery; however, exons can also be removed. Depending on which exons (or parts of
|
|
exons) are removed, different proteins can be made from the same initial RNA or gene.
|
|
Different proteins created in this way are “splice variants” or
|
|
“alternatively spliced”.</p></dd><dt id="app171">SSAHA</dt><dd><p>Sequence Search and Alignment by Hashing Algorithm. SSAHA is a software tool for very
|
|
fast matching and alignment of DNA sequences and is used for searching databases
|
|
containing large amounts (gigabases) of genome sequence. It achieves its fast search
|
|
speed by converting sequence information into a “hash table” data
|
|
structure, which can then be searched very rapidly for matches (<a href="/pubmed?term=11591649" ref="pagearea=body&targetsite=entrez&targetcat=term&targettype=pubmed">Ning et al., Genome Res 11:1725-1729; 2001</a>).</p></dd><dt id="app172">SSLP</dt><dd><p>Simple Sequence Length Polymorphisms. SSLPs are markers based on the variation in the
|
|
number of short tandem repeats in DNA.</p></dd><dt id="app173">STS</dt><dd><p>A short DNA segment that occurs only once in the human genome, the exact location and
|
|
order of bases of which are known. Because each is unique, STSs are helpful for
|
|
chromosome placement of mapping and sequencing data from many different laboratories.
|
|
STSs serve as landmarks on the physical map of the human
|
|
genome.
|
|
</p></dd><dt id="app174">substitution matrix</dt><dd><p>A substitution matrix containing values proportional to the probability that amino acid
|
|
i mutates into amino acid j for all pairs of amino acids. Such matrices are constructed
|
|
by assembling a large and diverse sample of verified pairwise alignments of amino acids.
|
|
If the sample is large enough to be statistically significant, the resulting matrices
|
|
should reflect the true probabilities of mutations occurring through a period of
|
|
evolution. (See also <a class="def" href="/books/NBK21106/def-item/app15/">BLOSUM 62</a>.)</p></dd><dt id="app175">SWISS-PROT</dt><dd><p>
|
|
<a href="http://www.ebi.ac.uk/swissprot/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">SWISS-PROT</a> is a
|
|
curated protein sequence database that provides a high level of annotation (such as the
|
|
description of protein function, domain structures, post-translational modifications,
|
|
variants, etc.), a minimal level of redundancy, and high level of integration with other
|
|
databases.</p></dd><dt id="app176">Sybase</dt><dd><p>A trademarked family of products that include databases, development tools, integration
|
|
middleware, enterprise portals, and mobile and wireless servers.</p></dd><dt id="app177">synteny</dt><dd><p>On the same strand. The phrase “conserved synteny” refers to
|
|
conserved gene order on chromosomes of different, related species.</p></dd><dt id="app178">Tax BLAST</dt><dd><p>BLAST Taxonomy Reports page. Tax BLAST groups BLAST hits by source organism, according
|
|
to information in <a class="def" href="/books/NBK21106/def-item/app116/">NCBI</a>'s Taxonomy
|
|
database. Species are listed in order of sequence similarity with the query sequence,
|
|
the strongest match listed first.
|
|
</p></dd><dt id="app179">taxID</dt><dd><p>Taxonomy Identifier. The taxID is a stable unique identifier for each taxon (for a
|
|
species, a family, an order, or any other group in the taxonomy database). The taxID is
|
|
seen in the <a class="def" href="/books/NBK21106/def-item/app62/">GenBank</a> records as a
|
|
“source” feature table entry; for example,
|
|
/db_xref=“taxon:&lt;9606&gt;” is the taxID for <i>Homo sapiens</i>, and the line is therefore found in all recent human sequence
|
|
records.
|
|
</p></dd><dt id="app180">taxid</dt><dd><p>See <a class="def" href="/books/NBK21106/def-item/app179/">taxID</a>.</p></dd><dt id="app205">termination codon or stop codon</dt><dd><p>One of three codons that do not specify any amino acid and hence causes translation of
|
|
mRNA into protein to be terminated. These codons mark the end of a protein coding
|
|
sequence.</p></dd><dt id="app181">TIGR</dt><dd><p>
|
|
<a href="http://www.tigr.org" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">The Institute for Genomic
|
|
Research</a>
|
|
</p></dd><dt id="app182">tiling path</dt><dd><p>An ordered list or map that defines a set of overlapping clones that covers a
|
|
chromosome or other extended segment of DNA.</p></dd><dt id="app183">TPA</dt><dd><p>Third-Party Annotation
|
|
</p></dd><dt id="app214">TPF</dt><dd><p>Tiling Path Format. A table format used to specify the set of clones that will provide
|
|
the best possible sequence coverage for a particular chromosome, the order of the clones
|
|
along the chromosome, and the location of any gaps in the clone tiling path. Also used
|
|
to refer to a file (Tiling Path File) in which the <a class="def" href="/books/NBK21106/def-item/app213/">minimal tiling path</a> of clones covering a chromosome is specified in
|
|
Tiling Path Format or to the minimal tiling path of clones so defined.</p></dd><dt id="app207">translation start site</dt><dd><p>The position within an mRNA at which synthesis of a protein begins. The translation start site is usually an AUG codon, but occasionally, GUG or CUG codons are used to
|
|
initiate protein synthesis.</p></dd><dt id="app184">UID</dt><dd><p>Unique Identifier
|
|
</p></dd><dt id="app185">UMLS</dt><dd><p>
|
|
<a href="http://www.nlm.nih.gov/research/umls/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Unified Medical
|
|
Language System</a>. A project of the National Library of Medicine for the
|
|
development and distribution of multipurpose, electronic “Knowledge
|
|
Sources”, and associated lexical programs. The purpose of the UMLS is to aid
|
|
the development of systems that help health professionals and researchers retrieve and
|
|
integrate electronic biomedical information from a variety of sources and to make it
|
|
easy for users to link disparate information systems, including computer-based patient
|
|
records, bibliographic databases, factual databases, and expert systems.</p></dd><dt id="app186">unfinished sequence</dt><dd><p>See <a class="def" href="/books/NBK21106/def-item/app39/">draft sequence</a>.</p></dd><dt id="app187">UniGene cluster</dt><dd><p>
|
|
<a class="def" href="/books/NBK21106/def-item/app46/">EST</a>s and full-length mRNA sequences
|
|
organized into clusters such that each represents a unique known or putative gene within
|
|
the organism from which the sequences were obtained. UniGene clusters are annotated with
|
|
mapping and expression information when possible (e.g., for human) and include
|
|
cross-references to other resources. Sequence data can be downloaded by cluster through
|
|
the UniGene web pages, or the complete dataset can be downloaded from the <a href="ftp://ftp.ncbi.nih.gov/repository/UniGene/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=ftp">repository/UniGene
|
|
directory</a> of the FTP site.</p></dd><dt id="app188">UniSTS</dt><dd><p>
|
|
<a href="/entrez/query.fcgi?db=unists" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">UniSTS</a> presents a unified, non-redundant view of sequence-tagged sites
|
|
(<a class="def" href="/books/NBK21106/def-item/app173/">STS</a>s). UniSTS integrates marker and
|
|
mapping data from a variety of public resources. If two or more markers have different
|
|
names but the same primer pair, a single STS record is presented for the primer pair,
|
|
and all the marker names are shown.</p></dd><dt id="app189">UNIX</dt><dd><p>UNIX is an operating system that was developed by Dennis Ritchie and Kenneth Thompson
|
|
at Bell Labs more than 30 years ago. It allows multitasking and multiuser capabilities
|
|
and offers portability with other operating systems. It comes with hundreds of programs
|
|
that are of two types: integral utilities, such as the command line interpreter; and
|
|
tools such as email, which are not necessary for the operation of UNIX but provide
|
|
additional capabilities to the user. It is functionally organized at three levels: the
|
|
kernel, which schedules tasks and manages storage; the shell, which connects and
|
|
interprets users' commands, calls programs from memory, and executes them; and tools and
|
|
applications, which offer additional functionality to the operating system, such as word
|
|
processing and business applications. UNIX<sup>®</sup> was registered by <a href="http://www.lucent.com/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Bell Laboratories</a> as a trademark
|
|
for computer operating systems. Today, this mark is owned by <a href="http://www.opengroup.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">The Open Group</a>.</p></dd><dt id="app190">URL</dt><dd><p>Uniform Resource Locator. The address of a resource on the Internet. URL syntax is in
|
|
the form of protocol://host/localinfo, where “protocol” specifies
|
|
the means of fetching the object (such as HTTP, used by <a class="def" href="/books/NBK21106/def-item/app197/">WWW</a> browsers and servers to exchange information, or
|
|
<a class="def" href="/books/NBK21106/def-item/app58/">FTP</a>), “host”
|
|
specifies the remote location where the object resides, and
|
|
“localinfo” is a string (often a file name) passed to the protocol
|
|
handler at the remote location. Also called Uniform Resource Identifier (URI).</p></dd><dt id="app191">UTF-8</dt><dd><p>UCS (Universal Character Set) Transformation Format. An ASCII-preserving encoding
|
|
method for Unicode (a standard to provide a unique number for every character
|
|
irrespective of the platform, program, or language).</p></dd><dt id="app192">UTR</dt><dd><p>Untranslated Region. The 3′ UTR is that portion of an <a class="def" href="/books/NBK21106/def-item/app114/">mRNA</a> from the position of the last codon that is used in
|
|
translation to the 3′ end. The 5′ UTR is that portion of an mRNA
|
|
from the 5′ end to the position of the first codon used in translation.</p></dd><dt id="app193">VAST</dt><dd><p>
|
|
<a href="/Structure/VAST/vast.shtml" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">Vector
|
|
Alignment Search Tool</a>. A computer algorithm used to identify similar protein
|
|
3D structures.</p></dd><dt id="app194">weight</dt><dd><p>An assignment of importance to a term in a search query. If a term in a search query is
|
|
found to match a word in a document, that word is given a “weight”.
|
|
The exact weight of the word will depend on the emphasis given to the word by the author
|
|
or its position in the document. For example, a word that occurs in a chapter title will
|
|
have a higher weight than the same word if it occurs in the body of the chapter.
|
|
Similarly, words that occur in data collections are also assigned weights, depending on
|
|
how frequently the terms occur in the collection.
|
|
|
|
</p></dd><dt id="app195">WGS sequence</dt><dd><p>Whole Genome Shotgun sequence. In this semi-automated sequencing technique,
|
|
high-molecular-weight DNA is sheared into random fragments, size selected (usually 2,
|
|
10, 50, and 150 kb), and cloned into an appropriate vector. The clones are then
|
|
sequenced from both ends. The two ends of the same clone are referred to as mate pairs.
|
|
The distance between two mate pairs can be inferred if the library size is known and has
|
|
a narrow window of deviation. The sequences are aligned using sequence assembly
|
|
software. Proponents of this approach argue that it is possible to sequence the whole
|
|
genome at once using large arrays of sequencers, which makes the whole process much more
|
|
efficient than the traditional approaches.</p></dd><dt id="app196">WHO</dt><dd><p>
|
|
<a href="http://www.who.int/en/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">World Health Organization</a>
|
|
</p></dd><dt id="app197">WWW</dt><dd><p>World Wide Web. A <a href="http://www.w3.org/" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">consortium</a>
|
|
(W3C) that develops technologies such as specifications, guidelines, software, and tools
|
|
for the internet.</p></dd><dt id="app198">XML</dt><dd><p>Extensible Markup Language. XML describes a class of data objects called XML documents
|
|
and partially describes the behavior of computer programs that process them. XML is a
|
|
subset of SGML, and XML documents are conforming SGML documents. XML documents are made
|
|
up of storage units called entities, which contain either parsed or unparsed data.
|
|
Parsed data is made up of characters (a unit of text), some of which form character
|
|
data, and some of which form markup. Markup includes tags that provide information about
|
|
the data, i.e., a description of the structure and content of the document. Character
|
|
data comprises all the text that is not markup. XML provides a mechanism to impose
|
|
constraints on the storage layout and logical structure.</p></dd><dt id="app199">XSL</dt><dd><p>Extensible Stylesheet Language. XSL is used for the transformation of XML-based data
|
|
into HTML or other presentation formats, for display in a web browser. This is a
|
|
two-part process. First, the structure of the input XML tree must be transformed into a
|
|
new tree (e.g., HTML), allowing reordering of the elements, addition of text, and
|
|
calculations—all without modification to the source document. This process is described
|
|
by <a class="def" href="/books/NBK21106/def-item/app200/">XSLT</a>. Second, XSL-FO (XSL Formatting
|
|
Objects, an XML vocabulary for formatting) is used for formatting the output, defining
|
|
areas of the display page and their properties. In this way, the source XML document can
|
|
be maintained from the perspective of “pure content” and can be
|
|
separated from the presentation. An XML document can be delivered in different formats
|
|
to different target audiences by simply switching style sheets.</p></dd><dt id="app200">XSLT</dt><dd><p>Extensible Stylesheet Language: Transformations. XSLT is a language for transforming
|
|
the structure of an XML document. XSLT is designed for use as part of <a class="def" href="/books/NBK21106/def-item/app199/">XSL</a>, the stylesheet language for XML. A
|
|
transformation expressed in XSLT describes a sequence of template rules for transforming
|
|
a source tree into a result tree; elements from the source tree can be filtered and
|
|
reordered, and a different structure can be added. A template rule has two parts: a
|
|
pattern that is matched against nodes in the source tree; and a template that can be
|
|
instantiated to form part of the result tree. This makes XSLT a declarative language
|
|
because it is possible to specify what output should be produced when specific patterns
|
|
occur in the input, which distinguishes it from procedural programming languages, where
|
|
it is necessary to specify what tasks have to be performed in what order. XSLT makes use
|
|
of the expression language defined by XPath (a language for addressing the parts of an
|
|
XML document) for selecting elements for processing, for conditional processing, and for
|
|
generating text.</p></dd><dt id="app201">YAC</dt><dd><p>Yeast Artificial Chromosome. Extremely large segments of DNA from another species
|
|
spliced into the DNA of yeast. YACs are used to clone up to one million bases of foreign
|
|
DNA into a host cell, where the DNA is propagated along with the other chromosomes of
|
|
the yeast cell.</p></dd><dt id="app202">ZFIN</dt><dd><p>Zebrafish Information Network. <a href="http://zdb.wehi.edu.au:8282//" ref="pagearea=body&targetsite=external&targetcat=link&targettype=uri">ZFIN</a> is a database for the zebrafish model organism that holds information
|
|
on wild-type stocks, mutants, genes, gene expression data, and map markers.</p></dd></dl><div id="bk_toc_contnr"></div></div></div><div class="fm-sec"><h2 id="_NBK21106_pubdet_">Publication Details</h2><h3>Copyright</h3><div><div class="half_rhythm"><a href="/books/about/copyright/">Copyright Notice</a></div></div><h3>Publisher</h3><p><a href="https://www.ncbi.nlm.nih.gov/" ref="pagearea=page-banner&targetsite=external&targetcat=link&targettype=publisher">National Center for Biotechnology Information (US)</a>, Bethesda (MD)</p><h3>NLM Citation</h3><p>McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. Glossary.<span class="bk_cite_avail"></span></p></div><div class="small-screen-prev"><a href="/books/n/handbook/ch24/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a></div><div class="small-screen-next"></div></article></div><div id="jr-scripts"><script src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/libs.min.js"> </script><script src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/jr.min.js"> </script></div></div>
|
|
|
|
|
|
|
|
|
|
<!-- Book content -->
|
|
|
|
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js"> </script>
|
|
|
|
|
|
<!-- CE8B5AF87C7FFCB1_0191SID /projects/books/PBooks@9.11 portal107 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
|
|
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
|
|
|
|
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3968615.js" snapshot="books"></script></body>
|
|
</html>
|