nih-gov/www.ncbi.nlm.nih.gov/books/NBK20259/index.html?report=reader

260 lines
37 KiB
Text

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en" class="no-js no-jr">
<head>
<!-- The encoding declaration comes first so that it lies within the first 1024 bytes of the document, where the HTML encoding pre-scan looks for it. -->
<meta charset="utf-8">
<!-- For pinger, set start time and add meta elements. -->
<script type="text/javascript">var ncbi_startTime = new Date();</script>
<!-- Logger begin -->
<meta name="ncbi_db" content="books">
<meta name="ncbi_pdid" content="book-part">
<meta name="ncbi_acc" content="NBK20259">
<meta name="ncbi_domain" content="sef">
<meta name="ncbi_report" content="reader">
<meta name="ncbi_type" content="fulltext">
<meta name="ncbi_objectid" content="">
<meta name="ncbi_pcid" content="/NBK20259/?report=reader">
<meta name="ncbi_pagename" content="Preface - Sequence - Evolution - Function - NCBI Bookshelf">
<meta name="ncbi_bookparttype" content="section">
<meta name="ncbi_app" content="bookshelf">
<!-- Logger end -->
<!--component id="Page" label="meta"/-->
<!-- Reader bootstrap script; kept ahead of the stylesheets exactly as in the original load order. -->
<script type="text/javascript" src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/jr.boots.min.js"> </script>
<title>Preface - Sequence - Evolution - Function - NCBI Bookshelf</title>
<meta name="apple-mobile-web-app-capable" content="no">
<!-- NOTE(review): maximum-scale=1 and user-scalable=no prevent pinch-zoom (WCAG 1.4.4). Left unchanged because the reader script may assume a fixed scale; confirm before relaxing. -->
<meta name="viewport" content="initial-scale=1,minimum-scale=1,maximum-scale=1,user-scalable=no">
<!-- Reader configuration: column layout plus previous/next unit and table-of-contents URLs. -->
<meta name="jr-col-layout" content="auto">
<meta name="jr-prev-unit" content="/books/n/sef/toc/?report=reader">
<meta name="jr-next-unit" content="/books/n/sef/A2/?report=reader">
<meta name="bk-toc-url" content="/books/n/sef/?report=toc">
<meta name="robots" content="INDEX,NOFOLLOW,NOARCHIVE,NOIMAGEINDEX">
<!-- Bibliographic metadata (citation_* and Dublin Core). -->
<meta name="author" content="Eugene Koonin, Michael Galperin">
<meta name="citation_inbook_title" content="Sequence - Evolution - Function: Computational Approaches in Comparative Genomics">
<meta name="citation_title" content="Preface">
<meta name="citation_publisher" content="Kluwer Academic">
<meta name="citation_date" content="2003">
<meta name="citation_author" content="Eugene Koonin">
<meta name="citation_author" content="Michael Galperin">
<meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK20259/">
<link rel="schema.DC" href="http://purl.org/DC/elements/1.0/">
<meta name="DC.Title" content="Preface">
<meta name="DC.Type" content="Text">
<meta name="DC.Publisher" content="Kluwer Academic">
<meta name="DC.Contributor" content="Eugene Koonin">
<meta name="DC.Contributor" content="Michael Galperin">
<meta name="DC.Date" content="2003">
<meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK20259/">
<meta name="DC.Language" content="en">
<meta name="description" content="When the completion of the draft of the human genome sequence was announced on June 26, 2000, all the parties involved agreed that the major task of identifying the functions of all human genes was still many years ahead. In fact, even the much simpler task of mapping all the genes in the final version of the human genome sequence that should become available within the next few years remains a major problem. Identification of all protein-coding genes in the genome sequence and determination of the cellular functions of the proteins encoded in these genes can be accomplished only by combining powerful computational tools with a variety of experimental approaches from the arsenals of biochemistry, molecular biology, genetics and cell biology. Linking sequence to function and both to the evolutionary history of life is the fundamental task of new biology.">
<!-- Open Graph metadata is declared with the "property" attribute, as the protocol specifies. The original "name" attribute is kept alongside it so anything that already selects meta[name="og:…"] continues to match. -->
<meta property="og:title" name="og:title" content="Preface">
<meta property="og:type" name="og:type" content="book">
<meta property="og:description" name="og:description" content="When the completion of the draft of the human genome sequence was announced on June 26, 2000, all the parties involved agreed that the major task of identifying the functions of all human genes was still many years ahead. In fact, even the much simpler task of mapping all the genes in the final version of the human genome sequence that should become available within the next few years remains a major problem. Identification of all protein-coding genes in the genome sequence and determination of the cellular functions of the proteins encoded in these genes can be accomplished only by combining powerful computational tools with a variety of experimental approaches from the arsenals of biochemistry, molecular biology, genetics and cell biology. Linking sequence to function and both to the evolutionary history of life is the fundamental task of new biology.">
<meta property="og:url" name="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK20259/">
<meta property="og:site_name" name="og:site_name" content="NCBI Bookshelf">
<meta property="og:image" name="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-sef-lrg.png">
<meta name="twitter:card" content="summary">
<meta name="twitter:site" content="@ncbibooks">
<meta name="bk-non-canon-loc" content="/books/n/sef/A1/?report=reader">
<link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK20259/">
<link href="https://fonts.googleapis.com/css?family=Archivo+Narrow:400,700,400italic,700italic&amp;subset=latin" rel="stylesheet" type="text/css">
<link rel="stylesheet" href="/corehtml/pmc/jatsreader/ptpmc_3.22/css/libs.min.css">
<link rel="stylesheet" href="/corehtml/pmc/jatsreader/ptpmc_3.22/css/jr.min.css">
<meta name="format-detection" content="telephone=no">
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css" type="text/css">
<!-- Print stylesheet: the stray doubled slash in the original path has been removed. -->
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_print.min.css" type="text/css" media="print">
<link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_reader.min.css" type="text/css">
<!-- Page-level overrides applied after the shared Bookshelf stylesheets. -->
<style type="text/css">p a.figpopup{display:inline !important} .bk_tt {font-family: monospace} .first-line-outdent .bk_ref {display: inline} .body-content h2, .body-content .h2 {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a {text-decoration:none;border:0 !important} .temp-labeled-list .graphic {display:inline-block !important} .temp-labeled-list img{width:100%}</style>
<!-- Explicit https: replaces the protocol-relative form, matching the canonical URL's scheme. -->
<link rel="shortcut icon" href="https://www.ncbi.nlm.nih.gov/favicon.ico">
<meta name="ncbi_phid" content="CE8BABF47DB2C7810000000000E400BF.m_5">
<meta name="referrer" content="origin-when-cross-origin">
<link type="text/css" rel="stylesheet" href="https://static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3849091.css">
</head>
<body>
<!-- Book content! -->
<div id="jr" data-jr-path="/corehtml/pmc/jatsreader/ptpmc_3.22/"><div class="jr-unsupported"><table class="modal"><tr><td><span class="attn inline-block"></span><br />Your browser does not support the NLM PubReader view.<br />Go to <a href="/pmc/about/pr-browsers/">this page</a> to see a list of supported browsers<br />or return to the <br /><a href="/books/NBK20259/?report=classic">regular view</a>.</td></tr></table></div><div id="jr-ui" class="hidden"><nav id="jr-head"><div class="flexh tb"><div id="jr-tb1"><a id="jr-links-sw" class="hidden" title="Links"><svg xmlns="http://www.w3.org/2000/svg" version="1.1" x="0px" y="0px" viewBox="0 0 70.6 85.3" style="enable-background:new 0 0 70.6 85.3;vertical-align:middle" xml:space="preserve" width="24" height="24">
<style type="text/css">.st0{fill:#939598;}</style>
<g>
<path class="st0" d="M36,0C12.8,2.2-22.4,14.6,19.6,32.5C40.7,41.4-30.6,14,35.9,9.8"></path>
<path class="st0" d="M34.5,85.3c23.2-2.2,58.4-14.6,16.4-32.5c-21.1-8.9,50.2,18.5-16.3,22.7"></path>
<path class="st0" d="M34.7,37.1c66.5-4.2-4.8-31.6,16.3-22.7c42.1,17.9,6.9,30.3-16.4,32.5h1.7c-66.2,4.4,4.8,31.6-16.3,22.7 c-42.1-17.9-6.9-30.3,16.4-32.5"></path>
</g>
</svg> Books</a></div><div class="jr-rhead f1 flexh"><div class="head"><a href="/books/n/sef/toc/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a></div><div class="body"><div class="t">Preface</div><div class="j">Sequence - Evolution - Function: Computational Approaches in Comparative Genomics</div></div><div class="tail"><a href="/books/n/sef/A2/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M25,30c80,60 80,0 0,60 c30,-60 30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Next</text></svg></a></div></div><div id="jr-tb2"><a id="jr-bkhelp-sw" class="btn wsprkl hidden" title="Help with NLM PubReader">?</a><a id="jr-help-sw" class="btn wsprkl hidden" title="Settings and typography in NLM PubReader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 512 512" preserveAspectRatio="none"><path d="M462,283.742v-55.485l-29.981-10.662c-11.431-4.065-20.628-12.794-25.274-24.001 c-0.002-0.004-0.004-0.009-0.006-0.013c-4.659-11.235-4.333-23.918,0.889-34.903l13.653-28.724l-39.234-39.234l-28.72,13.652 c-10.979,5.219-23.68,5.546-34.908,0.889c-0.005-0.002-0.01-0.003-0.014-0.005c-11.215-4.65-19.933-13.834-24-25.273L283.741,50 h-55.484l-10.662,29.981c-4.065,11.431-12.794,20.627-24.001,25.274c-0.005,0.002-0.009,0.004-0.014,0.005 c-11.235,4.66-23.919,4.333-34.905-0.889l-28.723-13.653l-39.234,39.234l13.653,28.721c5.219,10.979,5.545,23.681,0.889,34.91 c-0.002,0.004-0.004,0.009-0.006,0.013c-4.649,11.214-13.834,19.931-25.271,23.998L50,228.257v55.485l29.98,10.661 c11.431,4.065,20.627,12.794,25.274,24c0.002,0.005,0.003,0.01,0.005,0.014c4.66,11.236,4.334,23.921-0.888,34.906l-13.654,28.723 l39.234,39.234l28.721-13.652c10.979-5.219,23.681-5.546,34.909-0.889c0.005,0.002,0.01,0.004,0.014,0.006 
c11.214,4.649,19.93,13.833,23.998,25.271L228.257,462h55.484l10.595-29.79c4.103-11.538,12.908-20.824,24.216-25.525 c0.005-0.002,0.009-0.004,0.014-0.006c11.127-4.628,23.694-4.311,34.578,0.863l28.902,13.738l39.234-39.234l-13.66-28.737 c-5.214-10.969-5.539-23.659-0.886-34.877c0.002-0.005,0.004-0.009,0.006-0.014c4.654-11.225,13.848-19.949,25.297-24.021 L462,283.742z M256,331.546c-41.724,0-75.548-33.823-75.548-75.546s33.824-75.547,75.548-75.547 c41.723,0,75.546,33.824,75.546,75.547S297.723,331.546,256,331.546z"></path></svg></a><a id="jr-fip-sw" class="btn wsprkl hidden" title="Find"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 550 600" preserveAspectRatio="none"><path fill="none" stroke="#000" stroke-width="36" stroke-linecap="round" style="fill:#FFF" d="m320,350a153,153 0 1,0-2,2l170,170m-91-117 110,110-26,26-110-110"></path></svg></a><a id="jr-rtoc-sw" class="btn wsprkl hidden" title="Table of Contents"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M20,20h10v8H20V20zM36,20h44v8H36V20zM20,37.33h10v8H20V37.33zM36,37.33h44v8H36V37.33zM20,54.66h10v8H20V54.66zM36,54.66h44v8H36V54.66zM20,72h10v8 H20V72zM36,72h44v8H36V72z"></path></svg></a></div></div></nav><nav id="jr-dash" class="noselect"><nav id="jr-dash" class="noselect"><div id="jr-pi" class="hidden"><a id="jr-pi-prev" class="hidden" title="Previous page"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a><div class="pginfo">Page <i class="jr-pg-pn">0</i> of <i class="jr-pg-lp">0</i></div><a id="jr-pi-next" class="hidden" title="Next page"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M25,30c80,60 80,0 0,60 c30,-60 30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Next</text></svg></a></div><div 
id="jr-is-tb"><a id="jr-is-sw" class="btn wsprkl hidden" title="Switch between Figures/Tables strip and Progress bar"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><rect x="10" y="40" width="20" height="20"></rect><rect x="40" y="40" width="20" height="20"></rect><rect x="70" y="40" width="20" height="20"></rect></svg></a></div><nav id="jr-istrip" class="istrip hidden"><a id="jr-is-prev" href="#" class="hidden" title="Previous"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M80,40 60,65 80,90 70,90 50,65 70,40z M50,40 30,65 50,90 40,90 20,65 40,40z"></path><text x="35" y="25" textLength="60" style="font-size:25px">Prev</text></svg></a><a id="jr-is-next" href="#" class="hidden" title="Next"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M20,40 40,65 20,90 30,90 50,65 30,40z M50,40 70,65 50,90 60,90 80,65 60,40z"></path><text x="15" y="25" textLength="60" style="font-size:25px">Next</text></svg></a></nav><nav id="jr-progress"></nav></nav></nav><aside id="jr-links-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">NCBI Bookshelf</div></div><div class="cnt lol f1"><a href="/books/">Home</a><a href="/books/browse/">Browse All Titles</a><a class="btn share" target="_blank" rel="noopener noreferrer" href="https://www.facebook.com/sharer/sharer.php?u=https://www.ncbi.nlm.nih.gov/books/NBK20259/"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 33 33" style="vertical-align:middle" width="24" height="24" preserveAspectRatio="none"><g><path d="M 17.996,32L 12,32 L 12,16 l-4,0 l0-5.514 l 4-0.002l-0.006-3.248C 11.993,2.737, 13.213,0, 18.512,0l 4.412,0 l0,5.515 l-2.757,0 c-2.063,0-2.163,0.77-2.163,2.209l-0.008,2.76l 4.959,0 l-0.585,5.514L 18,16L 17.996,32z"></path></g></svg> Share on Facebook</a><a class="btn share" target="_blank" rel="noopener 
noreferrer" href="https://twitter.com/intent/tweet?url=https://www.ncbi.nlm.nih.gov/books/NBK20259/&amp;text=Preface"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 33 33" style="vertical-align:middle" width="24" height="24"><g><path d="M 32,6.076c-1.177,0.522-2.443,0.875-3.771,1.034c 1.355-0.813, 2.396-2.099, 2.887-3.632 c-1.269,0.752-2.674,1.299-4.169,1.593c-1.198-1.276-2.904-2.073-4.792-2.073c-3.626,0-6.565,2.939-6.565,6.565 c0,0.515, 0.058,1.016, 0.17,1.496c-5.456-0.274-10.294-2.888-13.532-6.86c-0.565,0.97-0.889,2.097-0.889,3.301 c0,2.278, 1.159,4.287, 2.921,5.465c-1.076-0.034-2.088-0.329-2.974-0.821c-0.001,0.027-0.001,0.055-0.001,0.083 c0,3.181, 2.263,5.834, 5.266,6.438c-0.551,0.15-1.131,0.23-1.73,0.23c-0.423,0-0.834-0.041-1.235-0.118 c 0.836,2.608, 3.26,4.506, 6.133,4.559c-2.247,1.761-5.078,2.81-8.154,2.81c-0.53,0-1.052-0.031-1.566-0.092 c 2.905,1.863, 6.356,2.95, 10.064,2.95c 12.076,0, 18.679-10.004, 18.679-18.68c0-0.285-0.006-0.568-0.019-0.849 C 30.007,8.548, 31.12,7.392, 32,6.076z"></path></g></svg> Share on Twitter</a></div></aside><aside id="jr-rtoc-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Table of Content</div></div><div class="cnt lol f1"><a href="/books/n/sef/?report=reader">Title Information</a><a href="/books/n/sef/toc/?report=reader">Table of Contents Page</a></div></aside><aside id="jr-help-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Settings</div></div><div class="cnt f1"><div id="jr-typo-p" class="typo"><div><a class="sf btn wsprkl">A-</a><a class="lf btn wsprkl">A+</a></div><div><a class="bcol-auto btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 200 100" preserveAspectRatio="none"><text x="10" y="70" style="font-size:60px;font-family: Trebuchet MS, ArialMT, Arial, sans-serif" textLength="180">AUTO</text></svg></a><a class="bcol-1 btn 
wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M15,25 85,25zM15,40 85,40zM15,55 85,55zM15,70 85,70z"></path></svg></a><a class="bcol-2 btn wsprkl"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M5,25 45,25z M55,25 95,25zM5,40 45,40z M55,40 95,40zM5,55 45,55z M55,55 95,55zM5,70 45,70z M55,70 95,70z"></path></svg></a></div></div><div class="lol"><a class="" href="/books/NBK20259/?report=classic">Switch to classic view</a><a href="/books/NBK20259/?report=printable">Print View</a></div></div></aside><aside id="jr-bkhelp-p" class="hidden flexv"><div class="tb sk-htbar flexh"><div><a class="jr-p-close btn wsprkl">Done</a></div><div class="title-text f1">Help</div></div><div class="cnt f1 lol"><a id="jr-helpobj-sw" data-path="/corehtml/pmc/jatsreader/ptpmc_3.22/" data-href="/corehtml/pmc/jatsreader/ptpmc_3.22/img/bookshelf/help.xml" href="">Help</a><a href="mailto:info@ncbi.nlm.nih.gov?subject=PubReader%20feedback%20%2F%20NBK20259%20%2F%20sid%3ACE8BC1E97D9F05E1_0182SID%20%2F%20phid%3ACE8BABF47DB2C7810000000000E400BF.4">Send us feedback</a><a id="jr-about-sw" data-path="/corehtml/pmc/jatsreader/ptpmc_3.22/" data-href="/corehtml/pmc/jatsreader/ptpmc_3.22/img/bookshelf/about.xml" href="">About PubReader</a></div></aside><aside id="jr-objectbox" class="thidden hidden"><div class="jr-objectbox-close wsprkl">&#10008;</div><div class="jr-objectbox-inner cnt"><div class="jr-objectbox-drawer"></div></div></aside><nav id="jr-pm-left" class="hidden"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 800" preserveAspectRatio="none"><text font-stretch="ultra-condensed" x="800" y="-15" text-anchor="end" transform="rotate(90)" font-size="18" letter-spacing=".1em">Previous Page</text></svg></nav><nav id="jr-pm-right" class="hidden"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 40 800" preserveAspectRatio="none"><text font-stretch="ultra-condensed" x="800" y="-15" 
text-anchor="end" transform="rotate(90)" font-size="18" letter-spacing=".1em">Next Page</text></svg></nav><nav id="jr-fip" class="hidden"><nav id="jr-fip-term-p"><input type="search" placeholder="search this page" id="jr-fip-term" autocorrect="off" autocomplete="off" /><a id="jr-fip-mg" class="wsprkl btn" title="Find"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 550 600" preserveAspectRatio="none"><path fill="none" stroke="#000" stroke-width="36" stroke-linecap="round" style="fill:#FFF" d="m320,350a153,153 0 1,0-2,2l170,170m-91-117 110,110-26,26-110-110"></path></svg></a><a id="jr-fip-done" class="wsprkl btn" title="Dismiss find">&#10008;</a></nav><nav id="jr-fip-info-p"><a id="jr-fip-prev" class="wsprkl btn" title="Jump to previous match">&#9664;</a><button id="jr-fip-matches">no matches yet</button><a id="jr-fip-next" class="wsprkl btn" title="Jump to next match">&#9654;</a></nav></nav></div><div id="jr-epub-interstitial" class="hidden"></div><div id="jr-content"><article data-type="main"><div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><div class="fm-sec"><h1 id="_NBK20259_"><span class="title" itemprop="name">Preface</span></h1><p class="contribs">Koonin E, Galperin M.</p><p class="fm-aai"><a href="#_NBK20259_pubdet_">Publication Details</a></p></div></div><div class="body-content whole_rhythm" itemprop="text"><blockquote><p>
<i>The use of genome sequences to solve biological problems has been afforded its
own label; for better or worse, it's called "functional genomics."</i>
</p><p>David J. Galas. Making Sense of the Sequence. Science, 2001, vol. 291, p. 1257</p></blockquote><p>When the completion of the draft of the human genome sequence was announced on June 26,
2000, all the parties involved agreed that the major task of identifying the functions of
all human genes was still many years ahead. In fact, even the much simpler task of mapping
all the genes in the final version of the human genome sequence that should become
available within the next few years remains a major problem. Identification of all
protein-coding genes in the genome sequence and determination of the cellular functions of
the proteins encoded in these genes can be accomplished only by combining powerful
computational tools with a variety of experimental approaches from the arsenals of
biochemistry, molecular biology, genetics and cell biology. Linking sequence to function
and both to the evolutionary history of life is the fundamental task of new biology.</p><p>This book is devoted to the principles, methods and some achievements of computational
comparative genomics, which has shaped up as a separate discipline only in the last 5-7
years. Its beginnings have been modest, with only the genome sequences of viruses and
organelles determined in the 1980&#x02019;s. These sequences were important for their
respective disciplines and as a test ground for computational methods of genome analysis,
but they were not particularly helpful for understanding how an autonomous cell works. By
1992, the first chromosomes of baker&#x02019;s yeast and large chunks of bacterial
genomes started to emerge, and researchers began pondering the question: What&#x02019;s
in the genome? The breakthrough came in 1995 with the complete sequencing of the first
genome of a cellular life form, the bacterium <i>Haemophilus influenzae</i>. The
second bacterial genome, <i>Mycoplasma genitalium</i>, followed within months.
The next year, the first complete genomes of an archaeon (<i>Methanococcus
jannaschii</i>) and a eukaryote (yeast <i>Saccharomyces cerevisiae</i>)
became available. Many more microbial genomes followed, and in 1999, the first genome of a
multicellular eukaryote, the nematode <i>Caenorhabditis elegans</i>, was
sequenced. The year 2000 brought us the complete genomes of the fruit fly
<i>Drosophila melanogaster</i> and the thale cress <i>Arabidopsis
thaliana</i>, and two independent drafts of the human genome followed suit in 2001.
Thus, we entered the 21<sup>st</sup> century already having at hand this 3.2 billion-letter
text that has been referred to as the Book of Life, as well as a number of accompanying
books on other life forms. The challenge is now to read and interpret them.</p><p>To extract biological information from enormous strings of As, Cs, Ts, and Gs, functional
genomics depends on computational analysis of the sequence data. It is unrealistic to
expect that every single gene or even a majority of the genes found in the sequenced
genomes would ever be studied experimentally. However, using the relatively cheap and fast
computational approaches, it is usually possible to reliably predict the protein-coding
regions in the DNA sequence with reasonable (albeit varying) confidence and to get at least
some insight into the possible functions of the encoded proteins. Such an analysis proves
valuable for many branches of biology, in large part, because it assists in classification
and prioritization of the targets for future experimental research.</p><p>Computations on genomes are inexpensive and fast compared to large-scale experimentation,
but it would be a mistake to equate this with &#x02018;easy&#x02019;. The history of
annotation and comparative analysis of the first sequenced genomes convincingly (and
sometimes painfully) shows that the quality and utility of the final product critically
depend on the employed methods and the depth of interpretation of the results obtained by
computer methods. Unfortunately, errors produced in the course of computer analysis are
propagated just as easily as real discoveries, which makes development of reliable
protocols and crystallization of the accumulating experience of genome analysis in easily
accessible forms particularly important.</p><p>While functional annotation of genomes may be the most obvious, and in a sense, the most
important purpose of computational genomics, it is not just a supporting service for
experimental functional genomics, but a discipline in itself, with its own fundamental
goals. The main such goal is <b>
<i>understanding genome evolution</i>
</b>. Ultimately, understanding here means being able to reconstruct the most likely
sequence of evolutionary events that produced these genomes. Attaining this goal will
require many more genomes, development of new algorithms, and years of careful analysis.
Nevertheless, even in its infancy, comparative genomics has brought genuine revelations
about evolution. We believe that the principal news that could not be easily foreseen in
the pre-genomic era is the extreme diversity of the gene composition in different
evolutionary lineages. This strongly suggests that, at least among prokaryotes, horizontal
gene transfer and lineage-specific gene loss were major, formative evolutionary forces,
rather than rare and relatively inconsequential events as assumed previously. Accordingly,
the straightforward image of evolution as the growth of the tree of life is replaced by one
of a &#x02018;grove&#x02019;, in which vertical, tree-type growth does occur, but
multiple horizontal connections are equally prominent&#x02014;an incomparably more
complex, but also more interesting, picture of life than ever suspected before.</p><p>This book describes the computational approaches that proved to be useful in analyzing
complete genomes. It is intended for a broad range of biologists, including experimental
biologists and graduate and advanced undergraduate students, whose work builds upon the
results of genome analysis and comprises the foundation of functional genomics. However, we
attempted to make the text interesting also for practitioners of genomics itself,
particularly those computational biologists whose main occupation is developing algorithms
and programs for genome analysis and who could benefit from an accessible discussion of
some biological implications of these methods. Most of the approaches discussed in this
book have been developed during comparative analysis of the first set of completely
sequenced bacterial and archaeal genomes, which are simpler and more amenable to
straightforward computational dissection than the much larger eukaryotic genomes. We show,
however, that the main principles remain the same for comparative genomics in general.</p><p>The book starts with a brief overview of the history of genomics. We list the completed and
ongoing genome sequencing projects and show how little is actually known, even about simple
genomes. We then discuss the conceptual basis of comparative genomics, emphasizing the
evolutionary principles of protein function assignments. The book then proceeds to discuss
the databases that store and organize genomic data, with their unique advantages and
pitfalls. Familiarity with these databases is useful for any biologist, but for those
interested in functional or evolutionary genomics, it is essential.</p><p>The central part of the book discusses, in some depth, the principles and methods of genome
analysis and annotation, including identification of genes in genomic DNA sequence and
using sequence comparisons for functional annotation of predicted proteins. We introduce
the most common sequence similarity search methods and discuss the ways to automate the
searches and increase search sensitivity, while minimizing the error rate. The common
sources of errors in functional annotation of genomes are discussed, and some simple rules
of thumb are provided that may help avoid them. We further focus on the approaches to
functional prediction that rely on the genome context, such as examination of phyletic
patterns, gene (domain) fusions, and conserved gene strings (operons). The discussion is
illustrated by examples from comparative genomics of prokaryotes.</p><p>The remaining parts of the book consider fundamental and practical applications of
comparative genomics. In particular, in <a href="/books/n/sef/A298/?report=reader">Chapter
6</a>, we discuss the impact of comparative genomics on our current understanding of
several fundamental problems of evolutionary biology and some major events of
life&#x02019;s history.</p><p>The book is non-technical with respect to the computer methods for genome analysis; we
discuss these methods from the user&#x02019;s viewpoint, without addressing mathematical
and algorithmic details. Prior practical familiarity with the basic methods for sequence
analysis is a major advantage, but a reader without such experience should be able to use
the book as an introduction to these methods. Knowledge of molecular biology and genetics
at the level of basic undergraduate courses is required for understanding the material;
similar knowledge of microbiology is a plus. The book is accompanied by a problem set,
designed to be solved by using tools available through the web. Hopefully, this will allow
the reader to develop a better feeling for the practical use of the methods discussed in
the text. <a href="/books/n/sef/A4/?report=reader">Chapters 1</a> through <a href="/books/n/sef/A264/?report=reader">5</a> are, definitely, at the introductory level,
although we attempted to include some non-trivial examples and discussion of open issues.
There is considerable cross-talk between <a href="/books/n/sef/A55/?report=reader">Chapters
3</a> and <a href="/books/n/sef/A166/?report=reader">4</a>, which might be perceived as
a degree of redundancy. We felt, however, that it was appropriate to discuss some key
notions in protein analysis twice, first from a purely practical and then from a more
fundamental standpoint. <a href="/books/n/sef/A298/?report=reader">Chapters 6</a>, <a href="/books/n/sef/A371/?report=reader">7</a>, and <a href="/books/n/sef/A517/?report=reader">8</a> are somewhat more involved and, we hope, might be of certain interest even to
experts. However, we tried to ensure that a non-expert reader would be in a position to
understand the material of these chapters after reading the book from the beginning.</p><p>Probably the main purpose of any Preface is a disclaimer and apologies. So what is <b>
<i>not</i>
</b> in this book? First of all, we could not even think of covering the entire field of
comparative genomics: this field is young but has already branched widely, and we cannot
claim even to know of all important research directions, let alone to be experts in them.
We cite many publications, but, again, we could not even think of citing all the relevant
ones: this would take the entire space of the book and the task still would not have been
accomplished. We sincerely apologize to all those colleagues whose important work is not
cited because of space considerations or, unfortunately, because of our ignorance and
negligence. Most of the case studies discussed in this book are drawn from our own work.
This is certainly not because we believe it to be in any sense superior to the work
of others, but simply because this is what we know best. However, unfortunately, there may
be cases where, for the above reason, we cite and discuss our own work instead of more
decisive and interesting work of other researchers, and to them our heartfelt
apologies.</p><p>The parts of this book that deal with sequence and structure analysis algorithms might irk
some of our colleagues involved in the development of these methods by superficiality and
lack of rigor. We owe a great debt to these researchers and extend our regrets and
apologies. A more technical point: most of the research discussed in this book is done with
protein sequences and structures. Partly, this is because we believe that the main
knowledge so far accumulated by comparative genomics has been attained through this type of
analysis. The other reason, however, is that this is where our main experience is, and we
apologize to the readers for not covering numerous important studies on non-coding regions
of the genomes. Finally, a terminological point related to the last issue: throughout the
book, we rather freely substitute proteins for the genes that encode them by talking about
duplications, mutations and other evolutions of proteins. This is just for the sake of
brevity; we assure the reader that we are aware of the fact that proteins actually do not
undergo any of these events, only the respective genes do.</p><p>Despite all these shortcomings and, undoubtedly, others that we are unaware of, we hope
that this book will help the reader to understand the principles and approaches of
comparative genomics and the potential and limitations of computational and experimental
approaches to genome analysis. This should go some distance toward building a bridge across the
"digital divide" between biologists and computer scientists, hopefully, allowing biologists
of various directions and persuasions to better grasp the peculiarities of the emerging
field of Genome Biology and to learn how to benefit from the enormous amount of sequence
and structural data available in the public databases.</p><p>This book has become possible thanks to our close collaboration with numerous colleagues
from the NCBI and other institutions. It is, unfortunately, impossible to mention everyone,
but we must gratefully acknowledge many hours of illuminating discussions over the years of
interactions with L. Aravind, Peer Bork, Valerian Dolja, Mikhail Gelfand, Alexander
Gorbalenya, Alexey Kondrashov, David Lipman, Arcady Mushegian, Pavel Pevzner, Igor Rogozin,
and Yuri Wolf. We greatly appreciate all the work that Roman Tatusov and Darren Natale put
into the COG database, which permeates this book. We thank the following colleagues for
critical reading of individual chapters and helpful criticisms: <a href="/books/n/sef/A55/?report=reader">Chapter 3</a>, Peter Cooper, Aviva Jacobs, David Wheeler, and Jodie Yin;
<a href="/books/n/sef/A166/?report=reader">Chapters 4</a>, <a href="/books/n/sef/A264/?report=reader">5</a>, <a href="/books/n/sef/A298/?report=reader">6</a>, and <a href="/books/n/sef/A517/?report=reader">8</a>, Igor Rogozin; <a href="/books/n/sef/A298/?report=reader">Chapter 6</a>, Fyodor Kondrashov; and <a href="/books/n/sef/A517/?report=reader">Chapter 8</a>, Yuri Wolf. Yuri Wolf kindly provided <a href="/books/n/sef/A517/?report=reader#A525">Figures 8.3</a>, <a href="/books/n/sef/A517/?report=reader#A527">8.4</a>, and <a href="/books/n/sef/A517/?report=reader#A528">8.5</a>, and the entire sections 8.2 and 8.3 are largely
the result of collaboration and intense discussions with Yuri Wolf and Georgy Karev. We
thank L. Aravind, Trevor Fennon, Kira Makarova, Boris Mirkin, and Yuri Wolf for the kind
permission to cite some of our unpublished joint work. Several figures in this book come
from the NCBI Entrez Genomes web site. We appreciate the work of the team that supports
this site. We are grateful to our editor Joanne Tracy for her constant prodding and
encouragement, not to mention editorial support, without which this book would have never
come to life. Last but not least, we thank our families for their enormous patience and
understanding.</p><p>The opinions expressed in this book reflect personal views of the authors and have no
relation to the official positions (if any) on the issues involved held by the National
Library of Medicine, National Institutes of Health, or the US Department of Health and
Human Services.</p><p>Eugene Koonin</p><p>Michael Galperin</p><p>Bethesda, August 2002</p><div style="display:none"><div id="figA525"><img alt="Image ch8f3" src-large="/books/n/sef/A517/bin/ch8f3.jpg" /></div><div id="figA527"><img alt="Image ch8f4" src-large="/books/n/sef/A517/bin/ch8f4.jpg" /></div><div id="figA528"><img alt="Image ch8f5" src-large="/books/n/sef/A517/bin/ch8f5.jpg" /></div></div><div id="bk_toc_contnr"></div></div></div><div class="fm-sec"><h2 id="_NBK20259_pubdet_">Publication Details</h2><h3>Author Information and Affiliations</h3><h4>Authors</h4><p class="contrib-group"><span itemprop="author">Eugene Koonin</span> and <span itemprop="author">Michael Galperin</span>.</p><h3>Copyright</h3><div><div class="half_rhythm"><a href="/books/about/copyright/">Copyright</a> &#x000a9; 2003, Kluwer Academic.</div></div><h3>Publisher</h3><p><a href="http://www.springer.com/" ref="pagearea=page-banner&amp;targetsite=external&amp;targetcat=link&amp;targettype=publisher">Kluwer Academic</a>, Boston</p><h3>NLM Citation</h3><p>Koonin E, Galperin M. Preface. In: Koonin EV, Galperin MY. Sequence - Evolution - Function: Computational Approaches in Comparative Genomics. Boston: Kluwer Academic; 2003. 
<span class="bk_cite_avail"></span></p></div><div class="small-screen-prev"><a href="/books/n/sef/toc/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M75,30 c-80,60 -80,0 0,60 c-30,-60 -30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Prev</text></svg></a></div><div class="small-screen-next"><a href="/books/n/sef/A2/?report=reader"><svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100" preserveAspectRatio="none"><path d="M25,30c80,60 80,0 0,60 c30,-60 30,0 0,-60"></path><text x="20" y="28" textLength="60" style="font-size:25px">Next</text></svg></a></div></article></div><div id="jr-scripts"><script src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/libs.min.js"> </script><script src="/corehtml/pmc/jatsreader/ptpmc_3.22/js/jr.min.js"> </script></div></div>
<!-- Book content -->
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js"> </script>
<!-- CE8BC1E97D9F05E1_0182SID /projects/books/PBooks@9.11 portal104 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8BC1E97D9F05E1_0182SID"></span>
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3968615.js" snapshot="books"></script></body>
</html>