nih-gov/www.ncbi.nlm.nih.gov/books/n/handbook/ch16/index.html

565 lines
No EOL
87 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<!-- AppResources meta begin -->
<meta name="paf-app-resources" content="" />
<script type="text/javascript">var ncbi_startTime = new Date();</script>
<!-- AppResources meta end -->
<!-- TemplateResources meta begin -->
<meta name="paf_template" content="" />
<!-- TemplateResources meta end -->
<!-- Logger begin -->
<meta name="ncbi_db" content="books" /><meta name="ncbi_pdid" content="book-part" /><meta name="ncbi_acc" content="NBK21097" /><meta name="ncbi_domain" content="handbook" /><meta name="ncbi_report" content="record" /><meta name="ncbi_type" content="fulltext" /><meta name="ncbi_objectid" content="" /><meta name="ncbi_pcid" content="/NBK21097/" /><meta name="ncbi_pagename" content="The BLAST Sequence Analysis Tool - The NCBI Handbook - NCBI Bookshelf" /><meta name="ncbi_bookparttype" content="chapter" /><meta name="ncbi_app" content="bookshelf" />
<!-- Logger end -->
<title>The BLAST Sequence Analysis Tool - The NCBI Handbook - NCBI Bookshelf</title>
<!-- AppResources external_resources begin -->
<link rel="stylesheet" href="/core/jig/1.15.2/css/jig.min.css" /><script type="text/javascript" src="/core/jig/1.15.2/js/jig.min.js"></script>
<!-- AppResources external_resources end -->
<!-- Page meta begin -->
<meta name="robots" content="NOINDEX,NOFOLLOW,NOARCHIVE,NOIMAGEINDEX" /><meta name="citation_inbook_title" content="The NCBI Handbook [Internet]" /><meta name="citation_title" content="The BLAST Sequence Analysis Tool" /><meta name="citation_publisher" content="National Center for Biotechnology Information (US)" /><meta name="citation_date" content="2003/08/13" /><meta name="citation_author" content="Tom Madden" /><meta name="citation_fulltext_html_url" content="https://www.ncbi.nlm.nih.gov/books/NBK21097/" /><link rel="schema.DC" href="http://purl.org/DC/elements/1.0/" /><meta name="DC.Title" content="The BLAST Sequence Analysis Tool" /><meta name="DC.Type" content="Text" /><meta name="DC.Publisher" content="National Center for Biotechnology Information (US)" /><meta name="DC.Contributor" content="Tom Madden" /><meta name="DC.Date" content="2003/08/13" /><meta name="DC.Identifier" content="https://www.ncbi.nlm.nih.gov/books/NBK21097/" /><meta name="description" content="The comparison of nucleotide or protein sequences from the same or different organisms is a very powerful tool in molecular biology. By finding similarities between sequences, scientists can infer the function of newly sequenced genes, predict new members of gene families, and explore evolutionary relationships. Now that whole genomes are being sequenced, sequence similarity searching can be used to predict the location and function of protein-coding and transcription-regulation regions in genomic DNA." /><meta name="og:title" content="The BLAST Sequence Analysis Tool" /><meta name="og:type" content="book" /><meta name="og:description" content="The comparison of nucleotide or protein sequences from the same or different organisms is a very powerful tool in molecular biology. By finding similarities between sequences, scientists can infer the function of newly sequenced genes, predict new members of gene families, and explore evolutionary relationships. 
Now that whole genomes are being sequenced, sequence similarity searching can be used to predict the location and function of protein-coding and transcription-regulation regions in genomic DNA." /><meta name="og:url" content="https://www.ncbi.nlm.nih.gov/books/NBK21097/" /><meta name="og:site_name" content="NCBI Bookshelf" /><meta name="og:image" content="https://www.ncbi.nlm.nih.gov/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png" /><meta name="twitter:card" content="summary" /><meta name="twitter:site" content="@ncbibooks" /><meta name="warning" content="This publication is provided for historical reference only and the information may be out of date." /><meta name="bk-non-canon-loc" content="/books/n/handbook/ch16/" /><link rel="canonical" href="https://www.ncbi.nlm.nih.gov/books/NBK21097/" /><link rel="stylesheet" href="/corehtml/pmc/css/figpopup.css" type="text/css" media="screen" /><link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books.min.css" type="text/css" /><link rel="stylesheet" href="/corehtml/pmc/css/bookshelf/2.26/css/books_print.min.css" type="text/css" media="print" /><style type="text/css">.main-content {background:transparent repeat-y top left;background-image:url(/corehtml/pmc/css/bookshelf/2.26/img/archive.png);background-size: auto, contain; padding:0 0 0 3em }</style><style type="text/css">p a.figpopup{display:inline !important} .bk_tt {font-family: monospace} .first-line-outdent .bk_ref {display: inline} .body-content h2, .body-content .h2 {border-bottom: 1px solid #97B0C8} .body-content h2.inline {border-bottom: none} a.page-toc-label , .jig-ncbismoothscroll a {text-decoration:none;border:0 !important} .temp-labeled-list .graphic {display:inline-block !important} .temp-labeled-list img{width:100%}</style><script type="text/javascript" src="/corehtml/pmc/js/jquery.hoverIntent.min.js"> </script><script type="text/javascript" src="/corehtml/pmc/js/common.min.js?_=3.18"> </script><script type="text/javascript" 
src="/corehtml/pmc/js/large-obj-scrollbars.min.js"> </script><script type="text/javascript">window.name="mainwindow";</script><script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/book-toc.min.js"> </script><script type="text/javascript" src="/corehtml/pmc/js/bookshelf/2.26/books.min.js"> </script><meta name="book-collection" content="NONE" />
<!-- Page meta end -->
<link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8B13DC7C8020610000000000430034.m_13" />
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3852956/3985586/3808861/4121862/3974050/3917732/251717/4216701/14534/45193/4113719/3849091/3984811/3751656/4033350/3840896/3577051/3852958/4008682/4207974/4206132/4062871/12930/3964959/3854974/36029/4128070/9685/3549676/3609192/3609193/3609213/3395586.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/css/3411343/3882866.css" media="print" /></head>
<body class="book-part">
<div class="grid">
<div class="col twelve_col nomargin shadow">
<!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
<div class="sysmessages">
<!-- Fallback shown only when scripting is disabled: warns that the site needs JavaScript and links to the help page. -->
<noscript>
<p class="nojs">
<strong>Warning:</strong>
The NCBI web site requires JavaScript to function.
<!-- rel="noopener": the help page opens in a new window and must not receive a window.opener handle to this page. -->
<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank" rel="noopener">more...</a>
</p>
</noscript>
</div>
<!--/.sysmessage-->
<div class="wrap">
<div class="page">
<div class="top">
<div id="universal_header">
<section class="usa-banner">
  <!--
    "Official website" banner. The header row holds a toggle button whose
    aria-controls points at #gov-banner-top; in the static markup the button is
    aria-expanded="false" and the panel is aria-hidden="true".
  -->
  <div class="usa-accordion">

    <!-- Always-visible header row: flag icon, statement, toggle button. -->
    <header class="usa-banner-header">
      <div class="usa-grid usa-banner-inner">
        <img alt="U.S. flag" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" />
        <p>An official website of the United States government</p>
        <button type="button" class="non-usa-accordion-button usa-banner-button" aria-controls="gov-banner-top" aria-expanded="false">
          <span class="usa-banner-button-text">Here's how you know</span>
        </button>
      </div>
    </header>

    <!-- Panel referenced by the toggle button: two half-width guidance blocks. -->
    <div id="gov-banner-top" class="usa-banner-content usa-grid usa-accordion-content" aria-hidden="true">

      <!-- Guidance block 1: what the .gov domain means. -->
      <div class="usa-banner-guidance-gov usa-width-one-half">
        <img alt="Dot gov" class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" />
        <div class="usa-media_block-body">
          <p>
            <strong>The .gov means it's official.</strong>
            <br />
            Federal government websites often end in .gov or .mil.
            Before sharing sensitive information, make sure you're on
            a federal government site.
          </p>
        </div>
      </div>

      <!-- Guidance block 2: what the https:// scheme means. -->
      <div class="usa-banner-guidance-ssl usa-width-one-half">
        <img alt="Https" class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" />
        <div class="usa-media_block-body">
          <p>
            <strong>The site is secure.</strong>
            <br />
            The <strong>https://</strong> ensures that you are connecting
            to the official website and that any information you provide
            is encrypted and transmitted securely.
          </p>
        </div>
      </div>

    </div>
  </div>
</section>
<div class="usa-overlay"></div>
<!--
  Site header: agency logo link, account controls, and the account popup.
  Both account controls ("Log in" link and account-info button) ship with
  style="display:none"; presumably a script reveals the appropriate one once
  login state is known (TODO confirm against the header script).
-->
<header class="ncbi-header" role="banner" data-section="Header">
<div class="usa-grid">
<div class="usa-width-one-whole">
<div class="ncbi-header__logo">
<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
</a>
</div>
<div class="ncbi-header__account">
<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
<!-- Account button: aria-controls ties it to the #account_popup panel below. -->
<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
<!-- Decorative user icon, hidden from assistive technology; the button's name comes from the text span below. -->
<span class="fa fa-user" aria-hidden="true">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
<g style="fill: #fff">
<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9 C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
</g>
</svg>
</span>
<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
<!-- NOTE(review): this label uses class "sr-only" while the close button below uses "usa-sr-only"; confirm both classes are defined in the loaded CSS. -->
<span class="sr-only">Show account info</span>
</button>
</div>
<div class="ncbi-popup-anchor">
<!-- Account popup: marked aria-hidden in the static markup. -->
<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
<div class="ncbi-popup-head">
<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
<!-- Decorative close icon: hidden from assistive technology, matching the user icon above; the "Close" span supplies the button's name. -->
<span class="fa fa-times" aria-hidden="true">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
</svg>
</span>
<span class="usa-sr-only">Close</span></button>
<h4>Account</h4>
</div>
<div class="account-user-info">
Logged in as:<br />
<!-- "username" is placeholder text inside #uname_long. -->
<b><span class="username" id="uname_long">username</span></b>
</div>
<div class="account-links">
<ul class="usa-unstyled-list">
<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
</ul>
</div>
</div>
</div>
</div>
</div>
</header>
<!--
  Access-key shortcuts (keys 0-4). Each link carries tabindex="-1", so it is
  skipped by sequential Tab navigation and is reached through its accesskey.
  The "usa-sr-only" class presumably hides the links visually (TODO confirm in CSS).
  NOTE(review): "Main Navigation" (key 4) points at a bare href="#" rather than
  an element id, unlike "Main Content" (key 3), which targets #maincontent;
  confirm the intended destination.
-->
<div role="navigation" aria-label="access keys">
<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
</div>
<!-- Alerts region: ships as an empty placeholder; presumably filled in by script at runtime (TODO confirm). -->
<section data-section="Alerts">
<div class="ncbi-alerts-placeholder"></div>
</section>
</div>
<div class="header">
<div class="res_logo"><h1 class="res_name"><a href="/books/" title="Bookshelf home">Bookshelf</a></h1><h2 class="res_tagline"></h2></div>
<div class="search"><form method="get" action="/books/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="books" selected="selected" data-ac_dict="bookshelf-search">Books</option><option value="gquery">All Databases</option><option value="refseq">RefSeq</option><option value="pubmed" class="last">PubMed</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books" data-ac_dict="bookshelf-search">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option 
value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div class="nowrap"><label for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search Books. Use up and down arrows to choose an item from the autocomplete." value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="dictionary:'bookshelf-search',disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'no'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form><ul class="searchlinks inline_list"><li>
<a href="/books/browse/">Browse Titles</a>
</li><li>
<a href="/books/advanced/">Advanced</a>
</li><li class="help">
<a href="/books/NBK3833/">Help</a>
</li><li class="disclaimer">
<a target="_blank" data-ga-category="literature_resources" data-ga-action="link_click" data-ga-label="disclaimer_link" href="https://www.ncbi.nlm.nih.gov/books/about/disclaimer/">Disclaimer</a>
</li></ul></div>
</div>
<!--<component id="Page" label="headcontent"/>-->
</div>
<div class="content">
<!-- site messages -->
<!-- Custom content 1 -->
<div class="col1">
</div>
<div class="container">
<div id="maincontent" class="content eight_col col">
<!-- Custom content in the left column above book nav -->
<div class="col2">
</div>
<!-- Book content -->
<!-- Custom content between navigation and content -->
<div class="col3">
</div>
<div class="document">
<!-- Pre-content banner: print-only citation, pointer to the 2nd edition, "historical reference" warning (screen and print variants), book cover/details block, and prev/next chapter links. Cover image dimensions are unit-less pixel integers, as the width/height attributes require. -->
<div class="pre-content"><div><div class="bk_prnt"><p class="small">NCBI Bookshelf. A service of the National Library of Medicine, National Institutes of Health.</p><p>McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. </p></div><div class="bk_msg_box bk_bttm_mrgn clearfix bk_noprnt"><div class="iconblock clearfix"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/handbook2e/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook2e-lrg.png" alt="Cover of The NCBI Handbook, 2nd Edition" height="100" width="80" /></a><div class="icnblk_cntnt"><ul class="messages"><li class="info icon"><span class="icon"><a href="/books/n/handbook2e/">See "The NCBI Handbook, 2nd Edition"</a></span></li></ul></div></div></div><div class="messagearea bk_noprnt" style="margin-bottom:1.3846em "><ul class="messages"><li class="warn icon"><span class="icon">This publication is provided for historical reference only and the information may be out of date.</span></li></ul></div><div class="bk_prnt"><p style="color:red;"><strong>This publication is provided for historical reference only and the information may be out of date.</strong></p></div><div class="iconblock clearfix whole_rhythm no_top_margin bk_noprnt"><a class="img_link icnblk_img" title="Table of Contents Page" href="/books/n/handbook/"><img class="source-thumb" src="/corehtml/pmc/pmcgifs/bookshelf/thumbs/th-handbook-lrg.png" alt="Cover of The NCBI Handbook" height="100" width="80" /></a><div class="icnblk_cntnt eight_col"><h2>The NCBI Handbook [Internet].</h2><a data-jig="ncbitoggler" href="#__NBK21097_dtls__">Show details</a><div style="display:none" class="ui-widget" id="__NBK21097_dtls__"><div>McEntyre J, Ostell J, editors.</div><div>Bethesda (MD): <a href="https://www.ncbi.nlm.nih.gov/" ref="pagearea=page-banner&amp;targetsite=external&amp;targetcat=link&amp;targettype=publisher">National Center for Biotechnology 
Information (US)</a>; 2002-.</div></div><div class="half_rhythm"><ul class="inline_list"><li style="margin-right:1em"><a class="bk_cntns" href="/books/n/handbook/">Contents</a></li></ul></div></div><div class="icnblk_cntnt two_col"><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook/ch15/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook/ch17/" title="Next page in this title">Next &gt;</a></div></div></div></div></div>
<div class="main-content lit-style" itemscope="itemscope" itemtype="http://schema.org/CreativeWork"><div class="meta-content fm-sec"><h1 id="_NBK21097_"><span class="label">Chapter 16</span><span class="title" itemprop="name">The BLAST Sequence Analysis Tool</span></h1><p class="contrib-group"><span itemprop="author">Tom Madden</span>.</p><p class="small">Created: <span itemprop="datePublished">October 9, 2002</span>; Last Update: <span itemprop="dateModified">August 13, 2003</span>.</p><p><em>Estimated reading time: 17 minutes</em></p></div><div class="jig-ncbiinpagenav body-content whole_rhythm" data-jigconfig="allHeadingLevels: ['h2'],smoothScroll: false" itemprop="text"><div id="_abs_rndgid_" itemprop="description"><h2 id="__abs_rndgid__">Summary</h2><p>The comparison of nucleotide or protein sequences from the same or different organisms is a very powerful tool in molecular biology. By finding similarities between sequences, scientists can infer the function of newly sequenced genes, predict new members of gene families, and explore evolutionary relationships. Now that whole genomes are being sequenced, sequence similarity searching can be used to predict the location and function of protein-coding and transcription-regulation regions in genomic <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a>.</p><p>Basic Local Alignment Search Tool (<a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a>) (<a class="bk_pop" href="#A648">1</a>, <a class="bk_pop" href="#A649">2</a>) is the tool most frequently used for calculating sequence similarity. BLAST comes in variations for use with different query sequences against different databases. All BLAST applications, as well as information on which BLAST program to use and other help documentation, are listed on the BLAST <a href="/BLAST/" ref="pagearea=abstract&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">homepage</a>. 
This chapter will focus more on how BLAST works, its output, and how both the output and program itself can be further manipulated or customized, rather than on how to use <a href="/Education/BLASTinfo/information3.html" ref="pagearea=abstract&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLAST</a> or interpret BLAST results.</p></div><div id="A611"><h2 id="_A611_">Introduction</h2><p>The way most people use <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> is to input a nucleotide or protein sequence as a query against all (or a subset of) the public sequence databases, pasting the sequence into the textbox on one of the BLAST <a href="/BLAST/" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">Web pages</a>. This sends the query over the Internet, the search is performed on the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> databases and servers, and the results are posted back to the person's browser in the chosen display format. However, many biotech companies, genome scientists, and bioinformatics personnel may want to use &#x0201c;stand-alone&#x0201d; BLAST to query their own, local databases or want to customize BLAST in some way to make it better suit their needs. 
Stand-alone BLAST comes in two forms: the executables that can be run from the <a href="/BLAST/blast_overview.html#executables" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">command line</a>; or the Standalone <a class="def" href="/books/n/handbook/A1237/def-item/app197/">WWW</a> <a href="/BLAST/blast_overview.html#wwwserver" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">BLAST Server</a>, which allows users to set up their own in-house versions of the BLAST Web pages.</p><p>There are many different <a href="/blast/html/BLASThomehelp.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">variations</a> of <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> available to use for different sequence comparisons, e.g., a <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a> query to a DNA database, a protein query to a protein database, and a DNA query, translated in all six reading frames, to a protein sequence database. Other <a href="/BLAST/producttable.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">adaptations</a> of BLAST, such as <a class="def" href="/books/n/handbook/A1237/def-item/app148/">PSI-BLAST</a> (for iterative protein sequence similarity searches using a position-specific score matrix) and <a class="def" href="/books/n/handbook/A1237/def-item/app159/">RPS-BLAST</a> (for searching for protein domains in the Conserved Domains Database, <a href="/books/n/handbook/ch3/">Chapter 3</a>) perform comparisons against sequence profiles.</p><p>This chapter will first describe the <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> architecture&#x02014;how it works at the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> site&#x02014;and then go on to describe the various BLAST outputs. 
The best known of these outputs is the default display from BLAST Web pages, the so-called &#x0201c;traditional report&#x0201d;. In addition to the traditional report, BLAST results can also be delivered in structured output, such as a hit table (see below), <a class="def" href="/books/n/handbook/A1237/def-item/app198/">XML</a>, or <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>. The optimal choice of output format depends upon the application. The final part of the chapter discusses stand-alone BLAST and describes possibilities for customization. There are many interfaces to BLAST that are often not exploited by users but can lead to more efficient and robust applications.</p></div><div id="A612"><h2 id="_A612_">How BLAST Works: The Basics</h2><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> algorithm is a heuristic program, which means that it relies on some smart shortcuts to perform the search faster. BLAST performs &#x0201c;local&#x0201d; alignments. Most proteins are modular in nature, with functional domains often being repeated within the same protein as well as across different proteins from different species. The BLAST algorithm is tuned to find these domains or shorter stretches of sequence similarity. The local alignment approach also means that an <a class="def" href="/books/n/handbook/A1237/def-item/app114/">mRNA</a> can be aligned with a piece of genomic <a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a>, as is frequently required in genome assembly and analysis. 
If instead BLAST started out by attempting to align two sequences over their entire lengths (known as a global alignment), fewer similarities would be detected, especially with respect to domains and motifs.</p><p>When a query is submitted via one of the <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> Web pages, the sequence and any other input information, such as the database to be searched, word size, expect value, and so on, are fed to the <a href="/Education/BLASTinfo/BLAST_algorithm.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">algorithm</a> on the BLAST server. BLAST works by first making a look-up table of all the &#x0201c;words&#x0201d; (short subsequences; for proteins, the default length is three letters) and &#x0201c;neighboring words&#x0201d;, i.e., similar words in the query sequence. The sequence database is then scanned for these &#x0201c;hot spots&#x0201d;. When a match is identified, it is used to initiate <a href="/books/n/handbook/A1237/#app59">gap</a>-free and gapped extensions of the &#x0201c;word&#x0201d;.</p><p><a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> does not search <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a> flatfiles (or any subset of GenBank flatfiles) directly. Rather, sequences are made into BLAST databases. Each entry is split, and two files are formed, one containing just the header information and one containing just the sequence information. These are the data that the algorithm uses. 
If BLAST is to be run in &#x0201c;stand-alone&#x0201d; mode, the data file could consist of local, private data, downloaded <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> BLAST databases, or a combination of the two.</p><p>After the algorithm has looked up all possible &#x0201c;words&#x0201d; from the query sequence and extended them maximally, it assembles the best alignment for each query&#x02013;sequence pair and writes this information to a SeqAlign data structure (in <a href="/books/n/handbook/A1237/#app5">ASN.1</a>; also used by <a class="def" href="/books/n/handbook/A1237/def-item/app161/">Sequin</a>, see <a href="/books/n/handbook/ch12/">Chapter 12</a>). The SeqAlign structure in itself does not contain the sequence information; rather, it refers to the sequences in the <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> database (<a class="figpopup" href="/books/NBK21097/figure/A613/?report=objectonly" target="object" rid-figpopup="figA613" rid-ob="figobA613">Figure 1</a>).
</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figA613" co-legend-rid="figlgndA613"><a href="/books/NBK21097/figure/A613/?report=objectonly" target="object" title="Figure 1" class="img_link icnblk_img figpopup" rid-figpopup="figA613" rid-ob="figobA613"><img class="small-thumb" src="/books/NBK21097/bin/ch16f1.gif" src-large="/books/NBK21097/bin/ch16f1.jpg" alt="Figure 1. How the BLAST results Web pages are assembled." /></a><div class="icnblk_cntnt" id="figlgndA613"><h4 id="A613"><a href="/books/NBK21097/figure/A613/?report=objectonly" target="object" rid-ob="figobA613">Figure 1</a></h4><p class="float-caption no_bottom_margin">How the BLAST results Web pages are assembled. The QBLAST system located on the BLAST server executes the search, writing information about the sequence alignment in ASN.1. The results can then be formatted by fetching the ASN.1 (<i>fetch ASN.1</i>) and fetching <a href="/books/NBK21097/figure/A613/?report=objectonly" target="object" rid-ob="figobA613">(more...)</a></p></div></div><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> Formatter, which sits on the BLAST server, can use the information in the SeqAlign to retrieve the similar sequences found and display them in a variety of ways. Thus, once a query has been completed, the results can be reformatted without having to re-execute the search. This is possible because of the <a href="/BLAST/blast_overview.html#blastq" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">QBLAST</a> system.</p></div><div id="A614"><h2 id="_A614_">BLAST Scores and Statistics</h2><p>Once <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> has found a similar sequence to the query in the database, it is helpful to have some idea of whether the alignment is &#x0201c;good&#x0201d; and whether it portrays a possible biological relationship, or whether the similarity observed is attributable to chance alone. 
BLAST uses <a href="/BLAST/tutorial/Altschul-1.html" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">statistical theory</a> to produce a <a href="/books/n/handbook/A1237/#app8">bit score</a> and an expect value (<a href="/books/n/handbook/A1237/#app42">E-value</a>) for each alignment pair (query to hit).</p><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app8/">bit score</a> gives an indication of how good the alignment is; the higher the score, the better the alignment. In general terms, this score is calculated from a formula that takes into account the alignment of similar or identical residues, as well as any gaps introduced to align the sequences. A key element in this calculation is the &#x0201c;<a href="/books/n/handbook/A1237/#app174">substitution matrix</a>&#x0201d;, which assigns a score for aligning any possible pair of residues. The <a href="/books/n/handbook/A1237/#app15">BLOSUM62</a> matrix is the default for most <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> programs, the exceptions being <a class="def" href="/books/n/handbook/A1237/def-item/app10/">blastn</a> and <a class="def" href="/books/n/handbook/A1237/def-item/app103/">MegaBLAST</a> (programs that perform nucleotide&#x02013;nucleotide comparisons and hence do not use protein-specific matrices). Bit scores are normalized, which means that the bit scores from different alignments can be compared, even if different scoring matrices have been used.</p><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app42/">E-value</a> gives an indication of the statistical significance of a given pairwise alignment and reflects the size of the database and the scoring system used. The lower the E-value, the more significant the hit. An E-value of 0.05 for a sequence alignment means that this similarity has a 5 in 100 (1 in 20) chance of occurring by chance alone. 
Although a statistician might consider this to be significant, it still may not represent a biologically meaningful result, and analysis of the alignments (see below) is required to determine &#x0201c;biological&#x0201d; significance.</p></div><div id="A615"><h2 id="_A615_">BLAST Output: 1. The Traditional Report</h2><p>Most <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> users are familiar with the so-called &#x0201c;traditional&#x0201d; BLAST report. The report consists of three major sections: (1) the header, which contains information about the query sequence and the database searched (<a class="figpopup" href="/books/NBK21097/figure/A616/?report=objectonly" target="object" rid-figpopup="figA616" rid-ob="figobA616">Figure 2</a>); on the Web, there is also a graphical overview (<a class="figpopup" href="/books/NBK21097/figure/A617/?report=objectonly" target="object" rid-figpopup="figA617" rid-ob="figobA617">Figure 3</a>); (2) the one-line descriptions of each database sequence found to match the query sequence; these provide a quick overview for browsing (<a class="figpopup" href="/books/NBK21097/figure/A618/?report=objectonly" target="object" rid-figpopup="figA618" rid-ob="figobA618">Figure 4</a>); and (3) the alignments for each database sequence matched (<a class="figpopup" href="/books/NBK21097/figure/A619/?report=objectonly" target="object" rid-figpopup="figA619" rid-ob="figobA619">Figure 5</a>) (there may be more than one alignment for each database sequence matched).
</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figA616" co-legend-rid="figlgndA616"><a href="/books/NBK21097/figure/A616/?report=objectonly" target="object" title="Figure 2" class="img_link icnblk_img figpopup" rid-figpopup="figA616" rid-ob="figobA616"><img class="small-thumb" src="/books/NBK21097/bin/ch16f2.gif" src-large="/books/NBK21097/bin/ch16f2.jpg" alt="Figure 2. The BLAST report header." /></a><div class="icnblk_cntnt" id="figlgndA616"><h4 id="A616"><a href="/books/NBK21097/figure/A616/?report=objectonly" target="object" rid-ob="figobA616">Figure 2</a></h4><p class="float-caption no_bottom_margin">The BLAST report header. The <i>top line</i> gives information about the type of program (in this case, <i>BLASTP</i>), the version (<i>2.2.1</i>), and a version release date. The research paper that describes BLAST is then cited, followed by the request ID (issued by QBLAST), <a href="/books/NBK21097/figure/A616/?report=objectonly" target="object" rid-ob="figobA616">(more...)</a></p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figA617" co-legend-rid="figlgndA617"><a href="/books/NBK21097/figure/A617/?report=objectonly" target="object" title="Figure 3" class="img_link icnblk_img figpopup" rid-figpopup="figA617" rid-ob="figobA617"><img class="small-thumb" src="/books/NBK21097/bin/ch16f3.gif" src-large="/books/NBK21097/bin/ch16f3.jpg" alt="Figure 3. Graphical overview of BLAST results." /></a><div class="icnblk_cntnt" id="figlgndA617"><h4 id="A617"><a href="/books/NBK21097/figure/A617/?report=objectonly" target="object" rid-ob="figobA617">Figure 3</a></h4><p class="float-caption no_bottom_margin">Graphical overview of BLAST results. The query sequence is represented by the <i>numbered red bar</i> at the <i>top</i> of the figure. Database hits are shown aligned to the query, <i>below</i> the red bar. 
Of the aligned sequences, the most similar are shown closest to the <a href="/books/NBK21097/figure/A617/?report=objectonly" target="object" rid-ob="figobA617">(more...)</a></p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figA618" co-legend-rid="figlgndA618"><a href="/books/NBK21097/figure/A618/?report=objectonly" target="object" title="Figure 4" class="img_link icnblk_img figpopup" rid-figpopup="figA618" rid-ob="figobA618"><img class="small-thumb" src="/books/NBK21097/bin/ch16f4.gif" src-large="/books/NBK21097/bin/ch16f4.jpg" alt="Figure 4. One-line descriptions in the BLAST report." /></a><div class="icnblk_cntnt" id="figlgndA618"><h4 id="A618"><a href="/books/NBK21097/figure/A618/?report=objectonly" target="object" rid-ob="figobA618">Figure 4</a></h4><p class="float-caption no_bottom_margin">One-line descriptions in the BLAST report. Each line is composed of four fields: (<i>a</i>) the gi number, database designation, Accession number, and locus name for the matched sequence, separated by vertical bars (Appendix 1); (<i>b</i>) a brief textual description <a href="/books/NBK21097/figure/A618/?report=objectonly" target="object" rid-ob="figobA618">(more...)</a></p></div></div><div class="iconblock whole_rhythm clearfix ten_col fig" id="figA619" co-legend-rid="figlgndA619"><a href="/books/NBK21097/figure/A619/?report=objectonly" target="object" title="Figure 5" class="img_link icnblk_img figpopup" rid-figpopup="figA619" rid-ob="figobA619"><img class="small-thumb" src="/books/NBK21097/bin/ch16f5.gif" src-large="/books/NBK21097/bin/ch16f5.jpg" alt="Figure 5. A pairwise sequence alignment from a BLAST report." /></a><div class="icnblk_cntnt" id="figlgndA619"><h4 id="A619"><a href="/books/NBK21097/figure/A619/?report=objectonly" target="object" rid-ob="figobA619">Figure 5</a></h4><p class="float-caption no_bottom_margin">A pairwise sequence alignment from a BLAST report. 
The alignment is preceded by the sequence identifier, the full definition line, and the length of the matched sequence, in amino acids. Next comes the bit score (the raw score is in <i>parentheses</i>) and then <a href="/books/NBK21097/figure/A619/?report=objectonly" target="object" rid-ob="figobA619">(more...)</a></p></div></div><p>The traditional report is really designed for human readability, as opposed to being parsed by a program. For example, the one-line descriptions are useful for people to get a quick overview of their search results, but they are rarely complete descriptors because of limited space. Also, for convenience, there are several pieces of information that are displayed in both the one-line descriptions and alignments (for example, the E-values, scores, and descriptions); therefore, the person viewing the search output does not need to move back and forth between sections.</p><p>New features may be added to the report, e.g., the addition of links to <a class="def" href="/books/n/handbook/A1237/def-item/app97/">Entrez Gene</a> records (<a href="/books/n/handbook/ch19/">Chapter 19</a>) from sequence hits, which result in a change of output format. These are easy for people to pick up on and take advantage of but can trip up programs that parse this <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> output.</p><p>By default, a maximum of 500 sequence matches are displayed, which can be changed on the advanced <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> page with the <b>Alignments</b> option. Many components of the BLAST results displayed via the Internet are hyperlinked to the same information at different places in the page, to additional information including help documentation, and to the <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> sequence records of matched sequences. 
These records provide more information about the sequence, including links to relevant research abstracts in <a class="def" href="/books/n/handbook/A1237/def-item/app150/">PubMed</a>.</p></div><div id="A620"><h2 id="_A620_">BLAST Output: 2. The Hit Table</h2><p>Although the traditional report is ideal for investigating the characteristics of one gene or protein, often scientists want to make a large number of <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> runs for a specialized purpose and need only a subset of the information contained in the traditional BLAST report. Furthermore, in cases where the BLAST output will be processed further, it can be unreliable to parse the traditional report. The traditional report is merely a display format with no formal structure or rules, and improvements may be made at any time, changing the underlying <a href="/books/n/handbook/A1237/#app75">HTML</a>. The hit table format provides a simple and clean alternative (<a class="figpopup" href="/books/NBK21097/figure/A621/?report=objectonly" target="object" rid-figpopup="figA621" rid-ob="figobA621">Figure 6</a>).
</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figA621" co-legend-rid="figlgndA621"><a href="/books/NBK21097/figure/A621/?report=objectonly" target="object" title="Figure 6" class="img_link icnblk_img figpopup" rid-figpopup="figA621" rid-ob="figobA621"><img class="small-thumb" src="/books/NBK21097/bin/ch16f6.gif" src-large="/books/NBK21097/bin/ch16f6.jpg" alt="Figure 6. BLAST output in hit table format." /></a><div class="icnblk_cntnt" id="figlgndA621"><h4 id="A621"><a href="/books/NBK21097/figure/A621/?report=objectonly" target="object" rid-ob="figobA621">Figure 6</a></h4><p class="float-caption no_bottom_margin">BLAST output in hit table format. This shows the results of a search of an <i>E. coli</i> database using a human sequence as a query. The lines starting with a # sign should be considered comments and ignored. The <i>last comment line</i> lists the fields in the table. <a href="/books/NBK21097/figure/A621/?report=objectonly" target="object" rid-ob="figobA621">(more...)</a></p></div></div><p>The screening of many newly sequenced human Expressed Sequence Tags (<a href="/books/n/handbook/A1237/#app46">EST</a>s) for contamination by the <i>Escherichia coli</i> cloning vector is a good example of when it is preferable to use the hit table output over the traditional report. In this case, a strict (i.e., low) <a class="def" href="/books/n/handbook/A1237/def-item/app42/">E-value</a> threshold would be applied to differentiate between contaminating <i>E. coli</i> sequence and the human sequence. Those human ESTs that find very strong, near-exact <i>E. coli</i> sequence matches can be discarded without further examination. (Borderline cases may require further examination by a scientist.)</p><p>For these purposes, the hit table output is more useful than the traditional report; it contains only the information required in a more formal structure. 
The hit table output contains no sequences or definition lines, but for each sequence matched, it lists the sequence identifier, the start and stop points for stretches of sequence similarity (offset by one residue), the percent identity of the match, and the <a class="def" href="/books/n/handbook/A1237/def-item/app42/">E-value</a>.</p></div><div id="A622"><h2 id="_A622_">BLAST Output: 3. Structured Output</h2><p>There are drawbacks to parsing both the <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> report and even the simpler hit table. There is no way to automatically check for truncated or otherwise corrupted output in cases when a large number of sequences are being screened. (This may happen if the disk is full, for example.) Also, there is no rigorous check for syntax changes in the output, such as the addition of new features, which can lead to erroneous parsing. Structured output allows for automatic and rigorous checks for syntax errors and changes. Both <a href="/books/n/handbook/A1237/#app198">XML</a> and <a href="/books/n/handbook/A1237/#app5">ASN</a>.1 are examples of structured output in which there are built-in checks for correct and complete syntax and structure. (In the case of <a class="def" href="/books/n/handbook/A1237/def-item/app198/">XML</a>, for example, this is ensured by the necessity for matching tags and the <a class="def" href="/books/n/handbook/A1237/def-item/app40/">DTD</a>.) 
For text reports, there is often no specification, but perhaps an (incomplete) description of the file is written afterward.</p><div id="A623"><h3>ASN.1 Is Used by the BLAST Server</h3><p>As well as the hit table and traditional report shown in <a class="def" href="/books/n/handbook/A1237/def-item/app75/">HTML</a>, <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> results can also be formatted in plain text, <a class="def" href="/books/n/handbook/A1237/def-item/app198/">XML</a>, and <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> (<a class="figpopup" href="/books/NBK21097/figure/A624/?report=objectonly" target="object" rid-figpopup="figA624" rid-ob="figobA624">Figure 7</a>), and what's more, the format for a given BLAST result can be changed without re-executing the search.
</p><div class="iconblock whole_rhythm clearfix ten_col fig" id="figA624" co-legend-rid="figlgndA624"><a href="/books/NBK21097/figure/A624/?report=objectonly" target="object" title="Figure 7" class="img_link icnblk_img figpopup" rid-figpopup="figA624" rid-ob="figobA624"><img class="small-thumb" src="/books/NBK21097/bin/ch16f7.gif" src-large="/books/NBK21097/bin/ch16f7.jpg" alt="Figure 7. The different output formats that can be produced from ASN.1." /></a><div class="icnblk_cntnt" id="figlgndA624"><h4 id="A624"><a href="/books/NBK21097/figure/A624/?report=objectonly" target="object" rid-ob="figobA624">Figure 7</a></h4><p class="float-caption no_bottom_margin">The different output formats that can be produced from ASN.1. Note that some nodes can be viewed as both HTML and text. XML is also structured output but can be produced from ASN.1 because it has equivalent information. </p></div></div><p>A change in <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> format without re-executing the search is possible because when a scientist looks at a Web page of BLAST results at <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>, the <a class="def" href="/books/n/handbook/A1237/def-item/app75/">HTML</a> that makes that page has been created from <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> (<a class="figpopup" href="/books/NBK21097/figure/A624/?report=objectonly" target="object" rid-figpopup="figA624" rid-ob="figobA624">Figure 7</a>). Although the formatted results are requested from the server, the information about the alignments is fetched from a disk in ASN.1, as are the corresponding sequences from the BLAST databases (see <a class="figpopup" href="/books/NBK21097/figure/A613/?report=objectonly" target="object" rid-figpopup="figA613" rid-ob="figobA613">Figure 1</a>). The formatter on the BLAST server then puts these results together as a BLAST report. 
The BLAST search itself has been uncoupled from the way the result is formatted, thus allowing different output formats from the same search. The strict internal validation of ASN.1 ensures that these output formats can always be produced reliably.</p></div><div id="A625"><h3>Information about the Alignment Is Contained within a SeqAlign</h3><p>SeqAlign is the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> object that contains the alignment information about the <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> search. The SeqAlign does not contain the actual sequence that was found in the match but does contain the start, stop, and <a class="def" href="/books/n/handbook/A1237/def-item/app59/">gap</a> information, as well as scores, E-values, sequence identifiers, and (<a class="def" href="/books/n/handbook/A1237/def-item/app37/">DNA</a>) strand information.</p><p>As mentioned above, the actual database sequences are fetched from the <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> databases when needed. This means that an identifier must uniquely identify a sequence in the database. Furthermore, the query sequence cannot have the same identifier as any sequence in the database unless the query sequence itself is in the database. If one is using stand-alone BLAST with a custom database, it is possible to specify that every sequence is uniquely identified by using the <b>&#x02013;O</b> option with formatdb (the program that converts <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a> files to BLAST database format). This also indexes the entries by identifier. Similarly, the <b>&#x02013;J</b> option in the (stand-alone) programs blastall, blastpgp, megablast, or rpsblast certifies that the query does not use an identifier already in the database for a different sequence. 
If the <b>&#x02013;O</b> and <b>&#x02013;J</b> options are not used, BLAST assigns unique identifiers (for that run) to all sequences and shields the user from this knowledge.</p><p>Any <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> database or <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a> file from the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> Web site that contains gi numbers already satisfies the uniqueness criterion. Unique identifiers are normally a problem only when custom databases are produced and care is not taken in assigning identifiers. The identifier for a FASTA entry is the first token (meaning the letters up to the first space) after the &#x0003e; sign on the <a class="def" href="/books/n/handbook/A1237/def-item/app36/">definition line</a>. The simplest case is to simply have a unique token (e.g., 1, 2, and so on), but it is possible to construct more complicated identifiers that might, for example, describe the data source. For the FASTA identifiers to be reliably parsed, it is necessary for them to follow a specific syntax (see Appendix 1).</p><p>More information on the SeqAlign produced by <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> can be found <a href="ftp://ftp.ncbi.nih.gov/blast/demo/ieee_talk.pdf" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">here</a> or be downloaded as a <a href="ftp://ftp.ncbi.nih.gov/blast/demo/blast_programming.ppt" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=ftp">PowerPoint presentation</a>, as well as from the <a class="def" href="/books/n/handbook/A1237/def-item/app117/">NCBI Toolkit</a> Software Developer's <a href="/IEB/ToolBox/SDKDOCS/INDEX.HTML" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">handbook</a>.
</p></div><div id="A626"><h3>XML</h3><p><a class="def" href="/books/n/handbook/A1237/def-item/app198/">XML</a> and <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a> are both structured languages and can express the same information; therefore, it is possible to produce a SeqAlign in XML. Some users do not find the format of the information in the SeqAlign to be convenient because it does not contain actual sequence information, and when the sequence is fetched from the <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> database, it is packed two or four bases per byte. Typically, these users are familiar with the BLAST report and want something similar but in a format that can be parsed reliably. The XML produced by BLAST meets this need, containing the query and database sequences, sequence definition lines, the start and stop points of the alignments (one offset), as well as scores, E-values, and percent identity. There is a public <a href="/data_specs/dtd/NCBI_BlastOutput.dtd" ref="pagearea=body&amp;targetsite=external&amp;targetcat=link&amp;targettype=uri">DTD</a> for this XML output.</p></div></div><div id="A627"><h2 id="_A627_">BLAST Code</h2><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> code is part of the <a class="def" href="/books/n/handbook/A1237/def-item/app117/">NCBI Toolkit</a>, which has many low-level functions to make it platform independent; the Toolkit is supported under Linux and many varieties of <a class="def" href="/books/n/handbook/A1237/def-item/app189/">UNIX</a>, NT, and MacOS. To use the Toolkit, developers should write a function &#x0201c;Main&#x0201d;, which is called by the Toolkit &#x0201c;main&#x0201d;. The BLAST code is contained mostly in the tools directory (see Appendix 2 for an example).</p><p>The <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> code has a modular design. 
For example, the Application Programming Interface (<a class="def" href="/books/n/handbook/A1237/def-item/app4/">API</a>) for retrieval from the BLAST databases is independent of the compute engine. The compute engine is independent from the formatter; therefore, it is possible (as mentioned above) to compute results once but view them in many different modes.</p><div id="A628"><h3>Readdb API</h3><p>The readdb <a class="def" href="/books/n/handbook/A1237/def-item/app4/">API</a> can be used to easily extract information from the <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> databases. Among the data available are the date the database was produced, the title, the number of letters, number of sequences, and the longest sequence. Also available are the sequence and description of any entry. The latest version of the BLAST databases also contains a <a class="def" href="/books/n/handbook/A1237/def-item/app180/">taxid</a> (an integer specifying some node of the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> taxonomy tree; see <a href="/books/n/handbook/ch4/">Chapter 4</a>). Users are strongly encouraged to use the readdb API rather than reading the files associated with the database, because the files are subject to change. The API, on the other hand, will support the newest version, and an attempt will be made to support older versions. See Appendix 2 for an example of a simple program (db2fasta.c) that demonstrates the use of the readdb API.</p></div><div id="A629"><h3>Performing a BLAST Search with C Function Calls</h3><p>Only a few function calls are needed to perform a <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> search. 
Appendix 3 shows an excerpt from a Demonstration Program doblast.c.</p></div><div id="A630"><h3>Formatting a SeqAlign</h3><p>MySeqAlignPrint (called in the example in Appendix 3) is a simple function to print a view of a SeqAlign (see Appendix 4).</p></div></div><div id="A631"><h2 id="_A631_">Appendix 1. FASTA identifiers</h2><p>The syntax of the <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a> definition lines used in the <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a> <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> databases depends upon the database from which each sequence was obtained (see <a href="/books/n/handbook/ch1/">Chapter 1</a> on <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a>). <a class="figpopup" href="/books/NBK21097/table/A632/?report=objectonly" target="object" rid-figpopup="figA632" rid-ob="figobA632">Table 1</a> shows how the sequence source databases are identified.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figA632"><a href="/books/NBK21097/table/A632/?report=objectonly" target="object" title="Table 1" class="img_link icnblk_img figpopup" rid-figpopup="figA632" rid-ob="figobA632"><img class="small-thumb" src="/books/NBK21097/table/A632/?report=thumb" src-large="/books/NBK21097/table/A632/?report=previmg" alt="Table 1. Database identifiers in FASTA definition lines." /></a><div class="icnblk_cntnt"><h4 id="A632"><a href="/books/NBK21097/table/A632/?report=objectonly" target="object" rid-ob="figobA632">Table 1</a></h4><p class="float-caption no_bottom_margin">Database identifiers in FASTA definition lines. 
</p></div></div><p>For example, if the identifier of a sequence in a <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> result is gb|<a href="/nuccore/176485" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">M73307</a>|AGMA13GT, the gb tag indicates that sequence is from <a class="def" href="/books/n/handbook/A1237/def-item/app62/">GenBank</a>, <a href="/nuccore/176485" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=nuccore">M73307</a> is the GenBank <a class="def" href="/books/n/handbook/A1237/def-item/app208/">Accession number</a>, and AGMA13GT is the GenBank <a class="def" href="/books/n/handbook/A1237/def-item/app95/">locus</a>.</p><p>The bar (|) separates different fields. In some cases, a field is left empty, although the original specification called for including this field. To make these identifiers backwards-compatible for older parsers, the empty field is denoted by an additional bar (||).</p><p>A gi identifier has been assigned to each sequence in <a class="def" href="/books/n/handbook/A1237/def-item/app116/">NCBI</a>'s sequence databases. If the sequence is from an NCBI database, then the gi number appears at the beginning of the identifier in a traditional report. For example, gi|16760827|ref|<a href="/protein/16760827/?report=GenPept" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=genpept">NP_456444.1</a> indicates an NCBI reference sequence with the gi number 16760827 and <a class="def" href="/books/n/handbook/A1237/def-item/app208/">Accession number</a> <a href="/protein/16760827/?report=GenPept" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=genpept">NP_456444.1</a>. 
(In stand-alone <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a>, or when running BLAST from the command line, the <b>&#x02013;I</b> option should be used to display the gi number.)</p><p>The reason for adding the gi identifier is to provide a uniform, stable naming convention. If a nucleotide or protein sequence changes (for example, if it is edited by the original submitter of the sequence), a new gi identifier is assigned, but the <a class="def" href="/books/n/handbook/A1237/def-item/app208/">Accession number</a> of the record remains unchanged. Thus, the gi identifier provides a mechanism for identifying the exact sequence that was used or retrieved in a given search. This is also useful when creating crosslinks between different <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a> databases (<a href="/books/n/handbook/ch15/">Chapter 15</a>).</p></div><div id="A633"><h2 id="_A633_">Appendix 2. Readdb API</h2><p>A simple program (db2fasta.c) that demonstrates the use of the readdb <a class="def" href="/books/n/handbook/A1237/def-item/app4/">API</a>.</p><p>
<pre>
/*
 * Entry point of db2fasta: finds one record in a BLAST database by its FASTA
 * identifier and dumps it as FASTA to fp.
 * Arguments come from GetArgs via myargs: myargs[0].strvalue is handed to
 * readdb_new (presumably the database name), myargs[1].intvalue selects
 * is_prot, and myargs[2].strvalue is the identifier to look up.
 * Returns 1 when GetArgs fails and 0 otherwise.
 * NOTE(review): the results of readdb_new, readdb_acc2fasta and
 * readdb_get_bioseq are used without being checked, and fp is not closed
 * in this function.
 */
Int2 Main (void)
{
BioseqPtr bsp;
<a class="def" href="/books/n/handbook/A1237/def-item/app16/">Boolean</a> is_prot;
ReadDBFILEPtr rdfp;
FILE *fp;
Int4 index;
/* Read the command-line arguments into myargs; give up with status 1 on failure. */
if (! GetArgs ("db2fasta", NUMARG, myargs))
{
return (1);
}
/* Any non-zero myargs[1] value means is_prot is TRUE. */
if (myargs[1].intvalue)
is_prot = TRUE;
else
is_prot = FALSE;
/* Output stream; "stdout" is presumably resolved to standard output by FileOpen (TODO confirm). */
fp = FileOpen("stdout", "w");
/* Allocate the object used for reading the database. */
rdfp = readdb_new(myargs[0].strvalue, is_prot);
/* Ordinal number (zero offset) of the record that has the given FASTA identifier. */
index = readdb_acc2fasta(rdfp, myargs[2].strvalue);
/* Fetch the BioseqPtr for that record. */
bsp = readdb_get_bioseq(rdfp, index);
/* Dump the sequence as FASTA; the third argument is the negation of is_prot. */
BioseqRawToFasta(bsp, fp, !is_prot);
/* Release the Bioseq and the database reader, keeping the returned pointers. */
bsp = BioseqFree(bsp);
rdfp = readdb_destruct(rdfp);
return 0;
}</pre>
</p><p>Note that:</p>
<dl class="temp-labeled-list"><dt>1.</dt><dd id="A634"><p class="no_top_margin">Readdb_new allocates an object for reading the database.</p></dd><dt>2.</dt><dd id="A635"><p class="no_top_margin">Readdb_acc2fasta fetches the ordinal number (zero offset) of the record given a <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a> identifier (e.g., gb|<a href="/protein/13904990/?report=GenPept" class="bk_tag" ref="pagearea=body&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=genpept">AAH06776.1</a>|AAH0676).</p></dd><dt>3.</dt><dd id="A636"><p class="no_top_margin">Readdb_get_bioseq fetches the BioseqPtr (which contains the sequence, description, and identifiers) for this record.</p></dd><dt>4.</dt><dd id="A637"><p class="no_top_margin">BioseqRawToFasta dumps the sequence as <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a>.</p></dd></dl>
<p>Note also that Main is called, rather than &#x0201c;main&#x0201d;, and a call to GetArgs is used to get the command-line arguments. db2fasta.c is contained in the tar archive ftp://ftp.ncbi.nih.gov/blast/demo/blast_demo.tar.gz.</p></div><div id="A638"><h2 id="_A638_">Appendix 3. Excerpt from a demonstration program doblast.c</h2><p>
<pre>
/* Get default options for blast_program; the second argument is passed as TRUE. */
options = BLASTOptionNew(blast_program, TRUE);
/* Stop with status 5 if BLASTOptionNew returned NULL. */
if (options == NULL)
return 5;
/* Replace the default expect value with the one held in myargs[3]. */
options-&#x0003e;expect_value = (Nlm_FloatHi) myargs [3].floatvalue;
/* Perform the actual search of query_bsp against blast_database; the result is kept in seqalign. */
seqalign = BioseqBlastEngine(query_bsp, blast_program, blast_database, options,
NULL, NULL, NULL);
/* Do something with the SeqAlign... here it is handed to MySeqAlignPrint together with outfp. */
MySeqAlignPrint(seqalign, outfp);
/* Clean up: free the SeqAlign set, the options block and sep, then close both files. */
/* sep and infp are set up outside this excerpt. */
seqalign = SeqAlignSetFree(seqalign);
options = BLASTOptionDelete(options);
sep = SeqEntryFree(sep);
FileClose(infp);
FileClose(outfp);</pre>
</p><p>The main steps here are:</p>
<dl class="temp-labeled-list"><dt>1.</dt><dd id="A639"><p class="no_top_margin">BLASTOptionNew allocates a BLASTOptionBlk with default values for the specified program (e.g., <a class="def" href="/books/n/handbook/A1237/def-item/app11/">blastp</a>); the <a class="def" href="/books/n/handbook/A1237/def-item/app16/">Boolean</a> argument specifies a gapped search.</p></dd><dt>2.</dt><dd id="A640"><p class="no_top_margin">The expect_value member of the BLASTOptionBlk is changed to a non-default value specified on the command-line.</p></dd><dt>3.</dt><dd id="A641"><p class="no_top_margin">BioseqBlastEngine performs the search of the BioseqPtr (query_bsp). The BioseqPtr could have been obtained from the <a class="def" href="/books/n/handbook/A1237/def-item/app9/">BLAST</a> databases, <a class="def" href="/books/n/handbook/A1237/def-item/app45/">Entrez</a>, or from <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a> using the function call FastaToSeqEntry.</p></dd></dl>
<p>The BLASTOptionBlk structure contains a large number of members. The most useful ones and a brief description for each are listed in <a class="figpopup" href="/books/NBK21097/table/A642/?report=objectonly" target="object" rid-figpopup="figA642" rid-ob="figobA642">Table 2</a>.</p><div class="iconblock whole_rhythm clearfix ten_col table-wrap" id="figA642"><a href="/books/NBK21097/table/A642/?report=objectonly" target="object" title="Table 2" class="img_link icnblk_img figpopup" rid-figpopup="figA642" rid-ob="figobA642"><img class="small-thumb" src="/books/NBK21097/table/A642/?report=thumb" src-large="/books/NBK21097/table/A642/?report=previmg" alt="Table 2. The most frequently used BLAST options in the BLASTOptionBlk structure." /></a><div class="icnblk_cntnt"><h4 id="A642"><a href="/books/NBK21097/table/A642/?report=objectonly" target="object" rid-ob="figobA642">Table 2</a></h4><p class="float-caption no_bottom_margin">The most frequently used BLAST options in the BLASTOptionBlk structure. </p></div></div></div><div id="A643"><h2 id="_A643_">Appendix 4. A function to print a view of a SeqAlign: MySeqAlignPrint</h2><p>
<pre>
/* Length passed to SeqIdWrite for each identifier buffer. */
#define BUFFER_LEN 50
/*
Print a report on hits with start/stop. Zero-offset is used.
One line is written to outfp for every SeqAlign in the linked list, in the form
query_id:start-stop TAB target_id:start-stop.
*/
static void MySeqAlignPrint(SeqAlignPtr seqalign, FILE *outfp)
{
/* Each buffer has one element more than BUFFER_LEN (presumably room for the terminator). */
Char query_id_buf[BUFFER_LEN+1], target_id_buf[BUFFER_LEN+1];
SeqIdPtr query_id, target_id;
/* Walk the list until the next pointer is NULL. */
while (seqalign)
{
/* Index 0 is used for the query, index 1 for the target sequence. */
query_id = SeqAlignId(seqalign, 0);
/* Write the identifier into the buffer using the PRINTID_FASTA_LONG format. */
SeqIdWrite(query_id, query_id_buf, PRINTID_FASTA_LONG, BUFFER_LEN);
target_id = SeqAlignId(seqalign, 1);
SeqIdWrite(target_id, target_id_buf, PRINTID_FASTA_LONG, BUFFER_LEN);
/* Start and stop values are cast to long to match the %ld conversions. */
fprintf(outfp, "%s:%ld-%ld\t%s:%ld-%ld\n",
query_id_buf, (long) SeqAlignStart(seqalign, 0), (long) SeqAlignStop(seqalign, 0),
target_id_buf, (long) SeqAlignStart(seqalign, 1), (long) SeqAlignStop(seqalign, 1));
/* Advance to the next alignment in the list. */
seqalign = seqalign-&#x0003e;next;
}
return;
}</pre>
</p><p>Note that:</p><dl class="temp-labeled-list"><dt>1.</dt><dd id="A644"><p class="no_top_margin">SeqAlignId gets the sequence identifier for the zero-th sequence (zero offset). The identifier is actually a C structure.</p></dd><dt>2.</dt><dd id="A645"><p class="no_top_margin">SeqIdWrite formats the information in query_id into a <a class="def" href="/books/n/handbook/A1237/def-item/app53/">FASTA</a> identifier (e.g., gi|129295) and places it into query_id_buf.</p></dd><dt>3.</dt><dd id="A646"><p class="no_top_margin">SeqAlignStart and SeqAlignStop return the start and stop values of the zero-th and first sequences (that is, the first and second sequences when counting from one).</p></dd></dl><p>All of this is done by high-level function calls, and it is not necessary to write low-level function calls to parse the <a class="def" href="/books/n/handbook/A1237/def-item/app5/">ASN.1</a>.</p></div><div id="A647"><h2 id="_A647_">References</h2><dl class="temp-labeled-list"><dt>1.</dt><dd><div class="bk_ref" id="A648">Altschul SF , Gish W , Miller W , Myers EW , Lipman DJ . Basic Local Alignment Search Tool. <span><span class="ref-journal">J Mol Biol. </span>1990;<span class="ref-vol">215</span>:403–410.</span> [<a href="https://pubmed.ncbi.nlm.nih.gov/2231712" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 2231712</span></a>]</div></dd><dt>2.</dt><dd><div class="bk_ref" id="A649">Altschul SF , Madden TL , Schaffer AA , Zhang J , Zhang Z , Miller W , Lipman DJ . Gapped BLAST and PSI-BLAST: a new generation of protein database search programs. <span><span class="ref-journal">Nucleic Acids Res. 
</span>1997;<span class="ref-vol">25</span>:3389–3402.</span> [<a href="/pmc/articles/PMC146917/" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pmc">PMC free article<span class="bk_prnt">: PMC146917</span></a>] [<a href="https://pubmed.ncbi.nlm.nih.gov/9254694" ref="pagearea=cite-ref&amp;targetsite=entrez&amp;targetcat=link&amp;targettype=pubmed">PubMed<span class="bk_prnt">: 9254694</span></a>]</div></dd></dl></div><div id="bk_toc_contnr"></div></div></div>
<div class="post-content"><div><div class="half_rhythm"><a href="/books/about/copyright/">Copyright Notice</a></div><div class="small"><span class="label">Bookshelf ID: NBK21097</span></div><div style="margin-top:2em" class="bk_noprnt"><a class="bk_cntns" href="/books/n/handbook/">Contents</a><div class="pagination bk_noprnt"><a class="active page_link prev" href="/books/n/handbook/ch15/" title="Previous page in this title">&lt; Prev</a><a class="active page_link next" href="/books/n/handbook/ch17/" title="Next page in this title">Next &gt;</a></div></div></div></div>
</div>
<!-- Custom content below content -->
<div class="col4">
</div>
<!-- Book content -->
<!-- Custom content below bottom nav -->
<div class="col5">
</div>
</div>
<div id="rightcolumn" class="four_col col last">
<!-- Custom content above discovery portlets -->
<div class="col6">
<div id="ncbi_share_book"><a href="#" class="ncbi_share" data-ncbi_share_config="popup:false,shorten:true" ref="id=NBK21097&amp;db=books">Share</a></div>
</div>
<div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Views</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="PDF_download" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="/books/NBK21097/?report=reader">PubReader</a></li><li><a href="/books/NBK21097/?report=printable">Print View</a></li><li><a data-jig="ncbidialog" href="#_ncbi_dlg_citbx_NBK21097" data-jigconfig="width:400,modal:true">Cite this Page</a><div id="_ncbi_dlg_citbx_NBK21097" style="display:none" title="Cite this Page"><div class="bk_tt">Madden T. The BLAST Sequence Analysis Tool. 2002 Oct 9 [Updated 2003 Aug 13]. In: McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. 
Chapter 16.<span class="bk_cite_avail"></span></div></div></li><li><a href="/books/NBK21097/pdf/Bookshelf_NBK21097.pdf">PDF version of this page</a> (257K)</li><li><a href="/books/n/handbook/pdf/">PDF version of this title</a> (7.2M)</li><li><a href="#" class="toggle-glossary-link" title="Enable/disable links to the glossary">Disable Glossary Links</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>In this Page</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="page-toc" id="Shutter"></a></div><div class="portlet_content"><ul xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" class="simple-list"><li><a href="#_abs_rndgid_" ref="log$=inpage&amp;link_id=inpage">Summary</a></li><li><a href="#A611" ref="log$=inpage&amp;link_id=inpage">Introduction</a></li><li><a href="#A612" ref="log$=inpage&amp;link_id=inpage">How BLAST Works: The Basics</a></li><li><a href="#A614" ref="log$=inpage&amp;link_id=inpage">BLAST Scores and Statistics</a></li><li><a href="#A615" ref="log$=inpage&amp;link_id=inpage">BLAST Output: 1. The Traditional Report</a></li><li><a href="#A620" ref="log$=inpage&amp;link_id=inpage">BLAST Output: 2. The Hit Table</a></li><li><a href="#A622" ref="log$=inpage&amp;link_id=inpage">BLAST Output: 3. Structured Output</a></li><li><a href="#A627" ref="log$=inpage&amp;link_id=inpage">BLAST Code</a></li><li><a href="#A631" ref="log$=inpage&amp;link_id=inpage">Appendix 1. FASTA identifiers</a></li><li><a href="#A633" ref="log$=inpage&amp;link_id=inpage">Appendix 2. Readdb API</a></li><li><a href="#A638" ref="log$=inpage&amp;link_id=inpage">Appendix 3. Excerpt from a demonstration program doblast.c</a></li><li><a href="#A643" ref="log$=inpage&amp;link_id=inpage">Appendix 4. 
A function to print a view of a SeqAlign: MySeqAlignPrint</a></li><li><a href="#A647" ref="log$=inpage&amp;link_id=inpage">References</a></li></ul></div></div><div class="portlet"><div class="portlet_head"><div class="portlet_title"><h3><span>Recent Activity</span></h3></div><a name="Shutter" sid="1" href="#" class="portlet_shutter" title="Show/hide content" remembercollapsed="true" pgsec_name="recent_activity" id="Shutter"></a></div><div class="portlet_content"><div xmlns:np="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" id="HTDisplay" class=""><div class="action"><a href="javascript:historyDisplayState('ClearHT')">Clear</a><a href="javascript:historyDisplayState('HTOff')" class="HTOn">Turn Off</a><a href="javascript:historyDisplayState('HTOn')" class="HTOff">Turn On</a></div><ul id="activity"><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=1" href="/portal/utils/pageresolver.fcgi?recordid=67c806676d1ec11b6f58f4c5">The BLAST Sequence Analysis Tool - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">The BLAST Sequence Analysis Tool - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=2" href="/portal/utils/pageresolver.fcgi?recordid=67c80661feee5b00acf92d59">Exercises: Using Map Viewer - The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">Exercises: Using Map Viewer - The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=3" href="/portal/utils/pageresolver.fcgi?recordid=67c8065f6d1ec11b6f58a74f">The NCBI Handbook</a><div class="ralinkpop offscreen_noflow">The NCBI Handbook<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd 
ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=4" href="/portal/utils/pageresolver.fcgi?recordid=67c80658b70fbb1960fe96b8">NLM Catalog - NLM Catalog Help</a><div class="ralinkpop offscreen_noflow">NLM Catalog - NLM Catalog Help<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li><li class="ra_rcd ralinkpopper two_line"><a class="htb ralinkpopperctrl" ref="log$=activity&amp;linkpos=5" href="/portal/utils/pageresolver.fcgi?recordid=67c80628d5edb449bf3aada7">Medical Genetics Summaries</a><div class="ralinkpop offscreen_noflow">Medical Genetics Summaries<div class="brieflinkpopdesc"></div></div><div class="tertiary"></div></li></ul><p class="HTOn">Your browsing activity is empty.</p><p class="HTOff">Activity recording is turned off.</p><p id="turnOn" class="HTOff"><a href="javascript:historyDisplayState('HTOn')">Turn recording back on</a></p><a class="seemore" href="/sites/myncbi/recentactivity">See more...</a></div></div></div>
<!-- Custom content below discovery portlets -->
<div class="col7">
</div>
</div>
</div>
<!-- Custom content after all -->
<div class="col8">
</div>
<div class="col9">
</div>
<script type="text/javascript" src="/corehtml/pmc/js/jquery.scrollTo-1.4.2.js"></script>
<script type="text/javascript">
(function ($) {
  // Skip-link support: each ".skiplink" anchor jumps to its target element
  // and then hands keyboard focus to that element.

  // Builds the callback that focuses the given target once scrolling is done.
  function focusWhenDone($target) {
    return function () {
      $target.focus();
    };
  }

  $('.skiplink').each(function () {
    var $link = $(this);
    // The link's href value is used directly as the selector for its target.
    var $target = $($link.attr('href'));

    // ensure the target can receive focus
    $target.attr('tabindex', '-1');
    $target.addClass('skiptarget');

    $link.on('click', function (event) {
      event.preventDefault();
      // Second argument 0 is handed to the scrollTo plugin unchanged;
      // focus moves in the plugin's onAfter callback.
      $.scrollTo($target, 0, { onAfter: focusWhenDone($target) });
    });
  });
})(jQuery);
</script>
</div>
<div class="bottom">
<div id="NCBIFooter_dynamic">
<!--<component id="Breadcrumbs" label="breadcrumbs"/>
<component id="Breadcrumbs" label="helpdesk"/>-->
</div>
<div class="footer" id="footer">
<section class="icon-section">
<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
<div class="grid-container container">
<div class="icon-section_container">
<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<defs>
<style>
.cls-11 {
fill: #737373;
}
</style>
</defs>
<title>Twitter</title>
<path class="cls-11" d="M250.11,105.48c-7,3.14-13,3.25-19.27.14,8.12-4.86,8.49-8.27,11.43-17.46a78.8,78.8,0,0,1-25,9.55,39.35,39.35,0,0,0-67,35.85,111.6,111.6,0,0,1-81-41.08A39.37,39.37,0,0,0,81.47,145a39.08,39.08,0,0,1-17.8-4.92c0,.17,0,.33,0,.5a39.32,39.32,0,0,0,31.53,38.54,39.26,39.26,0,0,1-17.75.68,39.37,39.37,0,0,0,36.72,27.3A79.07,79.07,0,0,1,56,223.34,111.31,111.31,0,0,0,116.22,241c72.3,0,111.83-59.9,111.83-111.84,0-1.71,0-3.4-.1-5.09C235.62,118.54,244.84,113.37,250.11,105.48Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>Facebook</title>
<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<title>LinkedIn</title>
<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
</path>
</svg></a>
<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
<defs>
<style>
.cls-11,
.cls-12 {
fill: #737373;
}
.cls-11 {
fill-rule: evenodd;
}
</style>
</defs>
<title>GitHub</title>
<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
</path>
<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
</path>
<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
</path>
<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
</path>
<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
</path>
<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
</path>
<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
</svg></a>
<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
<defs><style>.cls-1{fill:#737373;}</style></defs>
<title>NCBI Insights Blog</title>
<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
</svg>
</a>
</div>
</div>
</section>
<section class="container-fluid bg-primary">
<div class="container pt-5">
<div class="row mt-3">
<div class="col-lg-3 col-12">
<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
<ul class="list-inline social_media">
<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
<style type="text/css">
.st20 {
fill: #FFFFFF;
}
.st30 {
fill: none;
stroke: #FFFFFF;
stroke-width: 8;
stroke-miterlimit: 10;
}
</style>
<title>Twitter</title>
<g>
<g>
<g>
<path class="st20" d="M192.9,88.1c-5,2.2-9.2,2.3-13.6,0.1c5.7-3.4,6-5.8,8.1-12.3c-5.4,3.2-11.4,5.5-17.6,6.7 c-10.5-11.2-28.1-11.7-39.2-1.2c-7.2,6.8-10.2,16.9-8,26.5c-22.3-1.1-43.1-11.7-57.2-29C58,91.6,61.8,107.9,74,116 c-4.4-0.1-8.7-1.3-12.6-3.4c0,0.1,0,0.2,0,0.4c0,13.2,9.3,24.6,22.3,27.2c-4.1,1.1-8.4,1.3-12.5,0.5c3.6,11.3,14,19,25.9,19.3 c-11.6,9.1-26.4,13.2-41.1,11.5c12.7,8.1,27.4,12.5,42.5,12.5c51,0,78.9-42.2,78.9-78.9c0-1.2,0-2.4-0.1-3.6 C182.7,97.4,189.2,93.7,192.9,88.1z"></path>
</g>
</g>
<circle class="st30" cx="124.4" cy="128.8" r="108.2"></circle>
</g>
</svg></a></li>
<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
<style type="text/css">
.st10 {
fill: #FFFFFF;
}
.st110 {
fill: none;
stroke: #FFFFFF;
stroke-width: 8;
stroke-miterlimit: 10;
}
</style>
<title>Facebook</title>
<g>
<g>
<path class="st10" d="M159,99.1h-24V88.4c0-5,3.3-6.2,5.7-6.2h16.8V60l-24.4-0.1c-22.1,0-26.2,16.5-26.2,27.1v12.1H90v22.5h16.9 v67.5H135v-67.5h21.7L159,99.1z"></path>
</g>
</g>
<circle class="st110" cx="123.6" cy="123.2" r="108.2"></circle>
</svg>
</a></li>
<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="YouTube" target="_blank" rel="noopener noreferrer"><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" x="0px" y="0px" viewBox="0 0 249 249" style="enable-background:new 0 0 249 249;" xml:space="preserve">
<title>YouTube</title>
<style type="text/css">
.st4 {
fill: none;
stroke: #FFFFFF;
stroke-width: 8;
stroke-miterlimit: 10;
}
.st5 {
fill: #FFFFFF;
}
</style>
<circle class="st4" cx="124.2" cy="123.4" r="108.2"></circle>
<g transform="translate(0,-952.36218)">
<path class="st5" d="M88.4,1037.4c-10.4,0-18.7,8.3-18.7,18.7v40.1c0,10.4,8.3,18.7,18.7,18.7h72.1c10.4,0,18.7-8.3,18.7-18.7 v-40.1c0-10.4-8.3-18.7-18.7-18.7H88.4z M115.2,1058.8l29.4,17.4l-29.4,17.4V1058.8z"></path>
</g>
</svg></a></li>
</ul>
</div>
<div class="col-lg-3 col-12">
<p class="address_footer text-white">National Library of Medicine<br />
<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
Bethesda, MD 20894</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<p><a href="https://www.nlm.nih.gov/web_policies.html" class="text-white">Web Policies</a><br />
<a href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office" class="text-white">FOIA</a><br />
<a href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html" class="text-white" id="vdp">HHS Vulnerability Disclosure</a></p>
</div>
<div class="col-lg-3 col-12 centered-lg">
<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
<a href="https://www.nlm.nih.gov/accessibility.html" class="text-white">Accessibility</a><br />
<a href="https://www.nlm.nih.gov/careers/careers.html" class="text-white">Careers</a></p>
</div>
</div>
<div class="row">
<div class="col-lg-12 centered-lg">
<nav class="bottom-links">
<ul class="mt-3">
<li>
<a class="text-white" href="//www.nlm.nih.gov/">NLM</a>
</li>
<li>
<a class="text-white" href="https://www.nih.gov/">NIH</a>
</li>
<li>
<a class="text-white" href="https://www.hhs.gov/">HHS</a>
</li>
<li>
<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
</li>
</ul>
</nav>
</div>
</div>
</div>
</section>
<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
</div>
</div>
</div>
<!--/.page-->
</div>
<!--/.wrap-->
</div><!-- /.twelve_col -->
</div>
<!-- /.grid -->
<span class="PAFAppResources"></span>
<!-- BESelector tab -->
<noscript><img alt="statistics" src="/stat?jsdisabled=true&amp;ncbi_db=books&amp;ncbi_pdid=book-part&amp;ncbi_acc=NBK21097&amp;ncbi_domain=handbook&amp;ncbi_report=record&amp;ncbi_type=fulltext&amp;ncbi_objectid=&amp;ncbi_pcid=/NBK21097/&amp;ncbi_pagename=The BLAST Sequence Analysis Tool - The NCBI Handbook - NCBI Bookshelf&amp;ncbi_bookparttype=chapter&amp;ncbi_app=bookshelf" /></noscript>
<!-- usually for JS scripts at page bottom -->
<!--<component id="PageFixtures" label="styles"></component>-->
<!-- CE8B5AF87C7FFCB1_0191SID /projects/books/PBooks@9.11 portal104 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>
<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4216699/js/3879255/4121861/3501987/4008961/3893018/3821238/4062932/4209313/4212053/4076480/3921943/3400083/3426610.js" snapshot="books"></script></body>
</html>