4178 lines
302 KiB
HTML
4178 lines
302 KiB
HTML
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
|
<!-- PubChem template version 5.1 2012-05 -->
|
|
<!-- $Id: about.html 367034 2012-06-20 14:44:12Z jiazhang $ -->
|
|
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
|
|
<head xmlns:xi="http://www.w3.org/2001/XInclude">
|
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
|
|
<meta name="description" content="Help document for SPARCLE, the Subfamily Protein Architecture Labeling Engine, a resource of the National Center for Biotechnology Information, Structure Group" />
|
|
<meta name="ncbi_db" content="pubchem" />
|
|
<meta name="keywords" content="SPARCLE, Subfamily Protein Architecture Labeling Engine, Conserved Domains Database (CDD), CD-Search, RPS-BLAST, proteins, protein sequences, classification, families, superfamilies, superfamily, protein function, homologous, homologs, orthologs, multiple sequence alignment, protein sequence analysis and annotation, CDTree, CD Tree, Conserved Domain Architecture Retrieval Tool (CDART), Molecular Modeling Database (MMDB), Cn3D structure viewing software, 3-D, three-dimensional structures, macromolecular structures, x-ray crystallography, nuclear magnetic resonance (NMR) spectroscopy, nucleic acids, nucleotide sequences, DNA, RNA, ligands, bound chemicals, drugs, medicine, NCBI, National Center for Biotechnology Information" />
|
|
<meta name="robots" content="index,follow,noarchive" />
|
|
<title>SPARCLE Help Document</title>
|
|
<script type="text/javascript" src="/Structure/scripts/loadbasics.min.js"></script>
|
|
<script type="text/javascript" src="/Structure/scripts/doc_tmp.min.js"></script>
|
|
<link rel="stylesheet" type="text/css" href="/Structure/stylesheets/struct_style_subset.min.css" />
|
|
<!-- ========== BEGIN_GOVT_SHUTDOWN_NOTICE_added_20130927 =========== -->
|
|
|
|
<script type="text/javascript">
|
|
// Government-shutdown notice hook: fetch the shared alerts script and, once it
// has loaded, call galert() with the two selectors listed below.
// NOTE(review): galert is not defined in this file (presumably it comes from
// alerts.js); confirm there how the selector list is interpreted.
jQuery.getScript("/core/alerts/alerts.js", function () {
  galert(['div#governmentshutdown', 'body > *:nth-child(1)']);
});
|
|
</script>
|
|
|
|
<!-- ========== END_GOVT_SHUTDOWN_NOTICE_added_20130927 =========== -->
|
|
</head>
|
|
|
|
<body>
|
|
<A NAME="Top"></A>
|
|
<A NAME="top"></A>
|
|
|
|
<!-- ======= BEGIN_GET_PUBCHEM_HEADER_AND_NCBILOCALNAV_PULLDOWN_MENUS ========= -->
|
|
<script type="text/javascript">doc_tmp.getDocHdr('sparcle');</script>
|
|
<!-- ======= END_GET_PUBCHEM_HEADER_AND_NCBILOCALNAV_PULLDOWN_MENUS ========= -->
|
|
|
|
|
|
<!-- ==================== VERTICAL SPACER ======================= -->
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
<TR>
|
|
<TD class="WhiteCell MiniText"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ==================== END_VERTICAL SPACER ======================= -->
|
|
|
|
|
|
<!-- ##################### BEGIN_EDITABLE_CONTENT ##################### -->
|
|
<A NAME="3"></A>
|
|
|
|
|
|
<!-- ######### BEGIN_BLUE_EDGE_BOX_WITH_TITLE_OF_DOCUMENT_AND_TOC ######## -->
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" class="NormalText" width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#FFFFFF">
|
|
<TR>
|
|
<TD class="WhiteCell">
|
|
|
|
|
|
<!-- ==================== TITLE_OF_DOCUMENT =================== -->
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#FFFFFF">
|
|
<TR>
|
|
|
|
<TD class="DocTitleText" align="left"> <A HREF="/sparcle">SPARCLE</A> Help</TD>
|
|
|
|
<!-- ===================== GREEN_BUTTONS ====================== -->
|
|
|
|
<TD width="500" bgcolor="#FFFFFF">
|
|
|
|
<TABLE width="500" border="0" style="border-collapse:separate;" cellspacing="1" cellpadding="0" bgcolor="#FFFFFF">
|
|
<TR>
|
|
|
|
<TD class="NavResourceButtonInactive" align="center" valign="center" style="white-space: nowrap;"><A href="sparcle_about.html"> ABOUT </A></TD>
|
|
|
|
<TD class="NavResourceButtonActive" align="center" valign="center" style="white-space: nowrap;"><A href="sparcle_help.html"> HELP </A></TD>
|
|
|
|
<!-- TD class="NavResourceButtonInactive" align="center" valign="center" style="white-space: nowrap;"><A href="sparcle_faq.html"> FAQ </A></TD -->
|
|
|
|
<TD class="NavResourceButtonInactive" align="center" valign="center" style="white-space: nowrap;"><A href="/Structure/cdd/wrpsb.cgi"> INPUT PROTEIN SEQUENCE </A></TD>
|
|
|
|
<TD class="NavResourceButtonInactive" align="center" valign="center" style="white-space: nowrap;"><A href="/sparcle"> SEARCH BY TEXT WORD </A></TD>
|
|
|
|
<TD class="NavResourceButtonInactive" align="center" valign="center" style="white-space: nowrap;"><A href="//ftp.ncbi.nih.gov/pub/mmdb/cdd/sparcle/"> FTP </A></TD>
|
|
|
|
<TD class="NavResourceButtonInactive" align="center" valign="center" style="white-space: nowrap;"><A href="sparcle_publications.html"> PUBLICATIONS </A></TD>
|
|
|
|
<TD class="NavResourceButtonInactive" align="center" valign="center" style="white-space: nowrap;"><A href="../../cdd/cdd.shtml"> RESOURCES </A></TD>
|
|
|
|
<TD class="NavResourceButtonInactive" align="center" valign="center" style="white-space: nowrap;"><A href="../../cdd/docs/cdd_news.html"> NEWS </A></TD>
|
|
|
|
<!-- TD class="NavResourceButtonInactive" align="center" valign="center" style="white-space: nowrap;"><A href="../../structure_discover.html"> DISCOVER </A></TD -->
|
|
|
|
<TD class="WhiteCell" width="1"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
</TD>
|
|
|
|
<!-- =================== END_GREEN_BUTTONS ==================== -->
|
|
|
|
<TD class="MiniText" width="5"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- =========== BOXED_SCOPE_NOTE =========== -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#FFFFFF">
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="10" align="center"> </TD>
|
|
|
|
<TD class="WhiteCellBlueEdgeAll NormalText" align="left">
|
|
<SPAN class="MiniText"> <BR></SPAN>
|
|
<P class="indent20bottomspace">This help document describes how to use <A HREF="/sparcle"><B>SPARCLE</B>, the <B>S</B>ubfamily <B>P</B>rotein <B>Arc</B>hitecture <B>L</B>abeling <B>E</B>ngine</A>, a resource for protein classification. The <A HREF="../../cdd/cdd.shtml"><B>Conserved Domains resources</B></A> page describes additional, related resources and provides "<A HREF="../../cdd/docs/cdd_how_to.html"><B>How To</B></A>" guides that illustrate how those resources can be used.</P>
|
|
<!-- P class="indent20bottomspace">This help document describes <A HREF="/sparcle"><B>SPARCLE</B>, the <B>S</B>ubfamily <B>P</B>rotein <B>Arc</B>hitecture <B>L</B>abeling <B>E</B>ngine</A>, is a resource for protein classification through the <SPAN style="color:#D70000"><B>functional characterization</B></SPAN> and labeling of protein sequences that have been grouped by their characteristic domain architecture. (<I>Note:</I> A separate resource, the <A HREF="../..//Structure/lexington/docs/cdart_about.html">Conserved Domain Architecture Retrieval Tool (<B>CDART</B>)</A>, finds similar domain architectures that share at least one of the conserved domain superfamily.)</P -->
|
|
</TD>
|
|
|
|
<TD class="WhiteCell NormalText" width="10" align="center"> </TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="10" align="center"> </TD>
|
|
<TD class="WhiteCell NormalText" align="center"> </TD>
|
|
<TD class="WhiteCell NormalText" width="10" align="center"> </TD>
|
|
</TR>
|
|
|
|
</TABLE>
|
|
|
|
<!-- =========== END_BOXED_SCOPE_NOTE =========== -->
|
|
|
|
<!-- ==================== VERTICAL SPACER ======================= -->
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
<TR>
|
|
<TD class="WhiteCell MiniText"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ==================== END_VERTICAL SPACER ======================= -->
|
|
|
|
<!-- ==================== TOC_TABLE_OF_CONTENTS ================= -->
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#FFFFFF">
|
|
<TR>
|
|
|
|
<!-- ================= LEFT_SIDE_TOC ==================== -->
|
|
|
|
<TD class="WhiteCell TOCText" ALIGN="LEFT" VALIGN="TOP">
|
|
|
|
     DETAILED TABLE OF CONTENTS:
|
|
|
|
<A NAME="TOCWhatIs"></A>
|
|
<UL>
|
|
<LI><A HREF="#WhatIs"><B>What is SPARCLE?</B></A></LI>
|
|
<UL>
|
|
<LI><A HREF="#Overview">Overview</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#Architecture"><SPAN style="color:#D70000">What is a conserved domain architecture?</SPAN></A></LI>
|
|
<LI><A HREF="#OverviewTypesOfArchitectures">Types of architectures</A></LI>
|
|
<!-- UL>
|
|
<LI><A HREF="#OverviewTypesOfArchitecturesSuperfamily">Superfamily architectures</A></LI>
|
|
<LI><A HREF="#OverviewTypesOfArchitecturesSubfamily">Subfamily architectures</A></LI>
|
|
</UL -->
|
|
<LI><A HREF="#OverviewArchitecturesWithSingleConservedDomainFootprint">Architectures with single conserved domain footprint</A></LI>
|
|
<LI><A HREF="#OverviewArchitectureID">Each architecture receives a unique and stable architecture ID</A></LI>
|
|
<!-- LI><A HREF="#OverviewAdditionalInfo">Additional information about conserved domains</A></LI -->
|
|
</UL>
|
|
<LI><A HREF="#ExamplesOfUse"><SPAN style="color:#D70000">How can SPARCLE be used to learn more about proteins?</SPAN></A></LI>
|
|
<UL>
|
|
<LI><A HREF="#ExampleOfUseClassifyProtein">Classify a protein based on its conserved domain architecture</A></LI>
|
|
<LI><A HREF="#ExampleOfUseRetrieveByKeyword">Retrieve conserved domain architectures whose descriptions contain the keywords you specify</A></LI>
|
|
<LI><A HREF="#ExampleOfUseRetrieveProteinsWithSameArchitecture">Retrieve proteins that have the same conserved domain architecture, regardless of the extent of their overall sequence similarity</A></LI>
|
|
<LI><A HREF="#ExampleOfUseInferFunctionOfHypotheticalProtein">Infer the biological function of a hypothetical protein</A></LI>
|
|
</UL>
|
|
<!-- LI><A HREF="#ExamplesOfUse"><SPAN style="color:#D70000"><I>Illustrated example</I></SPAN> of how SPARCLE can be used</A></LI -->
|
|
<LI><A HREF="#Compare">Compare CDD, CDART, and SPARCLE</A></LI>
|
|
</UL>
|
|
</UL>
|
|
|
|
|
|
<A NAME="TOCInput"></A>
|
|
<UL>
|
|
<LI><A HREF="#Input"><B>Input Options</B></A></LI>
|
|
|
|
<UL>
|
|
|
|
<LI><A HREF="#InputSequence"><B>Enter a query sequence into CD-Search</B></A></LI>
|
|
<UL>
|
|
<LI><I><A HREF="#InputSequenceIllustration"><span style="color:#d70000">Illustrated example</span></A></I></LI>
|
|
<LI><A HREF="#InputSequenceFootnote">Note about <I><span style="color:#d70000">ongoing research</span></I></A></LI>
|
|
<LI><A HREF="#CDSearchHelp">CD-Search help document provides additional details</A></LI>
|
|
</UL>
|
|
|
|
<LI><A HREF="#InputKeywords"><B>Search the SPARCLE database by keyword</B></A></LI>
|
|
<UL>
|
|
<LI><I><A HREF="#InputKeywordsIllustration"><span style="color:#d70000">Illustrated example</span></A></I></LI>
|
|
<LI><A HREF="#InputKeywordsScopeOfSearch">Scope of keyword search</A></LI>
|
|
<LI><A HREF="#InputKeywordsSearchTips">Search tips to narrow or broaden search</A></LI>
|
|
<UL>
|
|
<!-- LI><A HREF="#SearchTipsAllFields">All Fields are searched by default</A></LI -->
|
|
<LI><A HREF="#SearchTipsSearchFields">How to limit your query to a specific search field</A></LI>
|
|
<LI><A HREF="#SearchTipsQuotes">How to use quotes to force a phrase search</A></LI>
|
|
<LI><A HREF="#SearchTipsTruncation">How to use an asterisk (*) for truncation</A></LI>
|
|
<LI><A HREF="#SearchTipsCompare">Compare some sample search strategies</A></LI>
|
|
</UL>
|
|
<LI><A HREF="#SearchFields">Search fields</A></LI>
|
|
<!-- UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
</UL -->
|
|
</UL>
|
|
|
|
</UL>
|
|
</UL>
|
|
|
|
<A NAME="TOCOutput"></A>
|
|
<UL>
|
|
<LI><A HREF="#Output"><B>Output</B></A></LI>
|
|
|
|
<UL>
|
|
|
|
<LI><A HREF="#OutputSequence">Output from a <B>sequence search</B></A></LI>
|
|
<!-- UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
</UL>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
</UL>
|
|
</UL>
|
|
</UL -->
|
|
|
|
<LI><A HREF="#OutputKeywords">Output from a <B>keyword search</B></A></LI>
|
|
<!-- UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
</UL>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
</UL>
|
|
</UL>
|
|
</UL -->
|
|
|
|
</UL>
|
|
</UL>
|
|
|
|
<A NAME="TOCSummaryPage"></A>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPage"><B>Sample SPARCLE Record<!-- Conserved Domain Architecture --></B></A></LI>
|
|
<UL>
|
|
|
|
<LI><A HREF="#SummaryPageClassificationOfProteins">Classification of proteins by domain architecture</A></LI>
|
|
<LI><A HREF="#SummaryPageDescription">Description of architecture</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageName">Name of architecture</A></LI>
|
|
<LI><A HREF="#SummaryPageLabel">Label (description of function)</A></LI>
|
|
<LI><A HREF="#SummaryPageArchitectureID">Architecture ID</A></LI>
|
|
<LI><A HREF="#SummaryPageVersion">Version</A></LI>
|
|
<LI><A HREF="#SummaryPageDatePublished">Date Published</A></LI>
|
|
<LI><A HREF="#SummaryPageReviewLevel">Review Level</A></LI>
|
|
</UL>
|
|
<LI><A HREF="#SummaryPageSequences">Sequences with this architecture</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageSequencesFolderTabs">Folder tabs</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageSequencesAll">All</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesProteinWithPubMed">Protein with PubMed Reference</A></LI>
|
|
<LI><A HREF="#SummaryPageSequences3DStructure">3D Structure</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesGene">Gene</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesRefSeq">RefSeq</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesSwissProt">Swiss-Prot</A></LI>
|
|
</UL>
|
|
<LI><A HREF="#SummaryPageSequencesFilters">Filters</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageSequencesFiltersTags">Tags</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesFiltersSource">Source</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesFiltersOrganism">Organism</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesFiltersDescription">Description</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesFiltersGeneSymbol">Gene Symbol</A></LI>
|
|
</UL>
|
|
<LI><A HREF="#SummaryPageSequencesEmptySet">Note: Empty Set</A></LI>
|
|
</UL>
|
|
<LI><A HREF="#SummaryPageCuratedNamesAndLabels">Curated Names and Labels</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageTaxonomicScope">Taxonomic Scope</A></LI>
|
|
<LI><A HREF="#SummaryPageAssignedName">Name</A></LI>
|
|
<LI><A HREF="#SummaryPageAssignedLabel">Label</A></LI>
|
|
|
|
|
|
<LI><A HREF="#SummaryPageEvidence">Supporting evidence</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageEvidenceProteins">Protein sequences</A></LI>
|
|
<LI><A HREF="#SummaryPageEvidenceConservedDomains">Conserved domains</A></LI>
|
|
<LI><A HREF="#SummaryPageEvidencePublications">Publications</A></LI>
|
|
<LI><A HREF="#SummaryPageEvidenceOther">Other</A></LI>
|
|
</UL>
|
|
</UL>
|
|
<LI><A HREF="#SummaryPageConservedDomains">Conserved domains in this architecture</A></LI>
|
|
<LI><A HREF="#SummaryPageFunctionalSites">Functional sites in this architecture</A></LI>
|
|
<!-- LI><A HREF="#SummaryPage________">________</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
</UL>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
</UL>
|
|
</UL -->
|
|
</UL>
|
|
</UL>
|
|
|
|
<A NAME="TOCDataProcessing"></A>
|
|
<UL>
|
|
<LI><A HREF="#DataProcessing"><B>Data Processing</B></A></LI>
|
|
<UL>
|
|
<LI><A HREF="#DataProcessingOverview">Data processing overview</A></LI>
|
|
<LI><A HREF="#DataProcessingReviewLevel">Three tiers of data:</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#DataProcessingCurated">Curated architectures</A></LI>
|
|
<LI><A HREF="#DataProcessingAutonamed">Autonamed architectures</A></LI>
|
|
<LI><A HREF="#DataProcessingNamedByDomain">NamedByDomain architectures</A></LI>
|
|
<!-- LI><A HREF="#DataProcessingComputed"><B>computed</B> name architectures</A></LI -->
|
|
</UL>
|
|
|
|
<LI><A HREF="#DataProcessingTypesOfArchitectures">Two types of architectures:</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#DataProcessingTypesOfArchitecturesSuperfamily">Superfamily architectures</A></LI>
|
|
<LI><A HREF="#DataProcessingTypesOfArchitecturesSubfamily">Subfamily architectures</A></LI>
|
|
</UL>
|
|
<LI><A HREF="#DataProcessingArchitecturesWithSingleConservedDomainFootprint">Architectures with single conserved domain footprint</A></LI>
|
|
<LI><A HREF="#DataProcessingArchitectureID">Each architecture receives a unique and stable architecture ID</A></LI>
|
|
<LI><A HREF="#DataProcessingOngoingResearch"><span style="color:#d70000">Ongoing Research</span></A></LI>
|
|
<LI><A HREF="#DataProcessingLinks">Links from architectures to other data types</A></LI>
|
|
<!-- UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
</UL>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
</UL>
|
|
</UL>
|
|
</UL -->
|
|
|
|
</UL>
|
|
</UL>
|
|
|
|
|
|
<A NAME="TOCChangeLog"></A>
|
|
<UL>
|
|
<LI><A HREF="#ChangeLog"><B>Log of changes to SPARCLE</B></A></LI>
|
|
</UL>
|
|
|
|
|
|
<A NAME="TOCReferences"></A>
|
|
<UL>
|
|
<LI><A HREF="#References"><B>References</B></A></LI>
|
|
<UL>
|
|
|
|
<LI><A HREF="#Citing">Citing SPARCLE</A></LI>
|
|
<LI><A HREF="#ReferencesAdditional">Additional References</A></LI>
|
|
</UL>
|
|
</UL>
|
|
|
|
<!-- A NAME="TOC__anchorname___"></A>
|
|
<UL>
|
|
<LI><A HREF="#__anchorname___"><B>_____Section_Header_____</B></A></LI>
|
|
<UL>
|
|
|
|
<LI><A HREF="#________"><B>________</B></A></LI>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
</UL>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
<LI><A HREF="#________">________</A></LI>
|
|
</UL>
|
|
</UL>
|
|
</UL>
|
|
|
|
</UL>
|
|
</UL -->
|
|
|
|
</TD>
<!-- ================= END_LEFT_SIDE_TOC ==================== -->
|
|
|
|
<!-- ======= SPACER_COLUMN_TO_RIGHT_OF_TOC_LEFT_SIDE_TEXT ========= -->
|
|
|
|
<TD class="WhiteCell TOCText" width="5"> </TD>
|
|
|
|
|
|
<!-- =============== RIGHT_SIDE_THUMBNAILS ================ -->
|
|
|
|
<TD WIDTH="300" class="WhiteCell ThumbText" ALIGN="LEFT" VALIGN="TOP">
|
|
|
|
<!-- BR -->
|
|
|
|
<!-- =============== BRIEF_TABLE_OF_CONTENTS ================ -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
|
|
<TR>
|
|
<TD class="WhiteCell ThumbText2" ALIGN="Left" VALIGN="TOP">
|
|
|
|
<TABLE width="100%" border="0" class="Yellow1CellBlueEdge ThumbText2">
|
|
<TR>
|
|
<TD ALIGN="Center" VALIGN="TOP" class="Yellow1Cell" style="white-space: nowrap;">BRIEF TABLE OF CONTENTS</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<TABLE width="100%" border="0" class="Yellow1CellBlueEdgeBottomAndSides ThumbText2">
|
|
|
|
<TR>
|
|
<TD ALIGN="Left" class="MicroText Yellow1Cell" VALIGN="TOP" colspan="3"> </TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD ALIGN="Left" class="MiniText Yellow1Cell" VALIGN="TOP"> </TD>
|
|
<TD ALIGN="Left" VALIGN="TOP" style="white-space: nowrap;" class="Yellow1Cell">
|
|
<A HREF="#WhatIs">What is SPARCLE?</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#Overview">Overview</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#Architecture"><SPAN style="color:#D70000">What is a conserved domain architecture?</SPAN></A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#OverviewTypesOfArchitectures">Types of architectures</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#ExamplesOfUse"><SPAN style="color:#D70000">How can SPARCLE be used?</SPAN></A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#Compare">Compare CDD, CDART, and SPARCLE</A><BR>
|
|
<!-- img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#ExamplesOfUse"><SPAN style="color:#D70000"><I>Illustrated example</I></SPAN> of use</A><BR -->
|
|
|
|
<A HREF="#Input">Input options</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#InputSequence">Enter a query sequence into CD-Search</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><I><A HREF="#InputSequenceIllustration"><SPAN style="color:#D70000">Illustrated example</SPAN></A></I><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#InputSequenceFootnote">Note about <I><span style="color:#d70000">ongoing research</span></I></A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#CDSearchHelp">CD-Search help</A><BR>
|
|
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#InputKeywords">Search SPARCLE database by keyword</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><I><A HREF="#InputKeywordsIllustration"><SPAN style="color:#D70000">Illustrated example</SPAN></A></I><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#InputKeywordsScopeOfSearch">Scope of search</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#InputKeywordsSearchTips">Search tips</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#SearchFields">Search fields</A><BR>
|
|
|
|
<A HREF="#Output">Output</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#OutputSequence">Sequence search</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#OutputKeywords">Keyword search</A><BR>
|
|
|
|
|
|
<A HREF="#SummaryPage">Sample SPARCLE Record</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#SummaryPageClassificationOfProteins">Classification of proteins by architecture</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#SummaryPageDescription">Description of architecture</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#SummaryPageSequences">Sequences with this architecture</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#SummaryPageCuratedNamesAndLabels">Curated names and labels</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#SummaryPageTaxonomicScope">Taxonomic Scope</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#SummaryPageAssignedName">Name</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#SummaryPageAssignedLabel">Label</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#SummaryPageEvidence">Supporting evidence</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#SummaryPageConservedDomains">Conserved domains in this architecture</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#SummaryPageFunctionalSites">Functional sites in this architecture</A><BR>
|
|
|
|
<A HREF="#DataProcessing">Data Processing</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#DataProcessingOverview">Data processing overview</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#DataProcessingReviewLevel">Three tiers of data:</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#DataProcessingCurated">Curated architectures</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#DataProcessingAutonamed">Autonamed architectures</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#DataProcessingNamedByDomain">NamedByDomain architectures</A><BR>
|
|
<!-- img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#DataProcessingComputed">Computed name architectures</A><BR -->
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#DataProcessingTypesOfArchitectures">Two types of architectures:</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#DataProcessingTypesOfArchitecturesSuperfamily">Superfamily architectures</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="50" height="1" border="0"><A HREF="#DataProcessingTypesOfArchitecturesSubfamily">Subfamily architectures</A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#DataProcessingOngoingResearch"><span style="color:#d70000">Ongoing research</span></A><BR>
|
|
<img SRC="../../IMG/spacer.gif" width="30" height="1" border="0"><A HREF="#DataProcessingLinks">Links from architectures to other data</A><BR>
|
|
|
|
|
|
<A HREF="#ChangeLog">Log of changes to SPARCLE</A><BR>
|
|
|
|
<A HREF="#References">References</A><BR>
|
|
</TD>
|
|
<TD ALIGN="Left" class="MiniText Yellow1Cell" VALIGN="TOP"> </TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD ALIGN="Left" class="MicroText Yellow1Cell" VALIGN="TOP" colspan="3"> </TD>
|
|
</TR>
|
|
|
|
</TABLE>
</TD>
|
|
|
|
<TD class="WhiteCell ThumbText2" ALIGN="Left" VALIGN="TOP" WIDTH="10"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
<BR>
|
|
<BR>
|
|
|
|
<!-- ======= SINGLE_THUMBNAIL_YELLOW_BACKGROUND_BIOSYSTEM_RECORD ========== -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
|
|
<TR>
|
|
<TD class="WhiteCell ThumbText2" ALIGN="Left" VALIGN="TOP">
|
|
|
|
<TABLE width="100%" border="0" class="Yellow1CellBlueEdge ThumbText2">
|
|
<TR>
|
|
<TD ALIGN="Center" VALIGN="TOP" class="Yellow1Cell" style="white-space: nowrap;"><A HREF="#SummaryPage">SAMPLE SPARCLE RECORD</A></TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<TABLE width="100%" border="0" class="WhiteCellBlueEdgeBottomAndSides ThumbText2">
|
|
|
|
<TR>
|
|
<TD ALIGN="Left" class="MicroText WhiteCell" VALIGN="TOP"> </TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD ALIGN="CENTER" VALIGN="TOP" style="white-space: nowrap;" class="WhiteCell">
|
|
<A HREF="#SummaryPage"><IMG src="images/DNA_gyrase_NP_387887_SPARCLE_record_thumbnail_MEDIUM.png" width="281" height="367" border="0" align="center" alt="Sample SPARCLE record, showing the name and functional label of the conserved domain architecture found in the protein query sequence, NP_387887, DNA gyrase subunit B from Bacillus subtilis. The SPARCLE record also lists supporting evidence and links to other proteins with the same architecture. Click on the image to read more about SPARCLE records."></A><!-- Click on this graphic to open the SPARCLE record for the domain architecture (architecture ID 10647733) that was found in the protein query sequence, NP_387887, DNA gyrase subunit B from Bacillus subtilis -->
|
|
</TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD ALIGN="Left" class="MicroText WhiteCell" VALIGN="TOP"> </TD>
|
|
</TR>
|
|
|
|
</TABLE>
</TD>
|
|
|
|
<TD class="WhiteCell ThumbText2" ALIGN="Left" VALIGN="TOP" WIDTH="10"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
<BR><BR>
|
|
|
|
</TD>
|
|
|
|
<!-- =============== END_RIGHT_SIDE_THUMBNAILS ================ -->
|
|
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- =========== PAGE_MARGIN_TO_RIGHT_OF_TABLE_OF_CONTENTS =============== -->
|
|
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ############ END_BLUE_EDGE_BOX_WITH_TITLE_OF_DOCUMENT_AND_TOC ######## -->
|
|
|
|
<!-- ==================== VERTICAL SPACER ======================= -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
<TR>
|
|
<TD class="WhiteCell NormalText"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ==================== END_VERTICAL SPACER ======================= -->
|
|
|
|
|
|
<!-- ########### BEGIN_BLUE_HEADER_SECTION_1 ############# -->
|
|
|
|
<A NAME="WhatIs"></A>
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="SteelBlueCell"><SPAN class="HeaderText1">What is SPARCLE?</SPAN></TD>
|
|
<TD class="SteelBlueCell" WIDTH="15" ALIGN="left" VALIGN="center"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A></TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ############## END_BLUE_HEADER_SECTION_1 ############ -->
|
|
|
|
<!-- ########## BEGIN_BLUE_EDGE_BOX_WITH_SECTION_1_CONTENTS ########### -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="WhiteCellBlueEdgeAll NormalText">
|
|
|
|
<!-- ============ MINI_TOC_FOR_THIS_SECTION ============== -->
|
|
<BR>
|
|
<BLOCKQUOTE><BLOCKQUOTE>
|
|
|
|
<A HREF="#Overview">overview</A> | <A HREF="#Architecture">what is a conserved domain architecture?</A> | <A HREF="#OverviewTypesOfArchitectures">two types of architectures:</A> <A HREF="#OverviewTypesOfArchitecturesSuperfamily">superfamily architectures</A>, <A HREF="#OverviewTypesOfArchitecturesSubfamily">subfamily architectures</A> | <A HREF="#OverviewArchitecturesWithSingleConservedDomainFootprint">single domain architectures</A> | <A HREF="#OverviewArchitectureID">each architecture receives a unique and stable architecture ID</A> |<BR>
|
|
<A HREF="#ExamplesOfUse">how can SPARCLE be used to learn more about proteins?</A> | <A HREF="#Compare">compare CDD, CDART, and SPARCLE</A>
|
|
|
|
</BLOCKQUOTE></BLOCKQUOTE>
|
|
|
|
<!-- ========== END_MINI_TOC_FOR_THIS_SECTION ============ -->
|
|
|
|
<!-- ================= LEVEL_1_TOPIC_OVERVIEW ==================== -->
|
|
|
|
<A NAME="Overview"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Overview</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<!-- IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right"><IMG SRC="images/______" WIDTH="200" HEIGHT="100" BORDER="0" ALT="________" ALIGN="right">
|
|
<IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right" -->
|
|
|
|
<!-- ============ MINI_TOC_FOR_THIS_SECTION ============== -->
|
|
|
|
<!-- BLOCKQUOTE><BLOCKQUOTE>
|
|
|
|
<A HREF="#Architecture">what is a conserved domain architecture?</A> | <A HREF="#OverviewTypesOfArchitectures">types of architectures</A> (<A HREF="#OverviewTypesOfArchitecturesSuperfamily">superfamily architectures</A>, <A HREF="#OverviewTypesOfArchitecturesSubfamily">subfamily architectures</A>) | <A HREF="#OverviewArchitecturesWithSingleConservedDomainFootprint">architectures with single conserved domain footprint</A> | <A HREF="#OverviewArchitectureID">each architecture receives a unique and stable architecture ID</A>
|
|
|
|
</BLOCKQUOTE></BLOCKQUOTE -->
|
|
|
|
<!-- ========== END_MINI_TOC_FOR_THIS_SECTION ============ -->
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<A HREF="/sparcle"><B>SPARCLE</B>, the <B>S</B>ubfamily <B>P</B>rotein <B>Arc</B>hitecture <B>L</B>abeling <B>E</B>ngine</A>, is a resource for the <B>functional characterization</B> and labeling of <B>protein sequences</B> that have been grouped by their characteristic <!-- A HREF="../../lexington/docs/cdart_help.html#WhatIs" -->conserved domain architecture<!-- /A -->.<BR><BR>
|
|
|
|
<SPAN style="background-color: #FFFF00">
|
|
<A NAME="WhatIsArchitecture"></A>
|
|
<A NAME="DefineArchitecture"></A>
|
|
<A NAME="ConservedDomainArchitecture"></A>
|
|
<A NAME="Architecture"></A>
|
|
A <B>conserved domain architecture</B> is defined as the sequential order of <A HREF="../../cdd/cdd_help.shtml#CDWhat"><B>conserved domains</B></A> in a protein sequence.</SPAN><BR><BR>
|
|
|
|
To give an <B>example</B> of proteins that have similar function but different domain architectures:<BR>
|
|
|
|
<UL>
|
|
<LI><A HREF="/protein/NP_387887">DNA gyrase B (NP_387887)</A>, an antibiotic target, has a <A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=16077074">conserved domain architecture</A> that includes a histidine kinase-like ATPase domain, a transducer domain, a topoisomerase-primase domain, followed by a type II topoisomerase carboxy domain<!-- (labeled as a DNA gyrase B domain by Pfam) -->.</LI><BR>
|
|
|
|
<LI>In contrast, enzymes of similar function, <!-- like the type II topoisomerase known as --> such as <A HREF="/protein/Q45066">topoisomerase IV (Q45066)</A>, have a different <A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=Q45066">conserved domain architecture</A>.<BR><BR>
|
|
|
|
<I><B>Note</B>: In each of the examples above, the default <B>graphical summary</B> that appears when you click on the "conserved domain architecture" link depicts the full length protein model. Click on the "<B>View: Full Results</B>" <A HREF="../../cdd/cdd_help.shtml#GlobalOptions"><B>display option</B></A> link in the upper right hand corner of the display to see the individual conserved domains that compose the full length protein model. (The <A HREF="../../cdd/cdd_help.shtml#TOC_CDSearch">CD-Search help document</A> provides additional information about the features and options on the <A HREF="../../cdd/cdd_help.shtml#RPSBResults">search results display</A>.)<BR><BR>
|
|
|
|
Regardless of which display option you use, the <A HREF="../../cdd/cdd_help.shtml#ProteinClassification"><B>"Protein Classification" section</B></A> that appears above the graphical summary includes a "domain <A HREF="#ArchitectureID"><B>architecture ID</B></A> xxxxxx" link, which opens the corresponding SPARCLE record.<BR><BR>
|
|
|
|
The <B>SPARCLE record</B> (<A HREF="#SummaryPage">illustrated example</A>), also referred to as the conserved domain architecture's "summary page," shows the architecture's <A HREF="#Name"><B>name</B></A> and functional <A HREF="#Label"><B>label</B></A> (description), the supporting <A HREF="#Evidence"><B>evidence</B></A> that was used to name the architecture, as well as links to <A HREF="#SequencesWithThisArchitecture"><B>other protein sequences with the same architecture</B></A> and to the individual <A HREF="#ConservedDomainsInThisArchitecture"><B>conserved domains that are in the architecture</B></A>.</I><BR>
|
|
|
|
<!-- Also, in each example, you can follow the "<A HREF="#ArchitectureID">domain architecture ID</A> xxxxxxx" link that appears in the "<B>Protein Classification</B>" section of the display to open the corresponding <A HREF="#SummaryPage">SPARCLE record</A>. The SPARCLE record, in turn, lists the <A HREF="#Evidence">evidence</A> that was used to name the architecture and contains links to other <A HREF="#SummaryPageSequences">protein sequences that have the same architecture</A>. -->
|
|
</LI>
|
|
</UL>
|
|
|
|
<!-- =========== OVERVIEW_TYPES_OF_ARCHITECTURES ========= -->
|
|
|
|
<A NAME="OverviewTypesOfArchitectures"></A>
|
|
<A NAME="OverviewArchitectureTypes"></A>
|
|
<A NAME="OverviewArchitectureType"></A>
|
|
|
|
<B>There are two types of conserved domain architectures</B>:<BR>
|
|
|
|
<UL>
|
|
|
|
<!-- ========== OVERVIEW_SUPERFAMILY_ARCHITECTURES ========= -->
|
|
|
|
<LI><A NAME="OverviewTypesOfArchitecturesSuperfamily"></A><A NAME="OverviewSuperfamilyArchitectures"></A><A NAME="OverviewSuperfamilyArchitecture"></A><B>Superfamily architectures</B><BR><BR>
|
|
|
|
Superfamily architectures consist solely of conserved domain <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamilies</A>. This implies a <B>general functional category</B> for the proteins which have that architecture.<BR><BR>
|
|
|
|
<A HREF="#DataProcessingTypesOfArchitecturesSuperfamily">Additional details about superfamily architectures</A> are provided in the <A HREF="#DataProcessing">data processing</A> section of this document.<BR><BR>
|
|
|
|
<I>Note: Superfamily architectures are currently found only in the <A HREF="/Structure/lexington/lexington.cgi">CDART</A> resource. A <A HREF="#CompareCDART">brief description of CDART</A> is provided in the "<A HREF="#Compare">Compare CDD, CDART, and SPARCLE</A>" section of this document.</I>
|
|
</LI><BR>
|
|
|
|
<!-- ======== END_OVERVIEW_SUPERFAMILY_ARCHITECTURES ======= -->
|
|
|
|
<!-- ========= OVERVIEW_SUBFAMILY_ARCHITECTURES ========== -->
|
|
|
|
<LI><A NAME="OverviewTypesOfArchitecturesSubfamily"></A><A NAME="OverviewSubfamilyArchitectures"></A><A NAME="OverviewSubfamilyArchitecture"></A><B>Subfamily architectures</B><BR><BR>
|
|
|
|
Subfamily architectures <B>either</B> contain a <B>mix</B> of conserved domain <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamilies</A> and <A HREF="../../cdd/cdd_help.shtml#Hierarchy">subfamilies</A>, <B>or</B> consist <B>solely</B> of conserved domain subfamilies.<BR><BR>
|
|
|
|
A subfamily is represented by a conserved domain model that gets a <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hit</a> to the protein query sequence. The <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hits</a> represent a high confidence that the query sequence belongs to the same protein family as the sequences used to create each conserved domain model, and therefore a <B>high confidence level</B> for the inferred function of the protein query sequence.<BR><BR>
|
|
|
|
To see if a conserved domain is a superfamily or subfamily, mouse over a conserved domain's footprint in the architecture's graphical display. A <B>superfamily</B> will have a <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix_cl"><B>"cl" prefix</B></A> in the accession number; the "cl" stands for <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamily</A> <B>cl</B>uster. A <B>subfamily</B> will have an <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">accession number <B>prefix other than "cl"</B></A>.<BR><BR>
|
|
|
|
<!-- Conserved domain models that represent superfamilies verus subfamilies can be distinguished by their <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">accession number prefix</A>. Superfamilies have a <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix_cl"><B>"cl" prefix</B></A> (which stands for <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamily</A> <B>cl</B>uster), while subfamilies have an accession number prefix that is <B>anything other than "cl"</B>.<BR><BR -->
|
|
|
|
<A HREF="#DataProcessingTypesOfArchitecturesSubfamily">Additional details about subfamily architectures</A> are provided in the <A HREF="#DataProcessing">data processing</A> section of this document.<BR><BR>
|
|
|
|
<I>Note: Subfamily architectures are currently found only in the <A HREF="/sparcle">SPARCLE</A> resource. A <A HREF="#CompareSPARCLE">brief description of SPARCLE</A> is provided in the <A HREF="#Compare">Compare CDD, CDART, and SPARCLE</A> section of this document.</I>
|
|
</LI><BR>
|
|
|
|
<!-- ======= END_OVERVIEW_SUBFAMILY_ARCHITECTURES ======== -->
|
|
|
|
</UL>
|
|
|
|
<!-- ========== END_OVERVIEW_TYPES_OF_ARCHITECTURES ============ -->
|
|
|
|
<!-- ========== OVERVIEW_SINGLE_DOMAIN_ARCHITECTURES ============= -->
|
|
|
|
<A NAME="OverviewArchitecturesWithSingleConservedDomainFootprint"></A><A NAME="ArchitecturesWithSingleConservedDomainFootprint"></A><A NAME="SingleConservedDomainArchitectures"></A><A NAME="SingleConservedDomainArchitecture"></A><A NAME="SingleDomainArchitectures"></A><A NAME="SingleDomainArchitecture"></A>
|
|
|
|
<B>Architectures with single conserved domain footprint</B>:<BR>
|
|
|
|
<UL>
|
|
<LI>It is also possible for a domain architecture to consist of a <B>single conserved domain footprint</B>. That footprint can represent either a superfamily architecture or a subfamily architecture.</LI>
|
|
</UL><BR>
|
|
|
|
<!-- ============ END_SINGLE_DOMAIN_ARCHITECTURES =========== -->
|
|
|
|
<!-- ============== OVERVIEW_ARCHITECTURE_ID ============ -->
|
|
|
|
<A NAME="OverviewArchitectureID"></A>
|
|
<B>Each architecture receives a unique and stable architecture ID</B>:<BR>
|
|
|
|
<UL>
|
|
<LI>Each conserved domain architecture receives a unique and stable <A HREF="#ArchitectureID"><B>architecture ID</B></A>, which reflects the set of conserved domain models that are <B>top-scoring hits</B> (as determined by the <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A> service) on the proteins that possess the architecture, the <B>sequential order</B> of those domains, and the <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types"><B>type of hit</B></a> each domain has to the proteins. Architectures that consist of a single conserved domain footprint also receive an architecture ID.</LI><BR>
|
|
</UL>
|
|
|
|
<!-- ============== END_OVERVIEW_ARCHITECTURE_ID ============ -->
|
|
|
|
<!-- ==== OVERVIEW_ADDITIONAL_INFO_ABOUT_CONSERVED_DOMAINS ==== -->
|
|
|
|
<A NAME="OverviewAdditionalInfo"></A>
|
|
<B>Additional information about conserved domains</B>:<BR>
|
|
|
|
<UL>
|
|
<LI>The <A HREF="../../cdd/cdd_help.shtml">Conserved Domain Database (CDD) help document</A> provides additional information about <A HREF="../../cdd/cdd_help.shtml#Hierarchy">domain family hierarchies</A>, including superfamilies and subfamilies. It also provides additional information about the companion <A HREF="../../cdd/cdd_help.shtml#CDSearch_help_contents">CD-Search</A> tool, including the <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types">hit types</a> displayed in <A HREF="../../cdd/cdd_help.shtml#RPSBResults">CD-Search results</A>, such as <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hits</a>, <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_non_specific_hit">non-specific hits</a>, the <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_superfamily">superfamily(ies)</a> to which those hits belong, and <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_multi_domain">multi-domain models</a>. Each superfamily on a CD-Search results page is represented by a cartoon with a distinct <A HREF="../../cdd/cdd_help.shtml#RPSB_HitColors">color/shape combination</A>, in order to distinguish domains from each other.</LI><BR>
|
|
</UL>
|
|
|
|
<!-- ==== END_OVERVIEW_ADDITIONAL_INFO_ABOUT_CONSERVED_DOMAINS ==== -->
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<BR>
|
|
|
|
<!-- ============== END_LEVEL_1_TOPIC_OVERVIEW =============== -->
|
|
|
|
<!-- ============= LEVEL_1_TOPIC_EXAMPLES_OF_USE ============= -->
|
|
|
|
<A NAME="ExamplesOfUse"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>How can SPARCLE be used to learn more about proteins?</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="ExampleOfUseClassifyProtein"></A>Classify a protein based on its conserved domain architecture</LI><BR>
|
|
|
|
<UL>
|
|
<LI>If you <A HREF="#InputSequence">enter a query sequence into CD-Search</A>, the results page will include a "Protein Classification" section, if the query protein has a hit to a curated domain architecture in the <A HREF="/sparcle">SPARCLE database</A>. (See an <I><A HREF="#InputSequenceIllustration">illustrated example</A></I> that uses <A HREF="/protein/NP_387887">NP_387887</A>, <I>Bacillus subtilis</I> DNA gyrase subunit B, as the protein query sequence.)</LI><BR>
|
|
</UL>
|
|
<BR>
|
|
|
|
|
|
<LI><A NAME="ExampleOfUseRetrieveByKeyword"></A>Retrieve conserved domain architectures whose descriptions contain the keywords you specify</LI><BR>
|
|
|
|
<UL>
|
|
<LI>You can <A HREF="#InputKeyword">search the SPARCLE database by keyword</A> to retrieve conserved domain architectures that contain the term(s) of interest in their descriptions. (See an <I><A HREF="#InputKeywordsIllustration">illustrated example</A></I> that looks for the words <A HREF="/sparcle/?term=chloride+channel+AND+curated%5BReviewLevel%5D">"<I>chloride</I>" and "<I>channel</I>" and limits the results to curated domain architecture records</A> by adding <I>curated[ReviewLevel]</I> to the search.)</LI><BR>
|
|
</UL>
|
|
<BR>
|
|
|
|
<LI><A NAME="ExampleOfUseRetrieveProteinsWithSameArchitecture"></A>Retrieve proteins that have the same conserved domain architecture, regardless of the extent of their overall sequence similarity</LI><BR>
|
|
|
|
<UL>
|
|
<LI>Use either of the search methods described in the <A HREF="#Input">Input Options</A> section of this document to retrieve conserved domain architectures. Then click on an architecture of interest to open its <A HREF="#SummaryPage">summary page</A>. (As an <span style="color:#D70000"><B>example</B></span>, open the <A HREF="/Structure/sparcle/archview.html?archid=10002697">summary page for <B>domain architecture ID 10002697, cytochrome c biogenesis protein DipZ</B></A>.) Scroll to the section of the record labeled "<B>Sequences with the domain architecture</B>." There, you can view all sequences with that architecture or a pre-defined subset. Subsets include protein sequences that have links to corresponding literature references in PubMed, 3D structures, genes, and reference sequence (RefSeq) records. <I>(A separate section of this document provides <A HREF="#SummaryPageSequences">additional details about the "sequences with this architecture"</A> section of a SPARCLE record.)</I>
|
|
<!-- (See an <I><span style="color:#D70000">illustrated example</span></I> of the <A HREF="#SummaryPage">SPARCLE summary page</A> for <A HREF="/Structure/sparcle/archview.html?archid=10647733">domain architecture ID 10647733, DNA gyrase subunit B</A>. Click on the illustration to open the corresponding live SPARCLE record.) --></LI><BR>
|
|
</UL>
|
|
<BR>
|
|
|
|
<LI><A NAME="ExampleOfUseInferFunctionOfHypotheticalProtein"></A>Infer the biological function of a hypothetical protein</LI><BR>
|
|
|
|
<UL>
|
|
<LI>The examples below, from bacterial genome sequencing projects, have been named "hypothetical protein" by the data submitters. SPARCLE can be used to infer a rather precise biological function for these proteins with good confidence:</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<!-- LI><A HREF="/protein/ERL52576.1"><B>Hypothetical protein BJB45_08465</B></A> [Halomonas huangheensis], Accession.Version: ERL52576.1<BR>
|
|
View the <A HREF="/protein/ERL52576.1?report=fasta"><B>sequence data</B> in FASTA format</A><BR>
|
|
View the <A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=ERL52576.1">[<A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=545315399">]<B>CD-Search results</B></A>, which include <B>protein classification</B><BR>
|
|
</LI><BR -->
|
|
|
|
<LI><A HREF="/protein/EAL23604.1"><B>Hypothetical protein CNBA2510</B></A> [Cryptococcus neoformans var. neoformans B-3501A], Accession.Version: EAL23604.1<BR>
|
|
View the <A HREF="/protein/EAL23604.1?report=fasta"><B>sequence data</B> in FASTA format</A><BR>
|
|
View the <A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=EAL23604.1"><!-- A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=50260954" --><B>CD-Search results</B></A>, which include <B>protein classification</B><BR>
|
|
</LI><BR>
|
|
|
|
<!-- LI><A HREF="/protein/EKA97624.1"><B>Hypothetical protein HMPREF1310_01886</B></A> [Proteus mirabilis WGLW4], Accession.Version: EKA97624.1<BR>
|
|
View the <A HREF="/protein/EKA97624.1?report=fasta"><B>sequence data</B> in FASTA format</A><BR>
|
|
View the <A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=EKA97624.1">[<A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=404597118">]<B>CD-Search results</B></A>, which include <B>protein classification</B><BR>
|
|
</LI><BR -->
|
|
|
|
<LI><A HREF="/protein/EKD69980.1"><B>Hypothetical protein ACD_46C00685G0010</B></A> [uncultured bacterium], Accession.Version: EKD69980.1<BR>
|
|
View the <A HREF="/protein/EKD69980.1?report=fasta"><B>sequence data</B> in FASTA format</A><BR>
|
|
View the <A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=EKD69980.1"><!-- A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=406936192" --><B>CD-Search results</B></A>, which include <B>protein classification</B><BR>
|
|
</LI><BR>
|
|
|
|
<LI><A HREF="/protein/BAH05462.1"><B>Hypothetical protein CKR_0411</B></A> [Clostridium kluyveri NBRC 12016], Accession.Version: BAH05462.1<BR>
|
|
View the <A HREF="/protein/BAH05462.1?report=fasta"><B>sequence data</B> in FASTA format</A><BR>
|
|
View the <A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=BAH05462.1"><!-- A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=219567478" --><B>CD-Search results</B></A>, which include <B>protein classification</B><BR>
|
|
</LI><BR>
|
|
|
|
<LI><A HREF="/protein/AGU14953.1"><B>Hypothetical protein CARG_04030</B> [Corynebacterium argentoratense DSM 44202], Accession.Version: AGU14953.1</A><BR>
|
|
View the <A HREF="/protein/AGU14953.1?report=fasta"><B>sequence data</B> in FASTA format</A><BR>
|
|
View the <A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=AGU14953.1"><!-- A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=533215625" --><B>CD-Search results</B></A>, which include <B>protein classification</B><BR>
|
|
</LI><BR>
|
|
|
|
<!-- LI><A HREF="/protein/_______.1">______________definition_line_and_organism_______________, Accession.Version: _______.1</A><BR>
|
|
View the <A HREF="/protein/_______.1?report=fasta"><B>sequence data</B> in FASTA format</A><BR>
|
|
View the <A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=_______.1">[<A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=___gi____">]<B>CD-Search results</B></A>, which include <B>protein classification</B><BR>
|
|
</LI><BR -->
|
|
|
|
</UL>
|
|
</UL>
|
|
<BR>
|
|
|
|
<!-- LI>______________</LI><BR>
|
|
|
|
<UL>
|
|
<LI>______________</LI><BR>
|
|
<LI>______________</LI><BR>
|
|
<UL>
|
|
<LI>______________</LI><BR>
|
|
<LI><SPAN style="color:#D70000">___red_text___</SPAN>, using <A HREF="/protein/NP_387887?report=fasta">NP_387887</A></LI><BR>
|
|
</UL>
|
|
</UL>
|
|
<BR -->
|
|
|
|
</UL>
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
|
|
<!-- ============== END_LEVEL_1_TOPIC_EXAMPLES_OF_USE =============== -->
|
|
|
|
<!-- ========= LEVEL_1_TOPIC_COMPARE_CDD_CDART_SPARCLE ========= -->
|
|
|
|
<A NAME="Compare"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Compare CDD, CDART, and SPARCLE</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<!-- The <A HREF="/cdd/">Conserved Domain Database (<B>CDD</B>)</A>, <A HREF="/Structure/lexington/lexington.cgi">Conserved Domain Architecture Retrieval Tool (<B>CDART</B>)</A>, and <A HREF="/sparcle">Subfamily Protein Architecture Labeling Engine (<B>SPARCLE</B>)</A> resources are related to each other, although they differ in scope and function. Below, the <A HREF="#CompareCDD"><B>CDD overview</B></A>, <A HREF="#CompareCDART"><B>CDART overview</B></A>, and <A HREF="#CompareSPARCLE"><B>SPARCLE overview</B></A> describe the resources, how they differ, and provide examples of how each resource can be used.<BR><BR -->
|
|
|
|
What is the association among the <A HREF="#CompareCDD">CDD</A>, <A HREF="#CompareCDART">CDART</A>, and <A HREF="#CompareSPARCLE">SPARCLE</A> resources?<BR>
|
|
How are they related to each other, and how do they differ?<BR>
|
|
For what purpose would you use one versus another?<BR>
|
|
These questions are answered below.<BR><BR>
|
|
|
|
<!-- ========= COMPARE_CDD_OVERVIEW =========== -->
|
|
|
|
<A NAME="CompareCDD"></A>
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="5" bgcolor="#FFFFFF">
|
|
<TR>
|
|
<TD class="Yellow1CellBlueEdge NormalText" align="left">
|
|
<A HREF="/cdd/"><B>Conserved Domain Database</B> (<B>CDD</B>)</A>
|
|
<A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" valign="center" alt="back to top"></A>
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- UL>
|
|
|
|
<LI><A HREF="/cdd/"><B>Conserved Domain Database</B> (<B>CDD</B>)</A></LI><BR -->
|
|
|
|
<UL>
|
|
|
|
<LI>The <A HREF="/cdd/">Conserved Domain Database (CDD)</A> is the foundation upon which CDART and SPARCLE are built.</LI><BR>
|
|
|
|
<LI>CDD is a <B>repository of <A HREF="../../cdd/cdd_help.shtml#CDWhat">conserved domain</A> models</B> from a variety of <A HREF="../../cdd/cdd_help.shtml#CDSource">source databases</A>, including <A HREF="../../cdd/cdd_help.shtml#CDSource_NCBI_curated">NCBI-curated</A> conserved domain models, which use 3D-structure information to explicitly define domain boundaries, aligned blocks, and amend alignment details. Sets of conserved domain models that generate overlapping annotation on the same protein sequences are grouped into <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamilies</A>.</LI><BR>
|
|
|
|
<LI>The individual conserved domain models and superfamilies are used by <A HREF="../../cdd/cdd_help.shtml#CDSearchWhat"><B>CD-Search</B></A> (<A HREF="../../cdd/cdd_help.shtml#RPSBWhat">RPS-BLAST</A>) to identify conserved domains in protein sequences, and thereby <B>infer the function of the proteins</B>. Each conserved domain model can fall into one of four <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types">types of RPS-BLAST hits</A>, and CD-Search offers <A HREF="../../cdd/cdd_help.shtml#GlobalOptions">three levels of detail</A> in the search results (<A HREF="../../cdd/cdd_help.shtml#ConciseDisplay">concise</A>, <A HREF="../../cdd/cdd_help.shtml#StandardDisplay">standard</A>, <A HREF="../../cdd/cdd_help.shtml#FullDisplay">full</A> results).</LI><BR>
|
|
|
|
<LI>In addition to being accessible through CD-Search, the conserved domain models in CDD can also be <A HREF="/cdd/">searched by text term</A>.</LI><BR>
|
|
|
|
<LI>Additional details are provided in the <A HREF="../../cdd/cdd_help.shtml">CDD help document</A>, <A HREF="../../cdd/cdd_help.shtml#CDSearch_help_contents">CD-Search help document</A>, and <A HREF="../../cdd/docs/cdd_publications.html">CDD publications</A>.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<!-- /UL -->
|
|
|
|
<!-- ========= END_COMPARE_CDD_OVERVIEW =========== -->
|
|
|
|
<!-- ========= COMPARE_CDD_USES_BOXED_NOTE =========== -->
|
|
|
|
<A NAME="CompareCDDExamplesOfUse"></A>
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="5" bgcolor="#FFFFFF">
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="60" align="left"> </TD>
|
|
<TD class="Yellow1CellBlueEdge NormalText" align="left">
|
|
Examples of how CDD can be used and the types of information it displays:
</TD>
|
|
<!-- A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" valign="center" alt="back to top"></A -->
|
|
<TD class="WhiteCell NormalText" width="50" align="left"> </TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="60" align="left"> </TD>
|
|
|
|
<TD class="WhiteCellBlueEdgeBottomAndSides NormalText" align="left">
|
|
|
|
<UL>
|
|
|
|
<LI>Retrieve a <A HREF="../../cdd/cdd_help.shtml#CDD_Docsum"><B>list of conserved domain models</B></A> that contain a specific keyword or phrase.</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><B>Example:</B> retrieve domains that have the phrase <A HREF="/cdd?term=%22chloride%20channel%22%5BAll%20Fields%5D"><I>"chloride channel"</I></A> in their description.</LI><BR>
|
|
|
|
<LI>If desired, restrict the search to <A HREF="../../cdd/cdd_help.shtml#CDSource_NCBI_curated">NCBI-curated</A> domain models by adding <I>cdd<A HREF="../../cdd/cdd_help.shtml#SearchFieldDatabase">[database]</A></I> to the query. For example, a search for:<BR><A HREF="/cdd?term=%22chloride%20channel%22%5BAll%20Fields%5D%20AND%20cdd%5Bdatabase%5D"><I>"chloride channel" AND cdd[database]</I></A><BR>
|
|
will retrieve the NCBI-curated domain models that contain the phrase "chloride channel."</LI><BR>
|
|
|
|
<LI><B>Read more:</B> the <A HREF="../../cdd/cdd_help.shtml">CDD help document</A> provides <A HREF="../../cdd/cdd_help.shtml#SearchEntrezCDD">search tips</A>, including details about <A HREF="../../cdd/cdd_help.shtml#SearchTipsInputValues">allowable search terms</A>, examples of basic and advanced <A HREF="../../cdd/cdd_help.shtml#SearchMethods">search methods</A>, a list of available <A HREF="../../cdd/cdd_help.shtml#SearchFields">search fields</A>, tips about <A HREF="../../cdd/cdd_help.shtml#SearchTipsAdditional">use of quotes and truncation</A>, and more.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<LI>View the <A HREF="../../cdd/cdd_help.shtml#CDVisual"><B>details of a conserved domain model</B></A>, such as its <A HREF="../../cdd/cdd_help.shtml#CDTextSummary">description</A>, <A HREF="../../cdd/cdd_help.shtml#CDAlignmentDisplays">multiple sequence alignment</A>, <A HREF="../../cdd/cdd_help.shtml#ConservedFeatures">conserved features/sites</A>, and corresponding <A HREF="#Include3DStruct">3D structures</A>.</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><B>Example:</B> view the conserved domain summary page for the <A HREF="/Structure/cdd/cddsrv.cgi?uid=cd00400">voltage-gated chloride channel, cd00400</A>.</LI><BR>
|
|
|
|
<LI><B>Read more:</B> the <A HREF="../../cdd/cdd_help.shtml">CDD help document</A> describes the types of information shown on a <A HREF="../../cdd/cdd_help.shtml#SummaryPage">summary page</A> for a conserved domain model.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<LI>Infer the <B>putative function of a <A HREF="../../cdd/cdd_help.shtml#QuerySequence">query protein</A></B> by identifying its conserved domains.</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><B>Example:</B> <A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=NP_001190924">identify the conserved domains in the Arabidopsis thaliana chloride channel E protein sequence (NP_001190924)</A>.</LI><BR>
|
|
|
|
<LI><B>Read more:</B> the <A HREF="../../cdd/cdd_help.shtml#CDSearch_help_contents">CD-Search help document</A> describes how to use the <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search tool</A>, including allowable types of <A HREF="../../cdd/cdd_help.shtml#RPSBInput">input</A> and display controls for the <A HREF="../../cdd/cdd_help.shtml#RPSBResults">output</A>.
|
|
</LI><BR>
|
|
|
|
</UL>
|
|
|
|
</UL>
|
|
|
|
</TD>
|
|
|
|
<TD class="WhiteCell NormalText" width="50" align="left"> </TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="60" align="left"> </TD>
|
|
<TD class="WhiteCell NormalText" align="left"> </TD>
|
|
<TD class="WhiteCell NormalText" width="50" align="left"> </TD>
|
|
</TR>
|
|
|
|
</TABLE>
|
|
|
|
<!-- ========= END_COMPARE_CDD_USES_BOXED_NOTE =========== -->
|
|
|
|
|
|
<!-- ========= COMPARE_CDART_OVERVIEW =========== -->
|
|
|
|
<A NAME="CompareCDART"></A>
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="5" bgcolor="#FFFFFF">
|
|
<TR>
|
|
<TD class="Yellow1CellBlueEdge NormalText" align="left">
|
|
<A HREF="/Structure/lexington/lexington.cgi"><B>Conserved Domain Architecture Retrieval Tool</B> (<B>CDART</B>)</A>
|
|
<A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" valign="center" alt="back to top"></A>
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- UL>
|
|
<LI><A HREF="/Structure/lexington/lexington.cgi"><B>Conserved Domain Architecture Retrieval Tool</B> (<B>CDART</B>)</A></LI><BR -->
|
|
|
|
<UL>
|
|
|
|
<LI>The <A HREF="/Structure/lexington/lexington.cgi">Conserved Domain Architecture Retrieval Tool (CDART)</A> is built upon <A HREF="#CompareCDD">CDD</A>.<!--, and in turn, CDART provides the foundation on which <A HREF="#CompareSPARCLE">SPARCLE</A> is built. -->
|
|
</LI><BR>
|
|
|
|
<LI>CDART is a <B>database of conserved domain architectures</B> and a tool for finding <B>protein similarities across significant evolutionary distances</B> using sensitive domain profiles rather than direct sequence similarity, focusing on the overall conserved domain architecture of the protein rather than on individual conserved domains.</LI><BR>
|
|
|
|
<LI><A NAME="DomainArchitecture"></A>A <B>domain architecture</B> is defined as the <B>sequential order of </B><A HREF="/Structure/cdd/cdd_help.shtml#CDWhat"><B>conserved domains</B></A> in a protein.<BR>
|
|
CDART uses purely <B>automated</B> techniques to identify the conserved domain architecture of each sequence in the <A HREF="/gquery/">Entrez</A> <A HREF="/protein">Protein database</A>.</LI><BR>
|
|
|
|
<LI>CDART then uses automated methods to <B>identify domain architectures that are similar to each other</B>.<BR>
|
|
A <B>similar domain architecture</B> must include at least one of the conserved domain superfamilies in the query sequence. The <A HREF="/Structure/lexington/docs/cdart_help.html#OutputDomainArchitectureSimilarityScore">similarity score</A> of each domain architecture indicates the number of domain superfamilies in the architecture that match domain superfamilies in the query protein, and is used to rank the search results.</LI><BR>
|
|
|
|
<LI>Through these methods, CDART makes it possible to retrieve all of the protein sequences with a given conserved domain architecture, and to retrieve proteins with similar domain architectures.</LI><BR>
|
|
|
|
<LI>Additional details are provided on the "<A HREF="/Structure/lexington/docs/cdart_about.html">About CDART</A>" page, in the <A HREF="/Structure/lexington/docs/cdart_help.html">CDART Help Document</A>, and in the <A HREF="/Structure/lexington/docs/cdart_publications.html">CDART publication</A>.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<!-- /UL -->
|
|
|
|
<!-- ======== END_COMPARE_CDART_OVERVIEW ========= -->
|
|
|
|
<!-- ========= COMPARE_CDART_USES_BOXED_NOTE =========== -->
|
|
|
|
<A NAME="CompareCDARTExamplesOfUse"></A>
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="5" bgcolor="#FFFFFF">
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="60" align="left"> </TD>
|
|
<TD class="Yellow1CellBlueEdge NormalText" align="left">
|
|
Examples of how CDART can be used and the types of information it displays:
|
|
<!-- A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" valign="center" alt="back to top"></A -->
|
|
<TD class="WhiteCell NormalText" width="50" align="left"> </TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="60" align="left"> </TD>
|
|
|
|
<TD class="WhiteCellBlueEdgeBottomAndSides NormalText" align="left">
|
|
|
|
|
|
<UL>
|
|
|
|
<LI>View the <A HREF="../../lexington/docs/cdart_help.html#WhatIs"><B>conserved domain architecture</B></A> of a query protein, followed by a list of <A HREF="../../lexington/docs/cdart_help.html#OutputList">similar conserved domain architectures</A>:</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><B>Example:</B> View the <!-- A HREF="/Structure/lexington/lexington.cgi?cmd=prot&uid=334187188" --><A HREF="/Structure/lexington/lexington.cgi?cmd=prot&uid=NP_001190924">conserved domain architecture for Arabidopsis thaliana chloride channel E (NP_001190924)</A> protein sequence.</LI><BR>
|
|
|
|
<LI><B>Read more:</B> the <A HREF="../../lexington/docs/cdart_help.html">CDART help document</A> describes how to use the tool, including a <A HREF="../../lexington/docs/cdart_help.html#QuickStart">quick start guide</A>, <A HREF="../../lexington/docs/cdart_help.html#Input">input options</A>, and <A HREF="../../lexington/docs/cdart_help.html#Output">output</A>.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
|
|
<LI>View a list of <A HREF="../../lexington/docs/cdart_help.html#OutputList"><B>similar conserved domain architectures</B></A> and retrieve proteins that have an architecture of interest:</LI><BR>
|
|
|
|
<UL>
|
|
<LI><B>Example:</B> Starting with the CDART display for the <!-- A HREF="/Structure/lexington/lexington.cgi?cmd=prot&uid=334187188" --><A HREF="/Structure/lexington/lexington.cgi?cmd=prot&uid=NP_001190924">Arabidopsis thaliana chloride channel E (NP_001190924)</A>, <!-- shows the conserved domain architecture of that protein, followed by a list of --> scroll through the list of <A HREF="../../lexington/docs/cdart_help.html#OutputList">similar conserved domain architectures</A> that appears beneath the query protein's architecture. Click on any architecture of interest to retrieve all sequences from the non-redundant ("<B>nr</B>") protein database that possess the architecture.</LI><BR>
|
|
|
|
<LI><B>Read more:</B> the <A HREF="../../lexington/docs/cdart_help.html">CDART help document</A> describes how to use the tool, and provides details about the list of <A HREF="../../lexington/docs/cdart_help.html#OutputList">similar conserved domain architectures</A> as well as options to <A HREF="../../lexington/docs/cdart_help.html#OutputFilters">filter your results</A>.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
</UL>
|
|
|
|
</TD>
|
|
|
|
<TD class="WhiteCell NormalText" width="50" align="left"> </TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="60" align="left"> </TD>
|
|
<TD class="WhiteCell NormalText" align="left"> </TD>
|
|
<TD class="WhiteCell NormalText" width="50" align="left"> </TD>
|
|
</TR>
|
|
|
|
</TABLE>
|
|
|
|
<!-- ========= END_COMPARE_CDART_USES_BOXED_NOTE =========== -->
|
|
|
|
|
|
<!-- ========= COMPARE_SPARCLE_OVERVIEW =========== -->
|
|
|
|
<A NAME="CompareSPARCLE"></A>
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="5" bgcolor="#FFFFFF">
|
|
<TR>
|
|
<TD class="Yellow1CellBlueEdge NormalText" align="left"><A HREF="/sparcle"><B>Subfamily Protein Architecture Labeling Engine</B> (<B>SPARCLE</B>)</A>
|
|
<A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" valign="center" alt="back to top"></A>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- UL>
|
|
|
|
<LI><A NAME="CompareSPARCLE"></A><A HREF="/sparcle"><B>Subfamily Protein Architecture Labeling Engine</B> (<B>SPARCLE</B>)</A></LI><BR -->
|
|
|
|
<UL>
|
|
|
|
<LI>The <A HREF="/sparcle">Subfamily Protein Architecture Labeling Engine (SPARCLE)</A> is built upon <A HREF="#CompareCDART">CDART</A>.
|
|
</LI><BR>
|
|
|
|
<LI>SPARCLE contains the <B>subset of domain architectures</B> that include at least one conserved domain model that is a <A HREF="/Structure/cdd/cdd_help.shtml#RPSB_hit_type_specific_hit"><B>specific hit</B></A> to at least one protein sequence in the non-redundant ("nr") protein database.
|
|
</LI><BR>
|
|
|
|
<LI>SPARCLE then <B>assigns a</B> <A HREF="#Name"><B>name</B></A> <B>and</B> <A HREF="#Label"><B>label</B></A> (a description of the architecture's biological function) <B>to each conserved domain architecture</B>. As noted in the <A HREF="#DataProcessing">data processing</A> section of this document, names are assigned to the architectures either by a manual <A HREF="#DataProcessingCurated">curation process</A>, or by automated processes that use algorithms to <A HREF="#DataProcessingAutonamed">autoname</A> an architecture, or to <A HREF="#DataProcessingNamedByDomain">name an architecture based on the domains it contains</A>. Each <A HREF="#SummaryPage">SPARCLE record</A> includes a list of the <A HREF="#Evidence">supporting evidence</A> that was used in assigning a name to the architecture.<!-- Curated domain architecture records are supported with, and linked to, <A HREF="#Evidence">evidence</A> from high quality sequence data and literature. -->
|
|
<!-- Whereas CDART identifies the domain architectures that exist in publicly available proteins, using purely automated methods, the SPARCLE resource assigns names and functional labels to the domain architectures, using both manual and automated methods, as described in the <A HREF="#DataProcessing">data processing</A> section of this document. <B>Curated domain architecture records</B> are supported with, and linked to, evidence from high quality sequence data and literature. -->
|
|
<!-- While CDART uses purely automated methods to identify the domain architectures that exist in publicly available proteins, SPARCLE uses both manual and automated methods to assigns names and functional labels to the domain architectures, as described in the <A HREF="#DataProcessing">data processing</A> section of this document.. <B>Curated domain architecture records</B> are supported with, and linked to, evidence from high quality sequence data and literature. -->
|
|
<!-- While CDART includes domain architectures that were identified with purely automated methods, the SPARCLE database includes <B>curated domain architecture records</B> that are supported with, and linked to, evidence from high quality sequence data and literature. The domain architectures are named using the methods described in the <A HREF="#DataProcessing">data processing</A> section of this document.-->
|
|
</LI><BR>
|
|
|
|
<LI>In this way, SPARCLE is used to <B>classify proteins</B>, based on the <B>functional characterization and labeling of protein sequences</B> that have been grouped by their characteristic <!-- A HREF="../../lexington/docs/cdart_help.html#WhatIs" --><A HREF="#DomainArchitecture">conserved domain architecture</A>.</LI><BR>
|
|
|
|
<LI>Additional details are provided on the "<A HREF="/Structure/sparcle/docs/sparcle_about.html">About SPARCLE</A>" page, in this SPARCLE Help Document, and in the <A HREF="sparcle_publications.html">SPARCLE publication</A>.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<!-- /UL -->
|
|
|
|
<!-- ========= END_COMPARE_SPARCLE_OVERVIEW =========== -->
|
|
|
|
<!-- ========= COMPARE_SPARCLE_USES_BOXED_NOTE =========== -->
|
|
|
|
<A NAME="CompareSPARCLEExamplesOfUse"></A>
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="5" bgcolor="#FFFFFF">
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="60" align="left"> </TD>
|
|
<TD class="Yellow1CellBlueEdge NormalText" align="left">
|
|
Examples of how SPARCLE can be used and the types of information it displays:
|
|
<!-- A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" valign="center" alt="back to top"></A -->
|
|
<TD class="WhiteCell NormalText" width="50" align="left"> </TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="60" align="left"> </TD>
|
|
|
|
<TD class="WhiteCellBlueEdgeBottomAndSides NormalText" align="left">
|
|
|
|
|
|
<UL>
|
|
|
|
<LI>Find the <A HREF="#InputSequence"><B>protein classification</B> of a query sequence</A>:</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><B>Example:</B> Enter the <I>Bacillus subtilis</I> DNA gyrase protein as a query sequence into the <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A> tool, either as an accession number (NP_387887) or as <A HREF="/protein/NP_387887?report=fasta">FASTA-formatted sequence data for NP_387887</A>. The "Protein Classification" section of the <A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=NP_387887">CD-Search results for NP_387887</A> will show the protein's domain architecture, including a link to the corresponding SPARCLE record.</LI><BR>
|
|
|
|
<LI><B>Read more:</B> A separate section of this document provides <A HREF="#InputSequence">additional details and an illustrated example</A> showing the classification of a protein query sequence.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
|
|
<LI>Retrieve the <A HREF="#InputKeywords">domain architectures that contain a <B>keyword or phrase of interest</B></A> in their description:</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><B>Example:</B> Retrieve domain architectures that contain the terms <A HREF="/sparcle/?term=chloride+channel+AND+curated%5BReviewLevel%5D">"<I>chloride</I>" and "<I>channel</I>", and limit the results to curated domain architecture records</A>.</LI><BR>
|
|
|
|
<LI><B>Read more:</B> A separate section of this document provides <A HREF="#InputKeywords">additional details and an illustrated example</A> showing the retrieval of domain architectures using a keyword search.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<LI>Retrieve a non-redundant list of <A HREF="#SummaryPage">protein sequences that have a <B>domain architecture of interest</B></A>:</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><B>Example:</B> Open the SPARCLE record for <A HREF="/Structure/sparcle/archview.html?archid=10002697">domain architecture ID 10002697</A>, for cytochrome c biogenesis protein DipZ. Scroll down to the blue header, "Sequences with this architecture," which by default shows all <A HREF="#PIG">non-redundant sequences</A> with that architecture. If desired, use the folder tabs in that section to view a pre-defined subset of proteins, such as those from the RefSeq or SwissProt databases, or those which have resolved 3D structures.</LI><BR>
|
|
|
|
<LI><B>Read more:</B> A separate section of this document describes the information and options available on a <A HREF="#SummaryPage">domain architecture summary page</A>.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
</UL>
|
|
|
|
</TD>
|
|
|
|
<TD class="WhiteCell NormalText" width="50" align="left"> </TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="60" align="left"> </TD>
|
|
<TD class="WhiteCell NormalText" align="left"> </TD>
|
|
<TD class="WhiteCell NormalText" width="50" align="left"> </TD>
|
|
</TR>
|
|
|
|
</TABLE>
|
|
|
|
<!-- ========= END_COMPARE_SPARCLE_USES_BOXED_NOTE =========== -->
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- ======= END_LEVEL_1_TOPIC_COMPARE_CDD_CDART_SPARCLE ========= -->
|
|
|
|
<!-- ====== PAGE_MARGIN_TO_RIGHT_OF_BLUE_EDGE_BOX_WITH_SECTION_TEMPLATE_CONTENTS ====== -->
|
|
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ############# END_BLUE_EDGE_BOX_WITH_SECTION_1_CONTENTS ############ -->
|
|
|
|
<!-- ==================== VERTICAL SPACER ======================= -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
<TR>
|
|
<TD class="WhiteCell NormalText"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ==================== END_VERTICAL SPACER ======================= -->
|
|
|
|
|
|
<!-- ########### BEGIN_BLUE_HEADER_SECTION_2 ############# -->
|
|
|
|
<A NAME="Input"></A>
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="SteelBlueCell"><SPAN class="HeaderText1">Input Options</SPAN></TD>
|
|
<TD class="SteelBlueCell" WIDTH="15" ALIGN="left" VALIGN="center"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A></TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ############## END_BLUE_HEADER_SECTION_2 ############ -->
|
|
|
|
<!-- ########## BEGIN_BLUE_EDGE_BOX_WITH_SECTION_2_CONTENTS ########### -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="WhiteCellBlueEdgeAll NormalText">
|
|
|
|
<!-- ============ MINI_TOC_FOR_THIS_SECTION ============== -->
|
|
<BR><BR>
|
|
<P class="indent20">To access SPARCLE, you can either:</P>
|
|
<UL>
|
|
<LI><A HREF="#InputSequence">Enter a <B>query sequence</B> into CD-Search</A> <I>(<A HREF="#InputSequenceIllustration"><span style="color:#d70000">illustrated example</span></A> & <A HREF="#InputSequenceFootnote">note about ongoing research</A>)</I><BR> -<B>OR</B>- <BR></LI>
|
|
<LI><A HREF="#InputKeyword">Search the SPARCLE database by <B>keyword</B></A> <I>(<A HREF="#InputKeywordsIllustration"><span style="color:#d70000">illustrated example</span></A>)</I></LI>
|
|
</UL>
|
|
|
|
<P class="indent20">With either approach, the corresponding <B>SPARCLE record</B>(s) will display the <A HREF="#SearchFieldName"><B>name</B></A> and functional <A HREF="#SearchFieldLabel"><B>label</B></A> of the protein's conserved domain architecture, supporting <B>evidence</B>, and links to <B>other proteins</B> with the same architecture. Details about each approach are below.</P><BR><BR>
|
|
|
|
|
|
<!-- ========== END_MINI_TOC_FOR_THIS_SECTION ============ -->
|
|
|
|
<!-- =========== LEVEL_1_TOPIC_INPUT_SEQUENCE ============== -->
|
|
|
|
<A NAME="InputSequence"></A>
|
|
<A NAME="InputSequenceData"></A>
|
|
<A NAME="InputQuerySequence"></A>
|
|
<A NAME="InputProteinSequence"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Enter a query sequence into <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A></B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<!-- IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right"><IMG SRC="images/______" WIDTH="200" HEIGHT="100" BORDER="0" ALT="________" ALIGN="right">
|
|
<IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right" -->
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<UL>
|
|
|
|
<LI>The most common way to access SPARCLE is to enter a query sequence into <!-- A HREF="/Structure/sparcle/archview.html" --><A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A>, either as <A HREF="https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=BlastDocs&DOC_TYPE=BlastHelp">FASTA</A>-formatted sequence data, or as an accession number of a sequence that is in the <A HREF="/protein">protein</A> or <A HREF="/nucleotide">nucleotide</A> databases. The search results will include a <B>"Protein Classification" section</B> if the query protein has a hit to a curated domain architecture in the <A HREF="/sparcle">SPARCLE database</A>. In the protein classification section, click on the domain architecture ID in order to open the corresponding SPARCLE record.</LI><BR>
|
|
|
|
<LI>The <A HREF="#InputSequenceIllustration">illustration below</A> provides an <SPAN style="color:#D70000">example</SPAN>, using <A HREF="/protein/NP_387887">NP_387887</A>, DNA gyrase subunit B, as the protein query sequence.</LI><BR>
|
|
|
|
<LI>You can <B>click</B> on the individual panels of the illustration to open the corresponding live web page:<BR><BR>
|
|
<UL>
|
|
<LI>the <B>1st panel</B> opens a <A HREF="/Structure/cdd/wrpsb.cgi">blank CD-Search page</A>, where you can either paste the <A HREF="/protein/NP_387887?report=fasta">FASTA-formatted sequence for NP_387887</A> or enter your own query sequence</LI>
|
|
<LI>the <B>2nd panel</B>* opens a live view of the <A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=NP_387887">search results for NP_387887</A></LI>
|
|
<LI>the <B>3rd panel</B>* opens the SPARCLE record for the <A HREF="/Structure/sparcle/archview.html?archid=10647733">conserved domain architecture</A><BR><BR>
|
|
|
|
* <I>Please note that the <B>2nd and 3rd panels</B> of the <B>illustration</B> reflect the search results as of January 2017. The corresponding <B>live web pages</B> will show a slightly different result, because the annotation of domain architectures on proteins continues to evolve as new data and publications become available. (See the note about <A HREF="#InputSequenceFootnote">ongoing research</A> beneath the illustration.)</I>
|
|
|
|
</LI>
|
|
</UL><BR>
|
|
|
|
</LI><BR>
|
|
|
|
<!-- OL>
|
|
<LI><A HREF="/Structure/cdd/wrpsb.cgi">Open the CD-Search home page</A>. (This opens a blank search page, where you can either paste the <A HREF="/protein/NP_387887?report=fasta">FASTA-formatted sequence for NP_387887</A> or enter your own query sequence.) Alternatively, follow the link for "<I>Analyze this sequence: Identify Conserved Domains</I>" in the right margin of any protein sequence record to open pre-computed CD-Search results for that sequence.</LI><BR>
|
|
<LI><A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=NP_387887">View the CD-Search results for NP_387887</A>, to see the protein classification for that query sequence, then follow the link for the domain architecture.</LI><BR>
|
|
<LI><A HREF="/Structure/sparcle/archview.html?archid=10647733">View the corresponding SPARCLE record</A>, which lists the evidence used to curate the domain architecture that was used to classify the protein, and which provides links to other sequences with the same domain architecture.</LI><BR>
|
|
</OL -->
|
|
|
|
</UL>
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- =========== INPUT_SEQUENCE_ILLUSTRATION ============== -->
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="0" class="WhiteCell">
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="20">
|
|
<A NAME="InputSequenceIllustration"></A>
|
|
<A NAME="InputSequenceDataIllustration"></A>
|
|
<A NAME="InputQuerySequenceIllustration"></A>
|
|
<A NAME="InputProteinSequenceIllustration"></A>
|
|
</TD>
|
|
|
|
<TD>
|
|
<table style="margin:0px 0px 0px 0px;" width="800" border="0" bgcolor="FFFFFF" class="NormalText" cellpadding="0" cellspacing="0">
|
|
|
|
<tr>
|
|
|
|
<td ALIGN="center" VALIGN="top" class="NormalText">
|
|
<A HREF="/Structure/cdd/wrpsb.cgi"><IMG src="images/about_sparcle_step1_input_protein_sequence_DNA_gyrase_NP_387887.png" WIDTH="211" HEIGHT="410" border="0" align="center" alt="Step 1 in using SPARCLE: Enter a query protein sequence into the CD-Search tool. Click on this graphic to open the CD-Search tool and input your own query protein sequence."></A>
|
|
</td>
|
|
|
|
<td ALIGN="center" VALIGN="center" class="NormalText">
|
|
<img SRC="images/about_sparcle_example_right_arrow_red_top.png" width="22" height="400" border="0" alt="">
|
|
<!--img SRC="../../IMG/spacer.gif" width="7" height="565" border="0" -->
|
|
</td>
|
|
|
|
<td ALIGN="center" VALIGN="top" class="NormalText">
|
|
<A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=NP_387887"><IMG src="images/about_sparcle_step2_CDSearch_results_protein_classification_DNA_gyrase_NP_387887.png" WIDTH="263" HEIGHT="410" border="0" align="center" alt="Step 2 in using SPARCLE: The CD-Search results page will display a Protein Classification section above the graphic summary of conserved domains, if a SPARCLE record exists for the domain architecture in the query protein sequence. Click on this graphic to open the CD-Search results for NP_387887, DNA gyrase subunit B from Bacillus subtilis."></A>
|
|
</td>
|
|
|
|
<td ALIGN="center" VALIGN="center" class="NormalText">
|
|
<img SRC="images/about_sparcle_example_right_arrow_red_top.png" width="22" height="400" border="0" alt="">
|
|
<!-- img SRC="../../IMG/spacer.gif" width="7" height="565" border="0" -->
|
|
</td>
|
|
|
|
<td ALIGN="center" VALIGN="top" class="NormalText">
|
|
<A HREF="/Structure/sparcle/archview.html?archid=10647733"><IMG src="images/about_sparcle_step3_view_sparcle_record_DNA_gyrase_NP_387887.png" WIDTH="281" HEIGHT="410" border="0" align="center" alt="Step 3 in using SPARCLE: The Protein Classification section of the CD-Search results links to the corresponding SPARCLE record, illustrated here. The SPARCLE record shows the name and functional label of the architecture, supporting evidence, and links to other proteins with the same architecture. Click on this graphic to open the SPARCLE record for the domain architecture (architecture ID 10647733) that was found in the protein query sequence, NP_387887, DNA gyrase subunit B from Bacillus subtilis."></A>
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
</table>
|
|
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
<BR>
|
|
|
|
<!-- =========== END_INPUT_SEQUENCE_ILLUSTRATION ============== -->
|
|
|
|
<!-- ========= CROSS_REFERENCE_TO_CD_SEARCH_HELP =========== -->
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="InputSequenceFootnote"></A>
|
|
<A NAME="InputSequenceIllustrationFootnote"></A>
|
|
<A NAME="InputSequenceOngoingResearch"></A>
|
|
<A NAME="InputQuerySequenceFootnote"></A>
|
|
<A NAME="InputProteinSequenceFootnote"></A><SPAN style="background-color: #FFFF00"><B><I>Ongoing research</I></B></SPAN>: The <A HREF="/cdd">Conserved Domain Database (CDD)</A>, as well as the conserved domain architecture annotated on proteins by <A HREF="/sparcle">SPARCLE</A>, continue to evolve as new data become available and as research progresses. Therefore, <SPAN style="background-color: #FFFF00">the live web page views might differ from the illustration above</SPAN>.<BR><BR>
|
|
|
|
For <SPAN style="background-color: #FFFF00">example</SPAN>, in January 2017, the protein sequence <A HREF="/protein/NP_387887">NP_387887</A> was <B>initially</B> annotated with <A HREF="/Structure/sparcle/archview.html?archid=10647733">architecture ID 10647733</A> (as shown in the illustration above). That architecture is named "DNA gyrase subunit B" and includes <B>four distinct conserved domains</B>.<BR><BR>
|
|
In March 2017, when a new build of CDD/SPARCLE was released, the conserved domain architecture annotation for NP_387887 was <B>revised</B> to <A HREF="/Structure/sparcle/archview.html?archid=11481348">architecture ID 11481348</A> (which is a <B>multi-domain</B> that encompasses the four original conserved domains, and which can be seen in the <A HREF="/Structure/cdd/wrpsb.cgi?seqinput=NP_387887">current CD-Search results for NP_387887</A>). That architecture has a more specific and precise name, "type IIA DNA topoisomerase subunit B," and reflects the <B>full length protein model</B>.<BR><BR>
|
|
|
|
<B>To see the four distinct conserved domains that compose the full length protein model</B>, simply change the <A HREF="../../cdd/cdd_help.shtml#GlobalOptions">CD-Search display option</A> on the <A HREF="/Structure/cdd/wrpsb.cgi?seqinput=NP_387887">live CD-Search results for NP_387887</A> from "<!-- A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=16077074&mode=rep" --><A HREF="/Structure/cdd/wrpsb.cgi?seqinput=NP_387887&mode=rep">Concise Results</A>" to "<!-- A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=16077074&mode=full" --><A HREF="/Structure/cdd/wrpsb.cgi?seqinput=NP_387887&mode=full">Full Results</A>" (using the <A HREF="../../cdd/cdd_help.shtml#GlobalOptions">"View" menu</A> near the upper right hand corner of the CD-Search results page). The Full Results display will show the four conserved domains that compose the full length protein model.
|
|
<!-- B>The individual conserved domains that compose the full protein model can be seen by changing the <A HREF="../../cdd/cdd_help.shtml#GlobalOptions">display option</A> on a CD-Search results page</B> from <A HREF="../../cdd/cdd_help.shtml#ConciseDisplay">concise results</A> to <A HREF="../../Structure/cdd/cdd_help.shtml#FullDisplay">full results</A>. (For example, see the <A HREF="/Structure/cdd/wrpsb.cgi?seqinput=NP_387887&mode=full">current full results for NP_387887</A>). --><BR><BR>
|
|
|
|
As the available data and understanding of conserved domain architectures continue to evolve, the domain architectures that are annotated on proteins may evolve as well, as shown in this example. Comments about the data are welcome and can be sent to the NCBI Support Center/Help Desk, which is accessible as a link in the footer of NCBI web pages<!-- A HREF="mailto:info@ncbi.nlm.nih.gov">info@ncbi.nlm.nih.gov</A -->.</LI><BR>
|
|
|
|
<LI><A NAME="InputSequenceCDSearchHelp"></A><A NAME="CDSearchHelp"></A>Additional details about using the <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search tool</A> are provided in the <A HREF="../../cdd/cdd_help.shtml#CDSearch_help_contents">CD-Search Help Document</A>.</LI>
|
|
|
|
</UL><BR>
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- ========= END_CROSS_REFERENCE_TO_CD_SEARCH_HELP =========== -->
|
|
|
|
<!-- =========== END_LEVEL_1_TOPIC_INPUT_SEQUENCE ============= -->
|
|
|
|
<!-- ============= LEVEL_1_TOPIC_INPUT_KEYWORDS =============== -->
|
|
|
|
<A NAME="InputKeyword"></A>
|
|
<A NAME="InputKeywords"></A>
|
|
<A NAME="InputTextTerm"></A>
|
|
<A NAME="InputTextTerms"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Search the <A HREF="/sparcle">SPARCLE database</A> by keyword</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<!-- IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right"><IMG SRC="images/______" WIDTH="200" HEIGHT="100" BORDER="0" ALT="________" ALIGN="right">
|
|
<IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right" -->
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<UL>
|
|
|
|
<LI>The <A HREF="/sparcle">SPARCLE database</A> can be searched by keyword. That will retrieve domain architectures that contain the term(s) of interest in their descriptions.</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>The <A HREF="#InputKeywordsIllustration">illustration below</A> provides an <SPAN style="color:#D70000"><B>example</B></SPAN>. It searches the SPARCLE database for conserved domain architecture records that contain the terms <!-- A HREF="/sparcle/?term=chloride+channel+AND+curated%5BReviewLevel%5D" -->"<I>chloride</I>" and "<I>channel</I>", and limits the results to curated domain architecture records<!-- /A --> by adding <I>curated[ReviewLevel]</I> to the search.</LI><BR>
|
|
|
|
<LI><B>Click</B> on the individual panels of the illustration below to open the corresponding live web page:</LI><BR>
|
|
|
|
<UL>
|
|
<LI>the <B>1st panel</B> opens the <A HREF="/sparcle">SPARCLE database</A> home page, where you can either enter the example query:<BR>
|
|
<I>chloride channel AND curated<A HREF="#SearchFieldReviewLevel">[ReviewLevel]</A></I><BR>
|
|
or enter your own search terms.</LI>
|
|
<LI>the <B>2nd panel</B> opens a live view of the <A HREF="/sparcle/?term=chloride+channel+AND+curated%5BReviewLevel%5D">search results</A><BR>
|
|
<I>(Please note this panel shows the search results as of March 2, 2017. The corresponding live web page will retrieve a larger number of records, as the SPARCLE database continues to grow.)</I></LI>
|
|
<LI>the <B>3rd panel</B> opens a <A HREF="/Structure/sparcle/archview.html?archid=10087058">conserved domain architecture record for the chloride channel protein</A>.</LI>
|
|
</UL>
|
|
<BR>
|
|
|
|
<LI>Beneath the illustration are additional details about:</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>the <A HREF="#InputKeywordsScopeOfSearch">scope of a search</A>, describing which fields of a database record are searched</LI>
|
|
|
|
<LI><A HREF="#InputKeywordsSearchTips">search tips</A> for narrowing or broadening your search:</LI>
|
|
|
|
<UL>
|
|
<!-- LI><A HREF="#SearchTipsAllFields">All Fields are searched by default</A></LI -->
|
|
<LI><A HREF="#SearchTipsSearchFields">How to limit your query to a specific search field</A></LI>
|
|
<LI><A HREF="#SearchTipsQuotes">How to use quotes to force a phrase search</A></LI>
|
|
<LI><A HREF="#SearchTipsTruncation">How to use an asterisk (*) for truncation</A></LI>
|
|
<LI><A HREF="#SearchTipsCompare">Compare some sample search strategies</A></LI>
|
|
</UL>
|
|
|
|
<LI>a tabular list of <A HREF="#SearchFields">search fields</A>, <!-- if you would like to search for your term in only a specific data field --> including a description and sample search for each field</LI>
|
|
</UL>
|
|
<BR>
|
|
|
|
</UL>
|
|
|
|
</UL>
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- =========== INPUT_KEYWORDS_ILLUSTRATION ============== -->
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="0" class="WhiteCell">
|
|
|
|
<TR>
|
|
<TD class="WhiteCell NormalText" width="20">
|
|
<A NAME="InputKeywordIllustration"></A>
|
|
<A NAME="InputKeywordsIllustration"></A>
|
|
<A NAME="InputTextTermIllustration"></A>
|
|
<A NAME="InputTextTermsIllustration"></A>
|
|
</TD>
|
|
|
|
<TD>
|
|
<table style="margin:0px 0px 0px 0px;" width="800" border="0" bgcolor="FFFFFF" class="NormalText" cellpadding="0" cellspacing="0">
|
|
|
|
<tr>
|
|
|
|
<td ALIGN="center" VALIGN="top" class="NormalText">
|
|
<A HREF="/sparcle"><IMG src="images/entrez_sparcle_step1_home_page_search_for_chloride_channel_curated_reviewlevel.png" WIDTH="211" HEIGHT="410" border="0" align="center" alt="Step 1 in searching the SPARCLE database by keyword: Enter the desired search terms in the query box, adding curated[ReviewLevel], if desired, to limit results to curated domain architectures. Click on this graphic to open the SPARCLE home page and input your own search terms."></A>
|
|
</td>
|
|
|
|
<td ALIGN="center" VALIGN="center" class="NormalText">
|
|
<img SRC="images/about_sparcle_example_right_arrow_red_top.png" width="22" height="400" border="0" alt="">
|
|
<!--img SRC="../../IMG/spacer.gif" width="7" height="565" border="0" -->
|
|
</td>
|
|
|
|
<td ALIGN="center" VALIGN="top" class="NormalText">
|
|
<A HREF="/sparcle/?term=chloride+channel+AND+curated%5BReviewLevel%5D"><IMG src="images/entrez_sparcle_step2_search_results_chloride_channel_curated_reviewlevel.png" WIDTH="263" HEIGHT="410" border="0" align="center" alt="Step 2 in searching the SPARCLE database by keyword: View the search results and click on the architecture ID of any domain architecture of interest to open its summary page. Click on this graphic to open the results of a SPARCLE search for chloride channel AND curated[ReviewLevel]."></A>
|
|
</td>
|
|
|
|
<td ALIGN="center" VALIGN="center" class="NormalText">
|
|
<img SRC="images/about_sparcle_example_right_arrow_red_top.png" width="22" height="400" border="0" alt="">
|
|
<!-- img SRC="../../IMG/spacer.gif" width="7" height="565" border="0" -->
|
|
</td>
|
|
|
|
<td ALIGN="center" VALIGN="top" class="NormalText">
|
|
<A HREF="/Structure/sparcle/archview.html?archid=10087058"><IMG src="images/entrez_sparcle_step3_view_sparcle_record_chloride_channel_arch_id_10087058.png" WIDTH="281" HEIGHT="410" border="0" align="center" alt="Step 3 in searching the SPARCLE database by keyword: view the SPARCLE record for the domain architecture of interest. Click on this graphic to open the SPARCLE record for architecture ID 10087058, chloride channel protein. From there, you can view evidence used to curate the domain architecture, retrieve all protein sequences which contain that architecture, and more."></A>
|
|
</td>
|
|
|
|
</tr>
|
|
|
|
</table>
|
|
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
<BR>
|
|
|
|
<!-- =========== END_INPUT_KEYWORDS_ILLUSTRATION ============== -->
|
|
|
|
<!-- ===== INPUT_KEYWORDS_ADDITIONAL_DETAILS_AND_SEARCH_TIPS ===== -->
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="InputKeywordsScopeOfSearch"></A><A NAME="KeywordsScopeOfSearch"></A><B>Scope</B> of a keyword search: <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A></LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>When you search the <A HREF="/sparcle">SPARCLE</A> database by keyword (e.g., <A HREF="/sparcle?term=gyrase">gyrase</A>), <A HREF="#SearchFields"><B>All Fields</B></A> are searched by <B>default</B>. This includes looking for your keyword(s) in the <B>name</B> &amp; functional <B>label</B> (description) of the conserved domain architecture. This also includes looking for your keyword(s) in the entities that were used as evidence <!-- by curators or by automated naming algorithms, as explained in the data processing section of this document --> to give a name to the architecture<!-- (and subsequently, to the proteins that contain that architecture) -->, such as <B>gene names</B> (names of genes whose protein products have that architecture), <B>protein names</B> (definition lines of proteins used as evidence to support the domain architecture, such as SwissProt records, where protein sequences are named based on literature), <B>conserved domain names</B> (including the short and long names of conserved domains that are present in the architecture), <B>Enzyme Commission (EC) numbers</B> and corresponding EC text <B>descriptions</B>.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<LI><A NAME="InputKeywordsSearchTips"></A><A NAME="KeywordsSearchTips"></A><A NAME="SearchTips"></A><B>Search tips</B> for keyword searches: <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A><BR><BR>
|
|
|
|
<A HREF="#SearchTipsAllFields">all fields are searched by default</A> | <A HREF="#SearchTipsSearchFields">how to limit your query to a specific search field</A> |
|
|
<A HREF="#SearchTipsQuotes">use quotes to force a phrase search</A> |
|
|
<A HREF="#SearchTipsTruncation">use an asterisk (*) for truncation</A> |
|
|
<A HREF="#SearchTipsCompare">compare some sample search strategies</A>
|
|
|
|
</LI><BR>
|
|
|
|
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="SearchTipsAllFields"></A>By default, <A HREF="#SearchFields"><B>All Fields</B></A> are searched in the <A HREF="/sparcle">SPARCLE database</A>.</LI><BR>
|
|
|
|
<LI><A NAME="SearchTipsSearchFields"></A><B>Limit a query to a specific search field:</B><BR>
|
|
If you prefer to narrow your search to a specific field, you can:</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>Use the <A HREF="/sparcle/limits">"<B>Limits</B>" page</A> or the <A HREF="/sparcle/advanced">"<B>Advanced</B>" search page</A> to view a list of available search fields, and <B>select</B> the field of interest from a <B>pull-down menu</B>.<!--BR><BR>
|
|
<B><I>-- OR --</I></B --></LI><BR>
|
|
|
|
<LI>Alternatively, you can <B>type</B> the field name, surrounded by <B>square brackets []</B>, directly after your search term, with or without a space between your term and the first bracket. For example:</LI>
|
|
|
|
<UL>
|
|
<LI>a search for: <A HREF="/sparcle?term=curated%5BReviewLevel%5D"><I>curated[ReviewLevel]</I></A> looks for the term "<I>curated</I>" in the "<A HREF="#SearchFieldReviewLevel"><I>ReviewLevel</I></A>" search field</LI>
|
|
<LI>a search for: <A HREF="/sparcle?term=bacteria%5BOrganism%5D"><I>bacteria[Organism]</I></A> looks for the term "<I>bacteria</I>" only in the "<A HREF="#SearchFieldOrganism"><I>Organism</I></A>" search field. This will retrieve conserved domain architectures whose names and labels are applicable within bacteria but not within other taxonomic nodes.<!-- Conserved domain architecture records that mention the term "bacteria" in other search fields or contexts will <B>not</B> be retrieved by this search. --></LI>
|
|
</UL>
|
|
<BR>
|
|
|
|
<LI>The <A HREF="#SearchFields"><B>available search fields</B></A> are listed in a table below, including a description and search example for each field<!-- that appears beneath the illustrated example of a keyword search -->.</LI>
|
|
|
|
<UL>
|
|
<LI>A footnote under the table shows how search fields can be specified using either their <A HREF="#SearchFieldAbbreviations">full spelling or an abbreviation</A>, and in upper case, lower case, or mixed case.</LI>
|
|
</UL>
|
|
<BR>
|
|
|
|
<LI>The "<A HREF="#SearchMethodShowIndex"><B>Show Index</B></A>" link on the <A HREF="/sparcle/advanced">SPARCLE Advanced Search page</A> allows you to browse the index of each search field, where you can see the available terms, the number of records containing each term or phrase, as well as the syntax for entering values in search fields such as CreateDate.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<LI><A NAME="SearchTipsQuotes"></A><A NAME="KeywordsSearchTipsQuotes"></A><A NAME="SearchTipsPhraseSearch"></A><B>Use quotes to search for a phrase:</B><BR>
|
|
Another way to narrow your search is to enclose multiple terms in quotes (e.g., search for <A HREF="/sparcle/?term=%22chloride+channel%22">"chloride channel"</A>).</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>Using quotes will require the system to search for the terms as a <B>phrase</B>. It will therefore only retrieve records where the two words occur together, adjacent to each other.</LI><BR>
|
|
|
|
<LI><B>If quotes are not used</B>, the <A HREF="/gquery">Entrez</A> system may still recognize and handle the terms as a phrase, if they are present in a phrase dictionary used by the search engine. If the terms are <I>not</I> present in the phrase dictionary and are <I>not</I> surrounded by quotes, Entrez will insert a <B>Boolean AND</B> between the terms; in that case, they may or may not appear adjacent to each other in the retrieved records.</LI><BR>
|
|
|
|
<LI>The "<B>Details</B>" section in the right hand margin of a search results page will show you exactly how the Entrez system parsed your query. More search tips are provided in the <a HREF="/books/NBK3827/">PubMed help document</a> and <a HREF="/books/NBK3837/">Entrez help document</a>.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<LI><A NAME="SearchTipsTruncation"></A><A NAME="SearchTipsAsterisk"></A><B>Use an asterisk (*) for truncation</B><BR>
|
|
To broaden a search, you can use an asterisk (*) as a wild card to search for a word stem.</LI><BR>
|
|
<UL>
|
|
<LI>For example, a search for <A HREF="/sparcle?term=chlori%2A"><I>chlori*</I></A> will retrieve records with terms such as <I>chloride, chlorin, chlorinate, chlorinated, chlorinating, chlorination, chlorine, chlorite, and chloritidismutans</I>.</LI>
|
|
<LI>As another example, a search for <A HREF="/sparcle?term=arachidon%2A"><I>arachidon*</I></A> will retrieve records with terms such as <I>arachidonate, arachidonic, arachidonoyl, and arachidonyl</I>.</LI>
|
|
<LI>The <A HREF="/books/NBK3837/">Entrez Help</A> document provides additional information about <A HREF="/books/NBK3837/#EntrezHelp.Using_Wild_Cards_or_Query_Tru">truncating</A> search terms in this way.</LI>
|
|
</UL>
|
|
<BR>
|
|
|
|
<!-- LI><A NAME="SearchTipsTruncation"></A><A NAME="SearchTipsAsterisk"></A>If you would like to <B>expand</B> a search, some techniques you can try include:</LI>
|
|
<UL>
|
|
<LI><B>use truncation</B>: search for a word stem by using an <B>asterisk (*) as a wild card</B>. For example, a serach for <A HREF="/sparcle?term=arachidon%2A">arachidon*</A> will retrieve records with terms such as arachidonate, arachidonic, arachidonoyl. The <A HREF="/books/NBK3837/">Entrez Help</A> document provides additional information about <A HREF="/books/NBK3837/#EntrezHelp.Using_Wild_Cards_or_Query_Tru">truncating</A> search terms in this way.</LI>
|
|
<LI><B>add synonyms to your query</B>: For example, compare the results of the following searches:</LI>
|
|
<UL>
|
|
<LI><A HREF="/sparcle?term=%22amyotrophic%20lateral%20sclerosis%22">"amyotrophic lateral sclerosis"</A></LI>
|
|
<LI><A HREF="/sparcle?term=%22amyotrophic%20lateral%20sclerosis%22%20OR%20ALS">"amyotrophic lateral sclerosis" OR ALS</A></LI>
|
|
</UL>
|
|
</UL -->
|
|
|
|
<LI><A NAME="SearchTipsCompare"></A><A NAME="SearchTipsCompareSearchStrategies"></A><A NAME="SearchTipsSampleSearches"></A><B>Compare some sample search strategies:</B><BR>
|
|
As examples of various search strategies, <B>compare the results</B> of the following searches:</LI><BR>
|
|
<UL>
|
|
<LI><A HREF="/sparcle?term=chlori%2A"><I>chlori*</I></A><BR>
|
|
<I>If an asterisk is used to truncate a search term, the system will retrieve all records that contain the specified word stem. The word stem can appear in any field of the record, unless you specify a desired <A HREF="#SearchFields">search field</A>.</I></LI><BR>
|
|
|
|
<LI><A HREF="/sparcle/?term=chloride+channel">chloride channel</A><BR>
|
|
<I>If no <A HREF="#SearchFields">search field</A> is specified, [All Fields] are searched by default. Also, the keywords are not necessarily searched as a phrase, but can occur separately in different parts of the record.</I></LI><BR>
|
|
<LI><A HREF="/sparcle?term=%22chloride%20channel%22%5BAll%20Fields%5D">"chloride channel"</A><BR>
|
|
<I>Use quotes to search for the terms as a phrase.</I></LI><BR>
|
|
|
|
<LI><A HREF="/sparcle?term=%22chloride%20channel%22%5BName%5D">"chloride channel"[Name]</A><BR>
|
|
<I>Limit the query to a specific <A HREF="#SearchFields">search field</A>, such as the <A HREF="#SearchFieldName">[Name]</A> field shown here, to narrow the search results.</I></LI><BR>
|
|
|
|
<LI><A HREF="/sparcle?term=%22chloride%20channel%22%5BAll%20Fields%5D%20AND%20curated%5BReviewLevel%5D">"chloride channel" AND curated[ReviewLevel]</A><BR>
|
|
<I>Add a <A HREF="#SearchFieldReviewLevel">[ReviewLevel]</A> criterion to the query, as shown above, to limit retrieval to a specified subset of architectures (e.g., architectures that have been <A HREF="#DataProcessingCurated">curated</A>, <A HREF="#DataProcessingAutonamed">autonamed</A>, or <A HREF="#DataProcessingNamedByDomain">namedByDomain</A>).</I></LI><BR>
|
|
</UL>
|
|
|
|
</UL>
|
|
|
|
</UL>
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- === END_INPUT_KEYWORDS_ADDITIONAL_DETAILS_AND_SEARCH_TIPS === -->
|
|
|
|
<!-- =========== TABLE_OF_SEARCH_FIELDS ============== -->
|
|
|
|
<A NAME="SearchFields"></A>
|
|
<A NAME="InputSearchFields"></A>
|
|
<A NAME="InputKeywordsSearchFields"></A>
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<UL>
|
|
<LI><B>Search Fields:</B> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A><BR><BR>
|
|
|
|
As noted in the <A HREF="#SearchTips"><B>Search Tips</B></A> above, when you search the <A HREF="/sparcle"><B>SPARCLE</B></A> database by <A HREF="#OutputKeywords"><B>keyword</B></A>, <B>All Fields</B> are searched by default. If you prefer to restrict your search to a specific data field, you can use the pull-down menus on either the <A HREF="/sparcle/limits">"<B>Limits</B>"</A> or the <A HREF="/sparcle/advanced">"<B>Advanced</B>"</A> search page to select the desired field. Alternatively, you can type the desired field directly in your query, surrounding the field name with <A HREF="#SearchFieldAbbreviations"><b>square brackets [].*</b></A><BR>
|
|
|
|
</LI>
|
|
</UL>
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<TABLE width="75%" style="margin:0px 0px 0px 0px;" border="0" class="WhiteCell">
|
|
|
|
<TR>
|
|
<TD width="30" class="NormalText"> </TD>
|
|
<TD colspan="3" class="NormalText">The available search fields include:<BR><BR></TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD width="30" class="NormalText" align="left" valign="top"> </TD>
|
|
|
|
<TD class="NormalText" align="left" valign="top">
|
|
<A HREF="#SearchFieldAll">All Fields</A><BR>
|
|
<A HREF="#SearchFieldBiosystemsDescription">BiosystemsDescription</A><BR>
|
|
<A HREF="#SearchFieldCDDDescription">CDDDescription</A><BR>
|
|
<A HREF="#SearchFieldCDDShortname">CDDShortname</A><BR>
|
|
<A HREF="#SearchFieldCDDTitle">CDDTitle</A><BR>
|
|
<A HREF="#SearchFieldComment">Comment</A><BR>
|
|
</TD>
|
|
|
|
<TD class="NormalText" align="left" valign="top">
|
|
<A HREF="#SearchFieldCreateDate">CreateDate</A><BR>
|
|
<A HREF="#SearchFieldDefline">Defline</A><BR>
|
|
<A HREF="#SearchFieldECNumber">ECNumber</A><BR>
|
|
<!-- A HREF="#SearchFieldECTitle">ECTitle</A><BR -->
|
|
<A HREF="#SearchFieldFilter">Filter</A><BR>
|
|
<A HREF="#SearchFieldGeneDescription">GeneDescription</A><BR>
|
|
<A HREF="#SearchFieldGeneSymbol">GeneSymbol</A><BR>
|
|
</TD>
|
|
|
|
<TD class="NormalText" align="left" valign="top">
|
|
<A HREF="#SearchFieldLabel">Label</A><BR>
|
|
<A HREF="#SearchFieldName">Name</A><BR>
|
|
<A HREF="#SearchFieldOrganism">Organism</A><BR>
|
|
<A HREF="#SearchFieldPDBTitle">PDBTitle</A><BR>
|
|
<A HREF="#SearchFieldReviewLevel">ReviewLevel</A><BR>
|
|
<A HREF="#SearchFieldStatus">Status</A><BR>
|
|
<A HREF="#SearchFieldUID">UID</A><BR>
|
|
</TD>
|
|
|
|
</TR>
|
|
</TABLE>
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" border="0" class="WhiteCell">
|
|
<TR>
|
|
<TD width="20" class="NormalText"> </TD>
|
|
|
|
<TD>
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" border="black 1px" cellpadding="3" class="format1 TableText1">
|
|
|
|
<tr>
|
|
<td class="format1H"><b>Field name</b></td>
|
|
<td class="format1H" style="white-space: nowrap;"><A HREF="#SearchFieldAbbreviations"><b>Abbreviation*</b></A></td>
|
|
<td class="format1H"><b>Description</b></td>
|
|
<td class="format1H"><b>Sample Search</b></td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_All ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldAll"></A>All Fields</td>
|
|
<td class="format1B" valign="Top">[All]<BR>
|
|
[All Fields]</td>
|
|
<td class="format1B" valign="Top">Searches all of the indexed fields in the SPARCLE database.<BR><BR>
|
|
If no field specifier is included in a query, the system searches [All] fields by <B>default</B>, as happens with the <B>first sample search shown at the right</B>. Click on that search to open the corresponding results page. The "<B>Search Details</B>" box that appears in the right hand margin of the search results page shows that the query was translated by the system to:<BR>
|
|
<I>chloride[All Fields] AND channel[All Fields]</I>
|
|
</td>
|
|
|
|
<td class="format1B" valign="Top">
|
|
<A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
|
|
<a HREF="/sparcle?cmd=search&Term=chloride+channel"><b>chloride channel</b></a> <BR><BR> <!-- Use the escape character %22 in the URL to represent quotes around the phrase. -->
|
|
The basic search above, in which the query terms are entered <B>without quotes</B>, will retrieve the architecture(s) that contain the <B>word</B> "chloride" and the <B>word</B> "channel" in any field of the record. The words do not have to be adjacent to each other in the record (i.e., they do not have to appear as a phrase), and they do not have to appear in the same field.
|
|
<br><br><br>
|
|
|
|
<a HREF="/sparcle?cmd=search&Term=%22chloride+channel%22%5BAll%5D"><b>"chloride channel"[all]</b></a> <BR><BR> <!-- Use the escape character %22 in the URL to represent quotes around the phrase. -->
|
|
The search above, which surrounds the search terms <B>with quotes</B>, will retrieve the architecture(s) that contain the <B>phrase</B> "chloride channel" in any field of the record. <I>(The <A HREF="#SearchFieldsQuotes">quotes</A> surrounding the search terms ensure they are searched as a phrase.)</I><BR><BR><BR>
|
|
|
|
<!-- The <A HREF="#SearchTipsQuotes"><B>quotes</B></A> surrounding the search terms ensure they are searched as a phrase.<A HREF="#SearchTipsQuotes">**</A> If quotes are not used and the terms are not automatically recognized as a phrase by the Entrez system, Entrez will insert a Boolean AND between the terms and they may or may not appear adjacent to each other in the retrieved records.><br><br -->
|
|
|
|
<I>Note: <B>Compare</B> the results of the above search, which looks for the <A HREF="#SearchFieldsQuotes">phrase</A> "chloride channel" in <B>any field</B> of the record, with the more specific results obtained by the sample <A HREF="#SearchFieldName">[Name] field</A> search:<BR>
|
|
<a HREF="/sparcle?cmd=search&Term=%22chloride+channel%22%5BName%5D"><b>"chloride channel"[Name]</b></a>
|
|
<BR>
|
|
which retrieves records containing the phrase "chloride channel" only in the name of the conserved domain architecture.<BR>
|
|
(The <A HREF="#DataProcessing">data processing</A> section of this document describes how architectures are named.)</I><BR><BR>
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- ====== SEARCH_FIELD_BiosystemsDescription ========= -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldBiosystem"></A><A NAME="SearchFieldBiosystems"></A><A NAME="SearchFieldBiosystemDescription"></A><A NAME="SearchFieldBiosystemsDescription"></A>BiosystemsDescription</td>
|
|
<td class="format1B" valign="Top">[BiosystemsDescription]</td>
|
|
<td class="format1B" valign="Top">Descriptions of <A HREF="/biosystems/">BioSystems</A> that are listed as supporting evidence for conserved domain architectures in the SPARCLE database.<BR><BR>
|
|
As noted on the <A HREF="/Structure/biosystems/docs/biosystems_about.html">About BioSystems</A> page, a biosystem is a group of molecules that interact in a biological system. One type of biosystem is a biological pathway, which can consist of interacting genes, proteins, and small molecules. Another type of biosystem is a disease, which can involve components such as genes, biomarkers, and drugs.<BR><BR></td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=%22folate+biosynthesis%22%5BBiosystemsDescription%5D"><b>"folate biosynthesis"[BiosystemsDescription]</b></a> <BR><BR>
|
|
will retrieve architecture(s) that list, as supporting evidence, biosystems whose descriptions contain the <A HREF="#SearchFieldsQuotes">phrase</A> "folate biosynthesis."<BR><BR>
|
|
<!-- I>(The <A HREF="#SearchFieldsQuotes">quotes</A> surrounding the search terms ensure they are searched as a phrase.)</I --><BR><BR>
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- ======== SEARCH_FIELD_CDDDescription =========== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldCDDDescription"></A>CDDDescription</td>
|
|
<td class="format1B" valign="Top">[CDDDescription]</td>
|
|
<td class="format1B" valign="Top">Description of <A HREF="/cdd/">conserved domain models</A> that are components of, or that are listed as supporting evidence for, conserved domain architectures in the SPARCLE database.</td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=%22transport+proteins%22%5BCDDDescription%5D"><b>"transport proteins"[CDDDescription]</b></a> <BR><BR>
|
|
will retrieve architecture(s) that contain conserved domain models whose description includes the <A HREF="#SearchFieldsQuotes">phrase</A> "transport proteins."<BR><BR>
|
|
<!-- I>(The <A HREF="#SearchFieldsQuotes">quotes</A> surrounding the search terms ensure they are searched as a phrase.)</I><BR><BR -->
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_CDDShortname ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldCDDShortname"></A>CDDShortname</td>
|
|
<td class="format1B" valign="Top">[CDDShortname]</td>
|
|
<td class="format1B" valign="Top">Short names of conserved domain models that are components of, or that are listed as supporting evidence for, conserved domain architectures in the SPARCLE database.<BR><BR>
|
|
|
|
The short name is the label that appears on the conserved domain's <A HREF="../../cdd/cdd_help.shtml#RPSB_HitColors">cartoon</A> in a <A HREF="../../cdd/cdd_help.shtml#RPSBResults">CD-Search results display</A><!-- Aron, 5/24/17, explained that the shortname is the label that appears on a conserved domain cartoon in the CD-Search results -->.<BR><BR>
|
|
|
|
<I>Note: This field can only be searched by entering the complete short name, surrounded by quotes. Entering a single term or other fragment from the short name will not retrieve results. (See examples below.)</I><BR><BR>
|
|
|
|
<I>Because of this, it is better to search the <A HREF="#SearchFieldCDDDescription">[CDDDescription]</A> field because it offers more comprehensive searches.</I><BR><BR>
|
|
|
|
--------------------<BR>
|
|
Examples: To illustrate the use of the [CDDShortname] field:<BR><BR>
|
|
|
|
A search for the following complete string: <a HREF="/sparcle?cmd=search&term=%22voltage+gated+clc%22%5BCDDShortname%5D"><I>"voltage gated clc"[CDDShortname]</I></a> will retrieve architectures that contain a conserved domain model with that short name.<BR><BR>
|
|
|
|
However, a search for the single word: <a HREF="/sparcle?cmd=search&term=voltage%5BCDDShortname%5D"><I>voltage[CDDShortname]</I></a> will not retrieve any records, because there are no conserved domains that have a short name consisting of the single word "voltage."<BR>
|
|
|
|
--------------------<BR><BR>
|
|
|
|
Tip: The <A HREF="/sparcle/advanced">Advanced search page</A> can be used to browse the available terms in any index.<BR>
|
|
|
|
For example, to see a list of short names, use the "<I>Builder</I>" section of the advanced search page, select the <I>CDDShortname</I> search field from the pull-down menu, then click on "<I>Show index list</I>."<BR><BR>
|
|
|
|
<I>Note:</I> If you <I>do not enter any term</I> in the text box beside the selected search field, the system will automatically take you to the top of the index for the selected search field, and you can then scroll through the terms.<BR><BR>
|
|
|
|
If you <I>enter a term</I> in the text box before clicking on "Show index list," the search system will jump to the part of the index that contains your term, then you can scroll up or down.
|
|
<BR><BR>
|
|
|
|
<!-- However, any one of the following searches will retrieve architectures that contain conserved domain models with the specified short name:<BR><BR>
|
|
<I>"voltage gated clc"[CDDShortname]</I><BR>
|
|
<I>"voltage clc voltage gated chloride channel"[CDDShortname]</I><BR>
|
|
<I>"voltage gated clc clc voltage gated chloride channel the clc chloride channels catalyse the selective"[CDDShortname]</I><BR -->
|
|
</td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=%22voltage+gated+clc%22%5BCDDShortname%5D"><b>"voltage gated clc"[CDDShortname]</b></a> <BR><BR>
|
|
will retrieve architecture(s) that contain a conserved domain model whose short name is "voltage gated clc".<BR><BR>
|
|
<I>(The quotes surrounding the search terms ensure they are searched as a <A HREF="#SearchFieldsQuotes">phrase</A>.)</I><BR><BR>
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_CDDTitle ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldCDDTitle"></A>CDDTitle</td>
|
|
<td class="format1B" valign="Top">[CDDTitle]</td>
|
|
<td class="format1B" valign="Top">Title of conserved domain models that are components of, or that are listed as supporting evidence for, conserved domain architectures in the SPARCLE database.<BR><BR>
|
|
|
|
<I>Note: Some older conserved domain models do not have a title. For example, the conserved domain model with accession <A HREF="/cdd/cddsrv.cgi?uid=cd00400">cd00400</A> has a short name of "Voltage_gated_ClC" and an extensive description, <!-- which includes the phrase "CLC voltage-gated chloride channel," -->but it doesn't have a separate title. <!-- Another model, <A HREF="/Structure/cdd/cddsrv.cgi?uid=pfam00654">pfam00654</A>, has a short name, title, and description. --> As a result, those records will not be retrieved by a search of the [CDDTitle] field.<BR><BR>
|
|
|
|
Therefore, it is generally better to search the <A HREF="#SearchFieldCDDDescription">[CDDDescription]</A> field, rather than the [CDDTitle] field, because the [CDDDescription] field provides a more comprehensive search.<BR><BR>
|
|
|
|
For example, compare the results of the [CDDTitle] and [CDDDescription] searches:<BR><BR>
|
|
|
|
<a HREF="/sparcle?cmd=search&term=voltage%5BCDDTitle%5D">voltage[CDDTitle]</a><BR>
|
|
vs.<BR>
|
|
<a HREF="/sparcle?cmd=search&term=voltage%5BCDDDescription%5D">voltage[CDDDescription]</a><BR><BR></I>
|
|
|
|
<!-- a HREF="/sparcle?cmd=search&term=%22voltage+gated+chloride+channel%22%5BCDDDescription%5D">"voltage gated chloride channel"[CDDDescription]</a><BR>
|
|
vs.<BR>
|
|
<a HREF="/sparcle?cmd=search&term=%22voltage+gated+chloride+channel%22%5BCDDDescription%5D">"voltage gated chloride channel"[CDDDescription]</a>
|
|
</I><BR><BR -->
|
|
|
|
</td>
|
|
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
|
|
<a HREF="/sparcle?cmd=search&term=voltage%5BCDDTitle%5D"><b>voltage[CDDTitle]</b></a> <BR><BR>
|
|
will retrieve architecture(s) that contain a conserved domain model whose title includes the <B>word</B> "voltage".<BR><BR><BR>
|
|
|
|
<a HREF="/sparcle?cmd=search&term=%22voltage+gated+chloride+channel%22%5BCDDTitle%5D"><b>"voltage gated chloride channel"[CDDTitle]</b></a> <BR><BR>
|
|
will retrieve architecture(s) that contain a conserved domain model whose title includes the <A HREF="#SearchFieldsQuotes">phrase</A> "voltage gated chloride channel".<BR><BR>
|
|
<!-- I>(The <A HREF="#SearchFieldsQuotes">quotes</A> surrounding the search terms ensure they are searched as a phrase.)</I><BR><BR -->
|
|
|
|
<I>Note: <!-- Some older CDD records do not have a title, so it -->It is generally better to search the <A HREF="#SearchFieldCDDDescription">[CDDDescription]</A> field, rather than the [CDDTitle] field, because the [CDDDescription] field provides a more comprehensive search. See the note and examples in the preceding column.</I><BR><BR>
|
|
|
|
<!-- I>For example, compare the searches above with those below:<BR><BR>
|
|
|
|
<a HREF="/sparcle?cmd=search&term=%22voltage+gated+chloride+channel%22%5BCDDDescription%5D">voltage[CDDDescription]</a> <BR><BR>
|
|
will retrieve architecture(s) that contain a conserved domain model whose description includes the <B>word</B> "voltage".<BR><BR>
|
|
|
|
<a HREF="/sparcle?cmd=search&term=%22voltage+gated+chloride+channel%22%5BCDDDescription%5D">"voltage gated chloride channel"[CDDDescription]</a> <BR><BR>
|
|
will retrieve architecture(s) that contain a conserved domain model whose description includes the <B>phrase</B> "voltage gated chloride channel".</I><BR><BR -->
|
|
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_Comment ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldComment"></A>Comment</td>
|
|
<td class="format1B" valign="Top">[Comment]</td>
|
|
<td class="format1B" valign="Top">The [Comment] field contains free text that was written by curators in the supporting evidence fields of SPARCLE records. It represents something the curators wanted to note about the conserved domain architecture, based on the research they did in curating and naming the architecture.<BR><BR></td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=chloride%5BComment%5D"><b>chloride[Comment]</b></a> <BR><BR>
|
|
will retrieve the architectures that contain the word "<I>chloride</I>" in the comments section of a conserved domain architecture's supporting evidence.<BR><BR></td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_CreateDate ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldCreateDate"></A>CreateDate</td>
|
|
<td class="format1B" valign="Top">[CreateDate]<BR>[CDAT]<BR>[PDAT]<BR>[DP]</td>
|
|
<td class="format1B" valign="Top">The date on which the current version of a conserved domain architecture record was published in the SPARCLE curation system.<BR><BR>
|
|
This is referred to as the Create Date [CDAT]. Alternatively, it is sometimes referred to as the Publication Date, or Date of Publication, hence the alternative abbreviations of [PDAT] or [DP].<BR><BR>
|
|
The architecture subsequently becomes available in the public SPARCLE database, although that might happen a bit later.
|
|
<!-- The date on which a given conserved domain architecture ID first appeared in the public SPARCLE database.<BR><BR>
|
|
This is referred to as the Create Date [CDAT]. Alternatively, it is sometimes referred to as the Publication Date, or Date of Publication, hence the alternative abbreviations of [PDAT] or [DP]. -->
|
|
<BR><BR>
|
|
|
|
Examples:<BR>
|
|
--------------------<BR>
|
|
To search for a <B>specific day, month, or year</B>, enter it in any one of the following formats:<BR><BR>
|
|
|
|
<B>YYYY/MM/DD</B><BR>
|
|
will retrieve all architectures that were published in the SPARCLE curation system on the specified <B>day</B><BR><BR>
|
|
|
|
or<BR><BR>
|
|
<B>YYYY/MM</B><BR>
|
|
will retrieve all architectures that were published in the SPARCLE curation system in the specified <B>month</B><BR><BR>
|
|
|
|
or<BR><BR>
|
|
<B>YYYY</B><BR>
|
|
will retrieve all architectures that were published in the SPARCLE curation system in the specified <B>year</B><BR><BR>
|
|
|
|
--------------------<BR>
|
|
|
|
To search for a <B>range of dates</B>, enter your query in any one of the following formats, using the <B>colon (:)</B> as the range operator:<BR><BR>
|
|
|
|
<B>YYYY/MM/DD[CDAT]:YYYY/MM/DD[CDAT]</B><BR>will retrieve all architectures that were published in the SPARCLE curation system between the two dates you specified<BR><BR>
|
|
|
|
</td>
|
|
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
|
|
<B>Single date:</B><BR><BR>
|
|
|
|
<a HREF="/sparcle?cmd=search&term=2017/04/20%5BCDAT%5D"><b>2017/04/20[CDAT]</b></a> <BR><BR>
|
|
will retrieve all architectures that were published in the SPARCLE curation system on 20 April 2017.<BR><BR><BR>
|
|
|
|
<B>Date range:</B><BR><BR>
|
|
|
|
<a HREF="/sparcle?cmd=search&term=2017/04/20%5BCDAT%5D%20%3A%202017/05/18%5BCDAT%5D"><b>2017/04/20[CreateDate] : 2017/05/18[CreateDate]</b></a> <BR><BR>
|
|
will retrieve all architectures that were published in the SPARCLE curation system between 20 April 2017 and 18 May 2017.<BR><BR>
|
|
|
|
In the query above, the <B>colon (:)</B> serves as the range operator.
|
|
<BR><BR>
|
|
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_Defline ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldDefline"></A>Defline</td>
|
|
<td class="format1B" valign="Top">[Defline]</td>
|
|
<td class="format1B" valign="Top">The definition line (description) of any protein sequence that was used as supporting evidence for a conserved domain architecture.</td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=chloride%5Bdefline%5D"><b>chloride[defline]</b></a> <BR><BR>
|
|
will retrieve the architectures that list, as supporting evidence, any proteins that have the term "chloride" in their definition line.<BR><BR></td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_ECNumber ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldECNumber"></A>ECNumber</td>
|
|
<td class="format1B" valign="Top">[ECNumber]</td>
|
|
<td class="format1B" valign="Top">The Enzyme Commission (EC) number that is found in the sequence record of any protein that was used as evidence for a conserved domain architecture, or the EC number that is found in a high quality (e.g., curated) sequence record that belongs to the group of proteins annotated with the architecture.<BR><BR>
|
|
|
|
The <A HREF="http://www.chem.qmul.ac.uk/iubmb/enzyme/">Enzyme Nomenclature and Classification</A> system is based on the reactions catalyzed by the enzymes. The system is developed by one of the <A HREF="http://www.chem.qmul.ac.uk/iubmb/nomenclature/">Nomenclature Committees</A> of the <A HREF="http://iubmb.org/biochemical-nomenclature/">International Union of Biochemistry and Molecular Biology (IUBMB)</A>. Separate websites enable you to <A HREF="http://enzyme.expasy.org/enzyme-byclass.html">browse enzymes by class</A>, or to <A HREF="http://enzyme.expasy.org/">search the enzyme nomenclature database</A> by text word or number.<BR><BR>
|
|
|
|
--------------------<BR>
|
|
<I>Method for assigning EC numbers to conserved domain architecture records in SPARCLE:</I><BR><BR>
|
|
|
|
Typically, the EC numbers are taken from Swiss-Prot records that belong to the cluster of proteins that have a given architecture.<BR><BR>
|
|
|
|
In addition, the EC number from a Swiss-Prot record might also be applied to other, similar protein clusters that essentially represent the same architecture. Those architectures might have been split into separate SPARCLE records only because they contain slightly different domain models. For example, two or more protein clusters might have top-scoring hits to overlapping/redundant conserved domain models from different source databases, but their architectures are essentially similar, as in the hypothetical example below.<BR><BR>
|
|
|
|
--------------------<BR>
|
|
<I>As a hypothetical example of how an EC Number from one architecture might be annotated on other architectures:</I><BR><BR>
|
|
|
|
a) Let's say you have three architectures that are similar to each other:
|
|
|
|
<UL>
|
|
<LI>They each have their own SPARCLE record because their top scoring domain models are slightly different from each other:</LI>
|
|
</UL>
|
|
|
|
<OL>
|
|
<LI>------[pfam01]------[pfam05]------</LI>
|
|
<LI>------[pfam01]------[COG12]------</LI>
|
|
<LI>------[pfam01]------[cd0008]------</LI>
|
|
</OL>
|
|
|
|
b) Let's also say that:
|
|
<UL>
|
|
<LI>domain models pfam05, COG12, and cd0008 are redundant (i.e., they come from different source databases, but they overlap with each other on protein sequences and are therefore redundant)</LI>
|
|
<LI>architecture #2 maps to protein sequence SwissProt P0321</LI>
|
|
<LI>SwissProt P0321 has been annotated with an EC number.</LI>
|
|
</UL>
|
|
|
|
c) As a result:<BR>
|
|
<UL>
|
|
<LI>architectures #1, 2, and 3 above are essentially the same (due to the redundant nature of pfam05, COG12, and cd0008)</LI>
|
|
<LI>all three architectures (all three SPARCLE records) will be indexed with the same EC number that was annotated on SwissProt P0321</LI>
|
|
</UL>
|
|
|
|
</td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=%223.6.4.13%22%5BECNumber%5D"><b>3.6.4.13[ECNumber]</b></a> <BR><BR>
|
|
will retrieve architectures that have the Enzyme Commission number of 3.6.4.13, RNA helicase.<BR><BR>
|
|
|
|
<!-- I>(<A HREF="http://enzyme.expasy.org/enzyme-byclass.html">browse enzymes by class</A>,<BR>or<BR><A HREF="http://enzyme.expasy.org/">search the enzyme nomenclature database</A>)</I --></td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_ECTitle ============== -->
|
|
<!-- tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldECTitle"></A>ECTitle</td>
|
|
<td class="format1B" valign="Top">[ECTitle]<BR>[ECTL]</td>
|
|
<td class="format1B" valign="Top">The title of the Enzyme Commission record that is associated with a conserved domain architecture.<BR><BR>
|
|
See the notes under <A HREF="#SearchFieldECNumber">[ECNumber]</A> for information on how Enzyme Commission records are associated with conserved domain architectures.<BR><BR></td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=%22_____+_____%22%5B____%5D"><b>bsid82991[accn]</b></a> <BR><BR>
|
|
will retrieve the SPARCLE record that contains the specified ____ in the _____ field.</td>
|
|
</tr -->
|
|
|
|
<!-- =========== SEARCH_FIELD_Filter ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldFilter"></A>Filter</td>
|
|
<td class="format1B" valign="Top">[Filter]</td>
|
|
<td class="format1B" valign="Top">The [Filter] field can be used to limit your search to conserved domain architectures that have links to another Entrez database of interest, as shown in the search examples to the right.<BR><BR>
|
|
|
|
NCBI uses the following methods to create links between conserved domain architectures and records in other databases:<BR><BR>
|
|
|
|
The SPARCLE data processing pipeline calculates two types of <B>direct links</B>:<BR>
|
|
|
|
<OL>
|
|
<LI><B>sparcle_protein</B>: each conserved domain architecture in the SPARCLE database links to all protein sequences that have the architecture.
|
|
<!-- The <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A> and <A HREF="/Structure/lexington/lexington.cgi">CDART</A> tools are used to identify all protein sequences in the non-redundant (nr) database that contain a given conserved domain architecture. Links are then made from the architecture's record in the SPARCLE database to all corresponding sequences in the protein database.<BR>
|
|
Every conserved domain architecture in SPARCLE database links to all protein sequences in the non-redundant (nr) database that contain the architecture.<BR>
|
|
All proteins in nr that have the sparcle_architecture of interest --></LI><BR>
|
|
<LI><B>sparcle_cdd</B>: each conserved domain architecture in the SPARCLE database links to all of the <A HREF="../../cdd/cdd_help.shtml#CDWhat">conserved domain</A> models (<A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hits</A> and <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_superfamily">superfamilies</A>) that compose the architecture. For example, if an architecture contains one specific hit and one superfamily, that SPARCLE record will link to two <A HREF="/cdd/">Conserved Domain Database (CDD)</A> records -- one for the specific hit and one for the superfamily.
|
|
</LI>
|
|
</OL>
|
|
|
|
All other links between SPARCLE and other Entrez databases are <B>indirect</B>, created by a <B>join</B> between the proteins that contain the architecture and the other data types.<!-- For <B>example</B>: -->
|
|
|
|
<UL>
|
|
|
|
<LI>For example, links from <A HREF="/sparcle">SPARCLE</A> architectures to <A HREF="/gene">Gene</A> records are created by a join between the following:<BR><BR>
|
|
<B>sparcle_protein</B>  AND  <B>protein_gene</B>  →  <B>sparcle_gene</B></LI><BR>
|
|
|
|
<!-- LI>links from <A HREF="/sparcle">SPARCLE</A> architectures to <A HREF="/pcassay">BioAssay</A> records are created by a join between the following:<BR><BR>
|
|
<B>sparcle_protein</B>  AND  <B>protein_pcassay_target</B>  →  <B>sparcle_pcassay_target</B></LI><BR -->
|
|
|
|
</UL>
|
|
<BR>
|
|
|
|
</td>
|
|
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle/?term=%22chloride+channel%22+AND+%22sparcle_gene%22%5BFilter%5D"><b>"chloride channel"[All] AND "sparcle_gene"[Filter]</b></a><BR><BR>
|
|
will retrieve conserved domain architectures that have the <A HREF="#SearchFieldsQuotes">phrase</A> "chloride channel" in any field of the record, and have links to records in the <A HREF="/gene">Gene database</A>.<BR><BR><BR>
|
|
|
|
<a HREF="/sparcle/?term=%22chloride+channel%22+AND+%22sparcle_biosystems%22%5BFilter%5D"><b>"chloride channel"[All] AND "sparcle_biosystems"[Filter]</b></a><BR><BR>
|
|
will retrieve conserved domain architectures that have the <A HREF="#SearchFieldsQuotes">phrase</A> "chloride channel" in any field of the record, and have links to records in the <A HREF="/biosystems">Biosystems database</A>.<BR><BR>
|
|
<I>(Note: To view the <B>biosystems</B> that are linked to an architecture, click on an architecture of interest in the SPARCLE search results, then click on the "<B>pathways</B>" link in the right hand margin of the architecture's summary page to open the corresponding Biosystems records.)</I><BR><BR>
|
|
|
|
</td>
|
|
</tr>
|
|
|
|
|
|
<!-- =========== SEARCH_FIELD_GeneDescription ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldGeneDescription"></A>GeneDescription</td>
|
|
<td class="format1B" valign="Top">[GeneDescription]</td>
|
|
<td class="format1B" valign="Top">The description of <A HREF="/gene">Gene</A> records that were used as supporting evidence for conserved domain architectures.<BR><BR>
|
|
The [GeneDescription] index includes text terms from the gene's official full name, official symbol, alternative symbols, and gene summary.<BR><BR></td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=%22chloride+channel%22%5BGeneDescription%5D"><b>"chloride channel"[GeneDescription]</b></a> <BR><BR>
|
|
will retrieve the architecture that lists, as supporting evidence, genes that include the <A HREF="#SearchFieldsQuotes">phrase</A> "chloride channel" in their description.<BR><BR></td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_GeneSymbol ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldGeneSymbol"></A>GeneSymbol</td>
|
|
<td class="format1B" valign="Top">[GeneSymbol]</td>
|
|
<td class="format1B" valign="Top">The gene symbol of <A HREF="/gene">Gene</A> records that were used as supporting evidence for conserved domain architectures.</td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<!-- a HREF="/sparcle?cmd=search&term=eriC%5BGeneSymbol%5D"><b>eriC[GeneSymbol]</b></a> <BR><BR>
|
|
will retrieve the architecture that lists, as supporting evidence, genes whose symbol is "eriC."<BR><BR -->
|
|
<!-- a HREF="/sparcle?cmd=search&term=AANAT%5BGeneSymbol%5D"><b>AANAT[GeneSymbol]</b></a> <BR><BR>
|
|
will retrieve the architecture that lists, as supporting evidence, genes whose symbol is "AANAT."<BR><BR -->
|
|
<a HREF="/sparcle?cmd=search&term=nat16%5BGeneSymbol%5D"><b>nat16[GeneSymbol]</b></a> <BR><BR>
|
|
will retrieve the architecture that lists, as supporting evidence, genes whose symbol is "nat16."<BR><BR>
|
|
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_Label ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldLabel"></A>Label</td>
|
|
<td class="format1B" valign="Top">[Label]</td>
|
|
<td class="format1B" valign="Top">The functional label (description) of a conserved domain architecture.</td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=%22chloride+channel%22%5BLabel%5D"><b>"chloride channel"[Label]</b></a> <BR><BR>
|
|
will retrieve the architecture(s) that contain the <A HREF="#SearchFieldsQuotes">phrase</A> "chloride channel" in the functional Label (description) of the architecture.<BR><BR>
|
|
<!-- <I>The <A HREF="#SearchFieldsQuotes">quotes</A> surrounding the search terms ensure they are searched as a phrase.</I><BR><BR -->
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_Name ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldName"></A>Name</td>
|
|
<td class="format1B" valign="Top">[Name]<BR>[NM]</td>
|
|
<td class="format1B" valign="Top">The name of a conserved domain architecture.<BR><BR>
|
|
|
|
The <A HREF="#DataProcessing">data processing</A> section of this document describes the three different methods by which conserved domain architectures are named:
|
|
|
|
<OL>
|
|
<LI><A HREF="#DataProcessingCurated">Curated architectures</A></LI>
|
|
<LI><A HREF="#DataProcessingAutonamed">Autonamed architectures</A></LI>
|
|
<LI><A HREF="#DataProcessingNamedByDomain">NamedByDomain architectures</A></LI>
|
|
<!-- LI><A HREF="#DataProcessingComputed">Computed name architectures</A></LI -->
|
|
</OL>
|
|
|
|
These represent three tiers of SPARCLE records, which can be retrieved, if desired, using the <A HREF="#SearchFieldReviewLevel">[ReviewLevel]</A> search field.<BR><BR>
|
|
|
|
</td>
|
|
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=%22chloride+channel%22%5BName%5D"><b>"chloride channel"[Name]</b></a> <BR><BR>
|
|
will retrieve the architecture(s) that contain the <A HREF="#SearchFieldsQuotes">phrase</A> "chloride channel" in the name of the architecture.
|
|
<!-- The <A HREF="#SearchFieldsQuotes"><B>quotes</B></A> surrounding the search terms ensure they are searched as a phrase.<BR><BR -->
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_Organism ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldOrganism"></A>Organism</td>
|
|
<td class="format1B" valign="Top">[Organism]<BR>[Orgn]</td>
|
|
<td class="format1B" valign="Top">The taxonomic node to which the <A HREF="#SearchFieldName">name</A> and <A HREF="#SearchFieldLabel">label</A> of the conserved domain architecture apply.<BR><BR>
|
|
|
|
By <B>default</B>, conserved domain architectures are associated with the <B>root of the taxonomic tree</B> (i.e., all organisms). When an architecture is associated with the root, it means the name/label of the architecture is not specific to any node of the full taxonomic tree. This is true of most architectures in the SPARCLE database.<BR><BR>
|
|
|
|
If the [Organism] classification of an architecture is not root, but is instead a <B>more specific taxonomic node</B>, that means the curator is asserting that the name/label chosen for the architecture is applicable within the specified node, but not necessarily within other taxonomic branches.<BR><BR>
|
|
|
|
--------------------<BR><BR>
|
|
|
|
For <B>example</B>, the total number of architectures in the SPARCLE database was 129405 as of July 13, 2017. <I>(Note: the <A HREF="/sparcle?cmd=search&term=all%5BFilter%5D">current total number of architectures</A> might be larger or smaller, if more architectures have been added or removed since that date as a result of <A HREF="#OngoingResearch">ongoing research</A>).</I><!-- the number was 135024 as of June 7, 2017 --><BR><BR>
|
|
|
|
Most of those architectures are assigned, by default, to the root of the taxonomic tree. <I>(As an example, <A HREF="/sparcle?term=all%5BFilter%5D%20NOT%20%28%22archaea%22%5BOrganism%5D%20OR%20%22bacteria%22%5BOrganism%5D%20OR%20%22eukaryota%22%5BOrganism%5D%20OR%20%22fungi%22%5BOrganism%5D%20OR%20%22metazoa%22%5BOrganism%5D%20OR%20%22viridiplantae%22%5BOrganism%5D%20OR%20%22viruses%22%5BOrganism%5D%29">retrieve the architectures that have a taxonomic scope of <B>all organisms</B></A>.)</I><BR><BR>
|
|
|
|
A small number of architectures are assigned to more specific taxonomic nodes, as follows:
|
|
|
|
<UL>
|
|
<LI><A HREF="/sparcle?cmd=search&term=archaea%5BOrganism%5D">archaea</A></LI>
|
|
<LI><A HREF="/sparcle?cmd=search&term=bacteria%5BOrganism%5D">bacteria</A></LI>
|
|
<!-- LI><A HREF="/sparcle?cmd=search&term=cyanobacteria%5BOrganism%5D">cyanobacteria</A> [Note: this node was present in the organism index list on June 7, 2017, but was not present on July 13, 2017]</LI -->
|
|
<LI><A HREF="/sparcle?cmd=search&term=eukaryota%5BOrganism%5D">eukaryota</A></LI>
|
|
<LI><A HREF="/sparcle?cmd=search&term=fungi%5BOrganism%5D">fungi</A></LI>
|
|
<LI><A HREF="/sparcle?cmd=search&term=metazoa%5BOrganism%5D">metazoa</A></LI>
|
|
<LI><A HREF="/sparcle?cmd=search&term=viridiplantae%5BOrganism%5D">viridiplantae</A></LI>
|
|
<LI><A HREF="/sparcle?cmd=search&term=viruses%5BOrganism%5D">viruses</A></LI>
|
|
</UL>
|
|
|
|
The next column provides examples of search strategies that will retrieve conserved domain architectures that have a taxonomic scope of interest.<BR><BR>
|
|
|
|
The <A HREF="#SummaryPage">SPARCLE record</A> for each architecture contains a section entitled "<A HREF="#SummaryPageCuratedNamesAndLabels">Curated names and labels</A>," which includes the architecture's <A HREF="#SummaryPageTaxonomicScope">taxonomic scope</A>.<BR><BR>
|
|
|
|
</td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=bacteria%5BOrganism%5D"><b>bacteria[Organism]</b></a> <BR><BR>
|
|
will retrieve the architectures whose names and labels are applicable within bacteria but not within other taxonomic nodes.<BR><BR><BR>
|
|
|
|
<a HREF="/sparcle?cmd=search&term=viruses%5BOrganism%5D"><b>viruses[Organism]</b></a> <BR><BR>
|
|
will retrieve the architectures whose names and labels are applicable within viruses but not within other taxonomic nodes.<BR><BR><BR>
|
|
|
|
<a HREF="/sparcle?term=guanylate%20cyclase%20AND%20bacteria%5BOrganism%5D"><b>guanylate cyclase AND bacteria[Organism]</b></a> <BR><BR>
|
|
will retrieve the architectures that contain the terms "guanylate" and "cyclase" in any field of the SPARCLE architecture record, and whose names and labels are applicable within bacteria but not within other taxonomic nodes.<BR><BR><BR>
|
|
|
|
<a HREF="/sparcle?term=guanylate%20cyclase%20AND%20eukaryota%5BOrganism%5D"><b>guanylate cyclase AND eukaryota[Organism]</b></a> <BR><BR>
|
|
will retrieve the architectures that contain the terms "guanylate" and "cyclase" in any field of the SPARCLE architecture record, and whose names and labels are applicable within eukaryota but not within other taxonomic nodes.<BR>
|
|
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_PDBTitle ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldPDBTitle"></A>PDBTitle</td>
|
|
<td class="format1B" valign="Top">[PDBTitle]<BR>[PDBTL]</td>
|
|
<td class="format1B" valign="Top">The title of any <A HREF="http://www.rcsb.org/pdb/home/home.do">Protein Data Bank (PDB)</A> record (3D macromolecular structure) that was used as supporting evidence for the conserved domain architecture.</td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=%22DNA+modification%22%5BPDBTitle%5D"><b>"DNA modification"[PDBTitle]</b></a> <BR><BR>
|
|
will retrieve the SPARCLE record that contains the <A HREF="#SearchFieldsQuotes">phrase</A> "DNA modification" in the title of any 3D structure record that was used as supporting evidence for the conserved domain architecture.</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_ReviewLevel ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldReviewLevel"></A>ReviewLevel</td>
|
|
<td class="format1B" valign="Top">[ReviewLevel]<BR>[REV]</td>
|
|
<td class="format1B" valign="Top">
|
|
|
|
The SPARCLE database has three tiers (review levels) of conserved domain architecture records:
|
|
|
|
<OL>
|
|
<LI><A HREF="#DataProcessingCurated">Curated architectures</A></LI>
|
|
<LI><A HREF="#DataProcessingAutonamed">Autonamed architectures</A></LI>
|
|
<LI><A HREF="#DataProcessingNamedByDomain">NamedByDomain architectures</A></LI>
|
|
<!-- LI><A HREF="#DataProcessingComputed">Computed name architectures</A></LI -->
|
|
</OL>
|
|
|
|
The <A HREF="#DataProcessing">data processing</A> section of this document describes the methods by which architectures in each tier are handled.<BR><BR>
|
|
|
|
The [ReviewLevel] search field can be used to limit retrieval to a specific tier of records, if desired, as shown in the search examples in the next column.<BR><BR>
|
|
|
|
<I>(Note: The [ReviewLevel] field is similar to the <A HREF="#SearchFieldStatus">[Status]</A> field, described below.)</I><BR><BR>
|
|
</td>
|
|
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=curated%5BReviewLevel%5D"><b>curated[ReviewLevel]</b></a> <BR>
|
|
will retrieve all of the curated architectures from the SPARCLE database.<BR><BR><BR>
|
|
|
|
<a HREF="/sparcle?cmd=search&term=autonamed%5BReviewLevel%5D"><b>autonamed[ReviewLevel]</b></a> <BR>
|
|
will retrieve all of the autonamed architectures from the SPARCLE database.<BR><BR><BR>
|
|
|
|
<a HREF="/sparcle?cmd=search&term=namedbydomain%5BReviewLevel%5D"><b>namedbydomain[ReviewLevel]</b></a> <BR>
|
|
will retrieve all of the architectures from the SPARCLE database that were named by domain.<BR><BR><BR>
|
|
|
|
<a HREF="/sparcle/?term=%22chloride+channel%22+AND+curated%5BReviewLevel%5D"><b>"chloride channel" AND curated[ReviewLevel]</b></a> <BR>
|
|
will retrieve all architectures that contain the <A HREF="#SearchFieldsQuotes">phrase</A> "chloride channel" in any field of the record, and will then limit the retrieval to curated architectures.<BR><BR>
|
|
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_Status ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldStatus"></A>Status</td>
|
|
<td class="format1B" valign="Top">[Status]</td>
|
|
<td class="format1B" valign="Top">
|
|
|
|
The [Status] field is similar to the <A HREF="#SearchFieldReviewLevel">[ReviewLevel]</A> field (described above).<BR><BR>
|
|
|
|
The [Status] field divides the SPARCLE database into two broad subsets:
|
|
<UL>
|
|
<LI><B>Reviewed</B> (which represents <A HREF="#DataProcessingCurated">curated</A> records)</LI>
|
|
<LI><B>Provisional</B> (which represents all other SPARCLE records, such as those that were <A HREF="#DataProcessingAutonamed">autonamed</A> or <A HREF="#DataProcessingNamedByDomain">namedByDomain</A>)</LI>
|
|
</UL>
|
|
|
|
<I>(In contrast, the [ReviewLevel] field divides the SPARCLE database based on the method by which the data have been processed (i.e., <A HREF="#DataProcessingCurated">curated</A>, <A HREF="#DataProcessingAutonamed">autonamed</A>, <A HREF="#DataProcessingNamedByDomain">namedByDomain</A>).)</I><BR><BR>
|
|
|
|
Because of this, a search for <B>curated[ReviewLevel]</B> will retrieve the same subset of architectures as <B>reviewed[Status]</B>.<BR>
|
|
A search for <B>provisional[Status]</B> will retrieve all architectures that have <I>not</I> been curated.<BR><BR>
|
|
|
|
</td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=reviewed%5BStatus%5D"><b>reviewed[Status]</b></a> <BR><BR>
|
|
will retrieve all of the reviewed (i.e., <A HREF="#DataProcessingCurated">curated</A>) architectures from the SPARCLE database.<BR><BR><BR>
|
|
|
|
<a HREF="/sparcle/?term=%22chloride+channel%22+AND+reviewed%5BStatus%5D"><b>"chloride channel" AND reviewed[Status]</b></a> <BR><BR>
|
|
will retrieve all architectures that contain the <A HREF="#SearchFieldsQuotes">phrase</A> "chloride channel" in any field of the record, and will then limit the retrieval to reviewed (i.e., <A HREF="#DataProcessingCurated">curated</A>) architectures.
|
|
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_UID ============== -->
|
|
<tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchFieldUID"></A>UID</td>
|
|
<td class="format1B" valign="Top">[UID]<BR>[ArchID]</td>
|
|
<td class="format1B" valign="Top">The <B>unique identification number</B> (<B>UID</B>) of a conserved domain architecture. It is also referred to as an <B>architecture ID</B>, or <B>archid</B>.<BR><BR>
|
|
If you enter an <B>integer</B> as a query, the search system will interpret the query by <B>default</B> as a search of the <B>[UID] field</B>.<BR><BR>
|
|
<A HREF="#ArchitectureID">Additional information about architecture IDs</A> is provided in the section of this document that describes the <A HREF="#SummaryPage">contents of a conserved domain architecture's summary page</A>.
|
|
</td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=10087058%5BUID%5D"><b>10087058[UID]</b></a> <BR><BR>
|
|
The search above, which uses the [UID] field specifier, will retrieve the architecture that has the unique identification number (UID) 10087058.<BR><BR><BR>
|
|
|
|
<a HREF="/sparcle?cmd=search&term=10087058"><b>10087058</b></a> <BR><BR>
|
|
|
|
If you enter the query as just the integer, as shown above, without the [UID] field specifier, the search system will search the [UID] field by <B>default</B>.<BR><BR>
|
|
|
|
Therefore, both of the searches above will retrieve the same architecture.<BR><BR>
|
|
|
|
</td>
|
|
</tr>
|
|
|
|
<!-- =========== SEARCH_FIELD_XXXXXX ============== -->
|
|
<!-- tr>
|
|
<td class="format1A" valign="Top"><A NAME="SearchField____"></A>__________</td>
|
|
<td class="format1B" valign="Top">[____]</td>
|
|
<td class="format1B" valign="Top">Searches _____________</td>
|
|
<td class="format1B" valign="Top"><A HREF="#Top"><IMG SRC="../../IMG/arrowup_blue.gif" width="12" height="12" border="0" align="right" alt="back to top"></IMG></A>
|
|
<a HREF="/sparcle?cmd=search&term=%22_____+_____%22%5B____%5D"><b>bsid82991[accn]</b></a> <BR><BR>
|
|
will retrieve the SPARCLE record that contains the specified ____ in the _____ field.</td>
|
|
</tr -->
|
|
|
|
</TABLE>
|
|
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- =========== END_TABLE_OF_SEARCH_FIELDS ============== -->
|
|
|
|
<!-- =========== FOOTNOTES_ABOUT_SEARCH_FIELDS ============== -->
|
|
<BLOCKQUOTE><BLOCKQUOTE>
|
|
|
|
<A NAME="SearchFieldFootnotes"></A>
|
|
<A NAME="SearchFieldsFootnotes"></A>
|
|
<A NAME="SearchFieldAbbreviations"></A>
|
|
<A NAME="SearchFieldsAbbreviations"></A>
|
|
* In a query, the <B>field name may be typed as the <A HREF="#SearchFields">full name or abbreviation</A></B>, and may be in <B>upper, lower, or mixed case</B>. If more than one abbreviation is shown, any one of them can be used. The field name <B>must be</B> surrounded by <B>square brackets []</B>. A <B>space</B> between the search term and the field specifier is <B>optional</B>. If desired, surround a phrase with <A HREF="#SearchFieldsQuotes"><B>quotes</B></A> to force an adjacency search. For example, all of the sample queries below will work equally:
|
|
<BR>
|
|
      "chloride channel"[NAME] <BR>
|
|
      "chloride channel" [NAME] <BR>
|
|
      "chloride channel"[name] <BR>
|
|
      "chloride channel" [name] <BR>
|
|
      "chloride channel" [NM] <BR>
|
|
      "chloride channel"[nm] <BR><BR>
|
|
<!-- BR>
|
|
      "chloride channel"[TI] <BR>
|
|
      "chloride channel"[TITL] <BR>
|
|
      "chloride channel" [TITL] <BR>
|
|
      "chloride channel" [titl] <BR>
|
|
      "chloride channel"[Title] <BR><BR -->
|
|
|
|
<A NAME="SearchFieldsQuotes"></A>
|
|
** The <B>quotes surrounding the query terms</B> in some of the sample searches force the terms to be <B>searched as a phrase</B>. <B>If quotes are not used</B>, the <A HREF="/gquery">Entrez</A> system may still recognize and handle the terms as a phrase, if they are present in a phrase dictionary used by the search engine. If the terms are <I>not</I> present in the phrase dictionary and are <I>not</I> surrounded by quotes, Entrez will insert a <B>Boolean AND</B> between the terms; in that case, they may or may not appear adjacent to each other in the retrieved records. The "<B>Details</B>" section in the right hand margin of a search results page will show you exactly how the Entrez system parsed your query. More search tips are provided in the <a HREF="/books/NBK3827/">PubMed help document</a> and <a HREF="/books/NBK3837/">Entrez help document</a>.<BR><BR>
|
|
|
|
<A NAME="SearchFieldsTruncation"></A>
|
|
It is also possible to search for a word stem by using an <B>asterisk (*) as a wild card</B>; for example, arachidon* will retrieve records with terms such as arachidonate, arachidonic, arachidonoyl. The <A HREF="/books/NBK3837/">Entrez Help</A> document provides additional information about <A HREF="/books/NBK3837/#EntrezHelp.Using_Wild_Cards_or_Query_Tru">truncating</A> search terms in this way.
|
|
|
|
</BLOCKQUOTE></BLOCKQUOTE>
|
|
<BR>
|
|
|
|
<!-- ========= END_FOOTNOTES_ABOUT_SEARCH_FIELDS =========== -->
|
|
|
|
<!-- =========== END_LEVEL_1_TOPIC_INPUT_KEYWORDS ============= -->
|
|
|
|
<!-- ====== PAGE_MARGIN_TO_RIGHT_OF_BLUE_EDGE_BOX_WITH_SECTION_TEMPLATE_CONTENTS ====== -->
|
|
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ############# END_BLUE_EDGE_BOX_WITH_SECTION_2_CONTENTS ############ -->
|
|
|
|
<!-- ==================== VERTICAL SPACER ======================= -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
<TR>
|
|
<TD class="WhiteCell NormalText"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ==================== END_VERTICAL SPACER ======================= -->
|
|
|
|
|
|
<!-- ########### BEGIN_BLUE_HEADER_SECTION_3 ############# -->
|
|
|
|
<A NAME="Output"></A>
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="SteelBlueCell"><SPAN class="HeaderText1">Output</SPAN></TD>
|
|
<TD class="SteelBlueCell" WIDTH="15" ALIGN="left" VALIGN="middle"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A></TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ############## END_BLUE_HEADER_SECTION_3 ############ -->
|
|
|
|
<!-- ########## BEGIN_BLUE_EDGE_BOX_WITH_SECTION_3_CONTENTS ########### -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="WhiteCellBlueEdgeAll NormalText">
|
|
|
|
<!-- ============ MINI_TOC_FOR_THIS_SECTION ============== -->
|
|
<BR>
|
|
<!-- BLOCKQUOTE><BLOCKQUOTE>
|
|
|
|
<A HREF="#_____">_________</A> | <A HREF="#_____">_________</A> | <A HREF="#_____">_________</A> | <A HREF="#_____">_________</A>
|
|
|
|
</BLOCKQUOTE></BLOCKQUOTE -->
|
|
|
|
<!-- ========== END_MINI_TOC_FOR_THIS_SECTION ============ -->
|
|
|
|
<!-- ========== LEVEL_1_TOPIC_OUTPUT_SEQUENCE_SEARCH ========== -->
|
|
|
|
<A NAME="OutputSequence"></A>
|
|
<A NAME="OutputSequenceSearch"></A>
|
|
|
|
<TABLE WIDTH="100%" BORDER="0" CELLSPACING="0" CELLPADDING="0" BGCOLOR="#FFFFFF">
|
|
|
|
<TR>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
<TD ALIGN="LEFT" VALIGN="TOP">
|
|
|
|
<SPAN CLASS="HeaderText3"><B>Output from a sequence search</B></SPAN>
|
|
<img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
<BR><BR>
|
|
|
|
<P CLASS="NormalText indent20">If you have <A HREF="#InputSequence">entered a query sequence</A> into the <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A> tool, the CD-Search results page will include a "<B>Protein Classification</B>" section if the query sequence maps to a conserved domain architecture in the <A HREF="/sparcle">SPARCLE</A> database. <I>(If a query sequence does not map to any conserved domain architecture in the <A HREF="/sparcle">SPARCLE</A> database, then the CD-Search results will not include a Protein Classification section.)</I><BR><BR>
|
|
|
|
A sample protein classification section is shown in the <I><span style="color:#d70000">illustration at the right</span></I>, which displays the CD-Search results for the query sequence <A HREF="/protein/NP_387887">DNA gyrase B (NP_387887)</A>, an antibiotic target. Click on the illustration to <A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=16077074">open the live CD-Search results</A>.<BR><BR>
|
|
|
|
Please note that the "<B>Graphical Summary</B>" on the live CD-Search results page might look different from the illustration at the right because conserved domain architecture records in the <A HREF="/sparcle">SPARCLE</A> database continue to evolve with <A HREF="#OngoingResearch">ongoing research</A>.<BR><BR>
|
|
|
|
For example, in January 2017, the protein sequence <A HREF="/protein/NP_387887">NP_387887</A> was <B>initially</B> annotated with <A HREF="/Structure/sparcle/archview.html?archid=10647733">architecture ID 10647733</A> (as shown in the illustration). That architecture is named "DNA gyrase subunit B" and includes <B>four distinct conserved domains</B>.<BR><BR>
|
|
|
|
In March 2017, when a new build of CDD/SPARCLE was released, the conserved domain architecture annotation for NP_387887 was <B>revised</B> to <A HREF="/Structure/sparcle/archview.html?archid=11481348">architecture ID 11481348</A>, which is a <B>multi-domain</B> that encompasses the four original conserved domains, and which can be seen in the <A HREF="/Structure/cdd/wrpsb.cgi?seqinput=NP_387887">current CD-Search results for NP_387887</A>. That architecture has a more specific and precise name, "type IIA DNA topoisomerase subunit B," and reflects the <B>full length protein model</B>.<BR><BR>
|
|
|
|
To see the four conserved domains that compose the multi-domain, simply change the CD-Search display option on the <A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=16077074">live CD-Search results for NP_387887</A> from "<A HREF="../../cdd/cdd_help.shtml#ConciseDisplay"><B>Concise Results</B></A>" to "<A HREF="../../cdd/cdd_help.shtml#FullDisplay"><B>Full Results</B></A>" (using the <A HREF="../../cdd/cdd_help.shtml#GlobalOptions"><B>"View" menu</B></A> near the upper right hand corner). The Full Results display will show the four conserved domains that compose the multi-domain.<BR><BR>
|
|
|
|
As the available data and understanding of conserved domain architectures continue to evolve, the domain architectures that are annotated on proteins may evolve as well, as shown in this example. Comments about the data are welcome and can be sent to the NCBI Support Center/Help Desk, which is accessible as a link in the footer of NCBI web pages<!-- A HREF="mailto:info@ncbi.nlm.nih.gov">info@ncbi.nlm.nih.gov</A -->.
|
|
</P>
|
|
|
|
</TD>
|
|
|
|
<TD WIDTH="300" ALIGN="CENTER" VALIGN="TOP"><A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=NP_387887"><IMG src="images/about_sparcle_step2_CDSearch_results_protein_classification_DNA_gyrase_NP_387887.png" WIDTH="263" HEIGHT="410" border="0" alt="Step 2 in using SPARCLE: The CD-Search results page will display a Protein Classification section above the graphic summary of conserved domains, if a SPARCLE record exists for the domain architecture in the query protein sequence. Click on this graphic to open the CD-Search results for NP_387887, DNA gyrase subunit B from Bacillus subtilis."></A>
|
|
</TD>
|
|
|
|
</TR>
|
|
|
|
</TABLE>
|
|
<BR><BR>
|
|
|
|
<!-- ======== END_LEVEL_1_TOPIC_OUTPUT_SEQUENCE_SEARCH ======== -->
|
|
|
|
<!-- ========== LEVEL_1_TOPIC_OUTPUT_KEYWORD_SEARCH ========== -->
|
|
|
|
<A NAME="OutputKeyword"></A>
|
|
<A NAME="OutputKeywords"></A>
|
|
<A NAME="OutputKeywordSearch"></A>
|
|
|
|
<TABLE WIDTH="100%" BORDER="0" CELLSPACING="0" CELLPADDING="0" BGCOLOR="#FFFFFF">
|
|
|
|
<TR>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
<TD ALIGN="LEFT" VALIGN="TOP">
|
|
|
|
<SPAN CLASS="HeaderText3"><B>Output from a keyword search</B></SPAN>
|
|
<img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
<BR><BR>
|
|
|
|
<P CLASS="NormalText indent20">
|
|
|
|
If you are <A HREF="#InputKeywords">searching for keywords</A> in the <A HREF="/sparcle/">SPARCLE</A> database, the SPARCLE search results will display a list of the conserved domain architectures that contain the keyword(s) you specified.<BR><BR>
|
|
|
|
Depending on how you entered the search, the search terms can either appear in any field of a conserved domain architecture record, or in a search field you specify, and they can either appear together as a phrase or separate from each other.<BR><BR>
|
|
|
|
The <A HREF="#SearchTips">Search Tips</A> section of this document provides details about the <A HREF="#InputKeywordsScopeOfSearch">scope of a keyword search</A>, as well as tips on how to <A HREF="#SearchTipsSearchFields">limit your query to specific search fields</A>, <A HREF="#SearchTipsQuotes">use quotes to force a phrase search</A>, and <A HREF="#SearchTipsTruncation">use an asterisk (*) for truncation</A>. It also includes a <A HREF="#SearchTipsCompare">comparison of some sample search strategies</A>.<BR><BR>
|
|
|
|
The <I><span style="color:#d70000">illustration at the right</span></I> shows the results of a sample search for the words <B><I>chloride</I></B> and <B><I>channel</I></B> in any field of an architecture record, and limited to the subset of architecture records that meet the criterion of <A HREF="#DataProcessingCurated"><B><I>curated</I></B></A><A HREF="#SearchFieldReviewLevel"><I>[ReviewLevel]</I></A>.<BR><BR>
|
|
|
|
<B>Click</B> on the illustration to <A HREF="/sparcle/?term=chloride+channel+AND+curated%5BReviewLevel%5D">open the corresponding live search results in the SPARCLE database</A>. <I>(Please note that the second panel of the illustration shows the search results as of March 2, 2017; the corresponding live web page will retrieve a larger number of records, as the SPARCLE database continues to grow.)</I><BR><BR>
|
|
|
|
A <A HREF="#SearchTipsCompare">comparison of some sample search strategies</A> shows other ways of constructing the query, with links to the search results in each case.
|
|
|
|
</P>
|
|
|
|
</TD>
|
|
|
|
<TD WIDTH="300" ALIGN="CENTER" VALIGN="TOP"><A HREF="/sparcle/?term=chloride+channel+AND+curated%5BReviewLevel%5D"><IMG src="images/entrez_sparcle_step2_search_results_chloride_channel_curated_reviewlevel.png" WIDTH="263" HEIGHT="410" border="0" align="center" alt="Step 2 in searching the SPARCLE database by keyword: View the search results and click on the architecture ID of any domain architecture of interest to open its summary page. Click on this graphic to open the results of a SPARCLE search for chloride channel AND curated[ReviewLevel]."></A>
|
|
</TD>
|
|
|
|
</TR>
|
|
|
|
</TABLE>
|
|
<BR>
|
|
|
|
<!-- ======== END_LEVEL_1_TOPIC_OUTPUT_KEYWORD_SEARCH ======== -->
|
|
|
|
<!-- ====== PAGE_MARGIN_TO_RIGHT_OF_BLUE_EDGE_BOX_WITH_SECTION_TEMPLATE_CONTENTS ====== -->
|
|
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ############# END_BLUE_EDGE_BOX_WITH_SECTION_3_CONTENTS ############ -->
|
|
|
|
<!-- ==================== VERTICAL SPACER ======================= -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
<TR>
|
|
<TD class="WhiteCell NormalText"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ==================== END_VERTICAL SPACER ======================= -->
|
|
|
|
|
|
<!-- ########### BEGIN_BLUE_HEADER_SECTION_4 ############# -->
|
|
|
|
<A NAME="SummaryPage"></A>
|
|
<A NAME="SampleRecord"></A>
|
|
<A NAME="SPARCLERecord"></A>
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="SteelBlueCell"><SPAN class="HeaderText1">Sample SPARCLE Record</SPAN></TD>
|
|
<TD class="SteelBlueCell" WIDTH="15" ALIGN="left" VALIGN="center"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A></TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ############## END_BLUE_HEADER_SECTION_4 ############ -->
|
|
|
|
<!-- ########## BEGIN_BLUE_EDGE_BOX_WITH_SECTION_4_CONTENTS ########### -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="WhiteCellBlueEdgeAll NormalText">
|
|
|
|
<!-- ============ MINI_TOC_FOR_THIS_SECTION ============== -->
|
|
<BR>
|
|
<!-- BLOCKQUOTE><BLOCKQUOTE>
|
|
|
|
<A HREF="#_____">_________</A> | <A HREF="#_____">_________</A> | <A HREF="#_____">_________</A> | <A HREF="#_____">_________</A>
|
|
|
|
</BLOCKQUOTE></BLOCKQUOTE -->
|
|
|
|
<!-- ========== END_MINI_TOC_FOR_THIS_SECTION ============ -->
|
|
|
|
<!-- ==== LEVEL_1_TOPIC_SUMMARY_PAGE_CLASSIFICATION_OF_PROTEINS ==== -->
|
|
|
|
<A NAME="SummaryPageClassificationOfProteins"></A>
|
|
<TABLE WIDTH="100%" BORDER="0" CELLSPACING="0" CELLPADDING="0" BGCOLOR="#FFFFFF">
|
|
|
|
<TR>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
<TD ALIGN="LEFT" VALIGN="TOP">
|
|
|
|
<SPAN CLASS="HeaderText3"><B>Classification of proteins by domain architecture</B></SPAN>
|
|
<img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
<BR><BR>
|
|
|
|
<P CLASS="NormalText indent20">
|
|
|
|
A <A HREF="/sparcle">SPARCLE</A> database record is also referred to as a <A HREF="#ConservedDomainArchitecture">conserved domain architecture</A>'s "<B>summary page</B>."<BR><BR>
|
|
|
|
An individual SPARCLE record shows a <B>unique architecture</B> that has been <B>observed in at least one protein sequence</B>.<BR><BR>
|
|
|
|
The summary page displays the <A HREF="#Name"><B>name</B></A> and <A HREF="#Label"><B>label</B></A> of the architecture, along with <A HREF="#Evidence"><B>evidence</B></A> used to assign that name and label.<BR><BR>
|
|
|
|
Additionally, because SPARCLE is used to <A HREF="#Overview"><B>classify proteins</B></A> by their characteristic conserved domain architecture, the summary page includes a list of protein <A HREF="#SummaryPageSequences"><B>sequences with this architecture</B></A>.<BR><BR>
|
|
|
|
As noted in the section of the document about <A HREF="#OngoingResearch">ongoing research</A>, the conserved domain models, architectures, and the resulting protein sequence clusters, continue to evolve as new data become available and as research progresses.<BR><BR>
|
|
|
|
The complete contents of a SPARCLE record include the following. Click on any item to read more about it:<BR>
|
|
</P>
|
|
|
|
<SPAN CLASS="NormalText">
|
|
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageDescription">Description of architecture</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageName">Name</A></LI>
|
|
<LI><A HREF="#SummaryPageLabel">Label (description of function)</A></LI>
|
|
<LI><A HREF="#SummaryPageArchitectureID">Architecture ID</A></LI>
|
|
<LI><A HREF="#SummaryPageVersion">Version</A></LI>
|
|
<LI><A HREF="#SummaryPagePublicationDate">Date Published</A></LI>
|
|
<LI><A HREF="#SummaryPageReviewLevel">Review Level</A></LI>
|
|
</UL>
|
|
<LI><A HREF="#SummaryPageSequences">Sequences with this architecture</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageSequencesFolderTabs">Folder tabs</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageSequencesAll">All</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesProteinWithPubMed">Protein with PubMed Reference</A></LI>
|
|
<LI><A HREF="#SummaryPageSequences3DStructure">3D Structure</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesGene">Gene</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesRefSeq">RefSeq</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesSwissProt">Swiss-Prot</A></LI>
|
|
</UL>
|
|
<LI><A HREF="#SummaryPageSequencesFilters">Filters</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageSequencesFiltersTags">Tags</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesFiltersSource">Source</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesFiltersOrganism">Organism</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesFiltersDescription">Description</A></LI>
|
|
<LI><A HREF="#SummaryPageSequencesFiltersGeneSymbol">Gene Symbol</A></LI>
|
|
</UL>
|
|
<LI><A HREF="#SummaryPageSequencesEmptySet">Note: Empty Set</A></LI>
|
|
</UL>
|
|
<LI><A HREF="#SummaryPageCuratedNamesAndLabels">Curated Names and Labels</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageTaxonomicScope">Taxonomic Scope</A></LI>
|
|
<LI><A HREF="#SummaryPageAssignedName">Name</A></LI>
|
|
<LI><A HREF="#SummaryPageAssignedLabel">Label</A></LI>
|
|
<LI><A HREF="#SummaryPageEvidence">Supporting evidence</A></LI>
|
|
<UL>
|
|
<LI><A HREF="#SummaryPageEvidenceProteins">Protein sequences</A></LI>
|
|
<LI><A HREF="#SummaryPageEvidenceConservedDomains">Conserved domains</A></LI>
|
|
<LI><A HREF="#SummaryPageEvidencePublications">Publications</A></LI>
|
|
<LI><A HREF="#SummaryPageEvidenceOther">Other</A></LI>
|
|
</UL>
|
|
</UL>
|
|
<LI><A HREF="#SummaryPageConservedDomains">Conserved domains in this architecture</A></LI>
|
|
<LI><A HREF="#SummaryPageFunctionalSites">Functional sites in this architecture</A></LI>
|
|
</UL>
|
|
|
|
</SPAN>
|
|
|
|
</TD>
|
|
|
|
<TD WIDTH="315" ALIGN="CENTER" VALIGN="TOP"><A HREF="/Structure/sparcle/archview.html?archid=10647733"><IMG src="images/about_sparcle_step3_view_sparcle_record_DNA_gyrase_NP_387887.png" WIDTH="281" HEIGHT="410" border="0" align="center" alt="Step 3 in using SPARCLE: The Protein Classification section of the CD-Search results links to the corresponding SPARCLE record, illustrated here. The SPARCLE record shows the name and functional label of the architecture, supporting evidence, and links to other proteins with the same architecture. Click on this graphic to open the SPARCLE record for the domain architecture (architecture ID 10647733) that was found in the protein query sequence, NP_387887, DNA gyrase subunit B from Bacillus subtilis."></A>
|
|
</TD>
|
|
|
|
</TR>
|
|
|
|
</TABLE>
|
|
<BR>
|
|
|
|
<!-- ==== END_LEVEL_1_TOPIC_SUMMARY_PAGE_CLASSIFICATION_OF_PROTEINS === -->
|
|
|
|
<!-- ======= LEVEL_1_TOPIC_SUMMARY_PAGE_DESCRIPTION ========== -->
|
|
|
|
<A NAME="SummaryPageDescription"></A>
|
|
<TABLE WIDTH="100%" BORDER="0" CELLSPACING="0" CELLPADDING="0" BGCOLOR="#FFFFFF">
|
|
|
|
<TR>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
<TD ALIGN="LEFT" VALIGN="TOP">
|
|
|
|
<SPAN CLASS="HeaderText3"><B>Description of the conserved domain architecture</B></SPAN>
|
|
<img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
<BR><BR>
|
|
|
|
<P CLASS="NormalText indent20">
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="Name"></A><A NAME="SummaryPageName"></A><A NAME="SummaryPageDescriptionName"></A><A NAME="DescriptionName"></A><B>Name of architecture</B>:<BR><BR>
|
|
The name of a conserved domain architecture is either assigned manually by <A HREF="#DataProcessingCurated">curation</A>, or computationally by the <A HREF="#DataProcessingAutonamed">autoname</A> algorithm or the <A HREF="#DataProcessingNamedByDomain">namedByDomain</A> algorithm.
|
|
<BLOCKQUOTE>
|
|
The architecture name is displayed in two places on a SPARCLE record: near the top of the record (in bold font), and in the "<A HREF="#CuratedNamesAndLabels">Curated names and labels</A>" section of the record.<BR><BR>
|
|
For example, the name of the conserved domain architecture shown in the <A HREF="#SummaryPage">illustrated example of a SPARCLE record</A> is:<BR>
|
|
"DNA gyrase subunit B." (<I>You can also see this name in the live SPARCLE record for <A HREF="/Structure/sparcle/archview.html?archid=10647733">architecture ID 10647733</A>.</I>)
|
|
</BLOCKQUOTE>
|
|
</LI><BR>
|
|
|
|
<LI><A NAME="Label"></A><A NAME="SummaryPageLabel"></A><A NAME="SummaryPageDescriptionLabel"></A><A NAME="SummaryPageDescriptoinLabel"></A><A NAME="DescriptionLabel"></A><B>Label (description of function)</B>:<BR><BR>
|
|
The label provides a description of the conserved domain architecture's biological function.
|
|
<BLOCKQUOTE>
|
|
The label is displayed in two places on a SPARCLE record: near the top of the record (beneath the bold font that shows the architecture's name), and in the "<A HREF="#CuratedNamesAndLabels">Curated names and labels</A>" section of the record.<BR><BR>
|
|
For example, the label of the conserved domain architecture shown in the <A HREF="#SummaryPage">illustrated example of a SPARCLE record</A> is:<BR>
|
|
"DNA gyrase is a type 2 topoisomerase that relaxes supercoils but can also introduce negative supercoils into DNA in an ATP-dependent manner." (<I>You can also see this label in the live SPARCLE record for <A HREF="/Structure/sparcle/archview.html?archid=10647733">architecture ID 10647733</A>.</I>)
|
|
</BLOCKQUOTE>
|
|
</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageArchitectureID"></A><A NAME="ArchitectureID"></A><B>Architecture ID</B>:<BR><BR>
|
|
|
|
An integer, assigned by NCBI, that uniquely identifies a conserved domain architecture.<BR>
|
|
The <B>architecture ID</B> is also referred to as a <B>unique identifier</B> (<A HREF="#SearchFieldUID"><B>UID</B></A>) and can be searched directly in the <A HREF="/sparcle">SPARCLE</A> database.<BR><BR>
|
|
|
|
Each architecture ID reflects the <!-- A HREF="#ConservedDomainsInThisArchitecture" --><B>set</B> of conserved domain models<!-- /A --> that are top-scoring hits (as determined by the <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A> service) on the proteins that possess the architecture, the <B>sequential order</B> of those domains, and the <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types"><B>type of hit</B></a> each domain has to the proteins. (As noted in the <A HREF="#Overview">Overview</A> section of this document, it is also possible for a domain architecture to consist of a single conserved domain footprint. Such architectures also receive an architecture ID.)<BR><BR>
|
|
|
|
<!-- Each architecture ID reflects the unique set, and sequential order, of conserved domain models that are top-scoring hits (as determined by the <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A> service) on the proteins that possess the architecture, and the <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types">type of hit</a> each domain has to the proteins. <I>(As noted in the <A HREF="#Overview">Overview</A> section of this document, it is also possible for a domain architecture to consist of a single conserved domain footprint.)</I><BR><BR -->
|
|
|
|
The <B>conserved domain models that compose an architecture</B> are shown in two places on the architecture's summary page: (a) in the graphical display at the top of the page (<A HREF="#SummaryPage">illustrated example</A>), and (b) in the section labeled "<A HREF="#SummaryPageConservedDomains">Conserved domains in this architecture</A>."<BR><BR>
|
|
|
|
In the <B>graphical display</B> of a conserved domain architecture, you can <B>mouse over</B> a conserved domain's cartoon in order to see its <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">accession number</A>, or click on the cartoon to see detailed information about that domain model, including a multiple sequence alignment of its member proteins.<BR><BR>
|
|
|
|
The <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">accession number</A> <B>prefix</B> for each conserved domain model in the architecture reflects the <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types">type of hit</A> it has on the proteins that possess the architecture. Accession numbers that begin with the <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix_cl">"cl" prefix</A> indicate a <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_superfamily">superfamily hit</A> (the <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix_cl">"cl" prefix</A> stands for <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamily</A> cluster). All other <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">types of accession numbers</A> (i.e., accessions that begin with any prefix other than "cl") indicate <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hits</A>.<BR>
|
|
|
|
<BLOCKQUOTE>
|
|
As an example of the <B>unique composition of each architecture</B>, search the <A HREF="/sparcle">SPARCLE</A> database for:<BR>
|
|
     <A HREF="/sparcle?term=tumor%5BName%5D"><I>tumor[Name]</I></A><BR>
|
|
That will retrieve conserved domain architectures which contain the term "<I>tumor</I>" in the architecture <A HREF="#Name">name</A>, including a number of architectures named "<I>P53 and SAM_tumor-p63 domain-containing protein</I>."<BR>
|
|
At <B>first glance</B>, some of the architectures appear similar to each other. Upon <B>closer look</B>, however, you will see that each architecture is comprised of a unique series of conserved domain accession numbers. <I>(To see the accession numbers, open the <A HREF="#SummaryPage">SPARCLE record</A> for any domain architecture of interest, then either mouse over the cartoon for each domain in the architecture's graphic, or view the tabular list of "<A HREF="#SummaryPageConservedDomains">Conserved domains in this architecture</A>.")</I> As a result, each architecture receives its own architecture ID.<BR>
|
|
<!-- I>(Please note that some of the architectures might <a href="#EmptySet">not currently have links</A> to any proteins. The reasons for this are explained below, in the section of this help document about "<a href="#SummaryPageSequences">Sequences with this architecture</A>.")</I -->
|
|
</BLOCKQUOTE>
|
|
|
|
</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageVersion"></A><B>Version</B>:<BR><BR>
|
|
Each SPARCLE record is assigned a version of 1 when it is first published (i.e., first released into the public SPARCLE database). If a SPARCLE record is later revised in any way, the version number is incremented when the revised record is published.
|
|
<BLOCKQUOTE>
|
|
Details: The information within a SPARCLE architecture record can change over time, as new data and publications become available about a given conserved domain architecture. Each time a change is made to a SPARCLE record, and the revised record is then published (i.e., released into the public database), it receives a new version number. The majority of changes are generally minor, such as corrections of typing errors or the addition of punctuation, such as dashes, to protein names. Other changes might be more important, such as the addition of new evidence in support of the domain architecture, or the correction of a protein name.
|
|
</BLOCKQUOTE>
|
|
</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPagePublicationDate"></A><A NAME="SummaryPageDatePublished"></A><B>Date Published</B>:<BR><BR>
|
|
The date on which the current version of a conserved domain architecture record was published in the SPARCLE curation system.
|
|
<!-- The date on which the current version of this SPARCLE record was released into the database. If the record is listed as Version 1, then the Date Published also represents the <A HREF="#SearchFieldCreateDate">Create Date</A> (the date on which the architecture first became available in the SPARCLE database). -->
|
|
<BLOCKQUOTE>
|
|
The architecture subsequently becomes available in the public <A HREF="/sparcle">SPARCLE</A> database, although that might happen a bit later.<BR><BR>
|
|
<I>Search tip: To retrieve architectures by their publication date, use the <A HREF="#SearchFields">search field</A> called <A HREF="#SearchFieldCreateDate">[CreateDate]</A> on the <A HREF="/sparcle/advanced">SPARCLE Advanced Search page</A>.</I>
|
|
</BLOCKQUOTE>
|
|
</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageReviewLevel"></A><B>Review Level</B>:<BR><BR>
|
|
The SPARCLE database has three tiers (review levels) of conserved domain architecture records:<BR><BR>
|
|
|
|
<OL>
|
|
<LI><A HREF="#DataProcessingCurated">Curated architectures</A></LI>
|
|
<LI><A HREF="#DataProcessingAutonamed">Autonamed architectures</A></LI>
|
|
<LI><A HREF="#DataProcessingNamedByDomain">NamedByDomain architectures</A></LI>
|
|
<!-- LI><A HREF="#DataProcessingComputed">Computed name architectures</A></LI -->
|
|
</OL>
|
|
<BR>
|
|
|
|
Additional details about each tier are provided in the <A HREF="#DataProcessing">data processing</A> section of this document, including a description of the method by which the architectures in each tier are named.<BR><BR>
|
|
|
|
<I>Search tip: When doing a <A HREF="#InputKeywords">keyword search</A> of the SPARCLE database, you can limit your search results to architectures that belong to a given tier by using the <A HREF="#SearchFields">search field</A> called <A HREF="#SearchFieldReviewLevel">[ReviewLevel]</A> on the <A HREF="/sparcle/advanced">SPARCLE Advanced Search page</A>. Alternatively, you can simply use the "<B>Filter your results</B>" options in the upper right hand margin of a SPARCLE search results page (<A HREF="#OutputKeywords">illustrated example</A>) to select the desired tier.</I><BR>
|
|
|
|
<!-- I>Search tip: To retrieve conserved domain architectures that belong to a given tier, use the <A HREF="#SearchFields">search field</A> called <A HREF="#SearchFieldReviewLevel">[ReviewLevel]</A> on the <A HREF="/sparcle/advanced">SPARCLE Advanced Search page</A>. Alternatively, you can simply use the "<B>Filter your results</B>" options in the upper right hand margin of a SPARCLE search results page (<A HREF="#OutputKeywords">illustrated example</A>) that appears after you do a <A HREF="#InputKeywords">keyword search</A> of the SPARCLE database.</I><BR -->
|
|
</LI><BR>
|
|
|
|
</UL>
|
|
|
|
</P>
|
|
|
|
</TD>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
</TR>
|
|
|
|
</TABLE>
|
|
|
|
<!-- === END_LEVEL_1_TOPIC_SUMMARY_PAGE_DESCRIPTION ===== -->
|
|
|
|
<!-- ======= LEVEL_1_TOPIC_SUMMARY_PAGE_SEQUENCES ========== -->
|
|
|
|
<A NAME="SummaryPageSequences"></A>
|
|
<A NAME="SummaryPageSequencesWithThisArchitecture"></A>
|
|
<A NAME="SequencesWithThisArchitecture"></A>
|
|
<TABLE WIDTH="100%" BORDER="0" CELLSPACING="0" CELLPADDING="0" BGCOLOR="#FFFFFF">
|
|
|
|
<TR>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
<TD ALIGN="LEFT" VALIGN="TOP">
|
|
|
|
<SPAN CLASS="HeaderText3"><B>Sequences with this architecture</B></SPAN>
|
|
<img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
<BR>
|
|
|
|
<!-- ====== MINI_TOC_FOR_SEQUENCES_WITH_THIS_ARCHITECTURE ===== -->
|
|
|
|
<BLOCKQUOTE>
|
|
<A HREF="#SummaryPageSequencesIntro">Introductory note</A><BR>
|
|
|
|
<A HREF="#SummaryPageSequencesFolderTabs">Folder tabs</A>:
|
|
<A HREF="#SummaryPageSequencesAll">All</A> |
|
|
<A HREF="#SummaryPageSequencesProteinWithPubMed">Protein with PubMed Reference</A> |
|
|
<A HREF="#SummaryPageSequences3DStructure">3D Structure</A> |
|
|
<A HREF="#SummaryPageSequencesGene">Gene</A> |
|
|
<A HREF="#SummaryPageSequencesRefSeq">RefSeq</A> |
|
|
<A HREF="#SummaryPageSequencesSwissProt">Swiss-Prot</A><BR>
|
|
|
|
<A HREF="#SummaryPageSequencesFilters">Filters</A>:
|
|
<A HREF="#SummaryPageSequencesFiltersTags">Tags</A> |
|
|
<A HREF="#SummaryPageSequencesFiltersSource">Source</A> |
|
|
<A HREF="#SummaryPageSequencesFiltersOrganism">Organism</A> |
|
|
<A HREF="#SummaryPageSequencesFiltersDescription">Description</A> |
|
|
<A HREF="#SummaryPageSequencesFiltersGeneSymbol">Gene Symbol</A><BR>
|
|
|
|
<A HREF="#SummaryPageSequencesEmptySet">Note: Empty Set</A><BR>
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- ====== END_MINI_TOC_FOR_SEQUENCES_WITH_THIS_ARCHITECTURE ===== -->
|
|
|
|
<!-- ====== SEQUENCES_WITH_THIS_ARCHITECTURE_INTRODUCTORY_NOTE ===== -->
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="SummaryPageSequencesIntro"></A><B>Introductory note:</B><BR><BR>
|
|
|
|
The "<B>Sequences with this architecture</B>" table lists the sequences from the NCBI <A HREF="/protein">Protein database</A> that have the conserved domain architecture currently being viewed.<BR><BR>
|
|
|
|
A <B>conserved domain architecture</B> is <B>defined</B> as the sequential order of conserved domains in a protein sequence. Additionally, each domain within the architecture can get any one of several <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types"><B>hit types</B></A> against a query protein sequence (e.g., <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hit</A>, <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_non_specific_hit">non-specific hit</A>, <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_superfamily">superfamily</A>, <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_multi_domain">multi-domain</A>), as determined by the <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A> service.<BR><BR>
|
|
|
|
In order to be listed in the "Sequences with this architecture" table, a protein must have the <B>exact order</B> of conserved domains shown in the graphic at the top of a conserved domain architecture's summary page. Additionally, each conserved domain shown in the graphic must be the <B>top-scoring hit</B> for the corresponding region of the protein sequence, and must be of the <B>same hit type</B> as shown in the architecture's graphic. For example, when you mouse over a conserved domain cartoon in the architecture's graphic, and see a <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">conserved domain accession number</A> that begins with the <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix_cl">"cl" prefix</A>, that indicates a <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_superfamily">superfamily hit</A>. A conserved domain accession number that begins with any other prefix indicates a <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hit</A>.<BR><BR>
|
|
|
|
Therefore, every protein listed in the "sequences with this architecture" table has the exact order of conserved domains, and the exact hit type to each domain, as shown in the graphic at the top of a conserved domain architecture's summary page.<BR><BR>
|
|
|
|
You can choose to view <B>all proteins</B> that have the architecture, or a <B>pre-defined subset</B>, using the folder tabs and filters described below:
|
|
|
|
</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<!-- ==== END_SEQUENCES_WITH_THIS_ARCHITECTURE_INTRODUCTORY_NOTE ==== -->
|
|
|
|
<!-- ===== SEQUENCES_WITH_THIS_ARCHITECTURE_FOLDER_TABS ====== -->
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="FolderTabs"></A><A NAME="SummaryPageSequencesFolderTabs"></A><A NAME="SequencesWithThisArchitectureFolderTabs"></A><B>Folder Tabs:</B><BR>
|
|
|
|
<!-- =========== MINI_TOC =========== -->
|
|
|
|
<BLOCKQUOTE>
|
|
<A HREF="#SummaryPageSequencesAll">All</A> |
|
|
<A HREF="#SummaryPageSequencesProteinWithPubMed">Protein with PubMed Reference</A> |
|
|
<A HREF="#SummaryPageSequences3DStructure">3D Structure</A> |
|
|
<A HREF="#SummaryPageSequencesGene">Gene</A> |
|
|
<A HREF="#SummaryPageSequencesRefSeq">RefSeq</A> |
|
|
<A HREF="#SummaryPageSequencesSwissProt">Swiss-Prot</A>
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- =========== END_MINI_TOC =========== -->
|
|
|
|
The folder tabs under "<A HREF="#SummaryPageSequences">sequences with this architecture</A>" provide quick access to some <B>commonly used data subsets</B>. <I>(A complete list of available data subsets is provided under "<A HREF="#Filters">Filters</A>.")</I>
|
|
</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="SummaryPageSequencesAll"></A><A NAME="FolderTabAll"></A>"<B>All</B>" folder tab - All proteins in the <A HREF="/protein">Protein database</A> that have the conserved domain architecture.</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageSequencesProteinWithPubMed"></A><A NAME="FolderTabProteinWithPubMed"></A>"<B>Protein with PubMed Reference</B>" folder tab - The subset of protein sequences that have this conserved domain architecture, and that include a reference to a published article in <A HREF="/pubmed">PubMed</A>.</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageSequences3DStructure"></A><A NAME="FolderTab3DStructure"></A>"<B>3D Structure</B>" folder tab - The subset of protein sequences that have this conserved domain architecture, and that have an experimentally resolved <A HREF="/structure">3-dimensional structure</A>.</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageSequencesGene"></A><A NAME="FolderTabGene"></A>"<B>Gene</B>" folder tab - A subset of protein sequences that have this conserved domain architecture, and that have a link to a <A HREF="/gene">Gene</A> record. This folder tab shows only one representative protein for each gene to which the architecture is linked, in order to provide a non-redundant view of the genes associated with the architecture.
|
|
|
|
<BLOCKQUOTE>
|
|
<I>Note: The "Gene" folder tab lists the same subset of protein sequences that can be retrieved using the option for "Filters:Tags:Gene Representative." However, the displays are slightly different. The "Gene" folder tab provides a gene-centric view that displays the gene ID, gene symbol, and gene description associated with each protein. In contrast, the "<A HREF="#FiltersTagsGeneRepresentative">Filters:Tags:Gene Representative</A>" displays the protein ID and description of each sequence that is linked to a gene. Both views include the source organism, protein length (in amino acids), and an "Actions" column that provides access to the protein sequences in FASTA format and links to other tools/resources.</I>
|
|
</BLOCKQUOTE>
|
|
</LI>
|
|
|
|
<LI><A NAME="SummaryPageSequencesRefSeq"></A>"<B>RefSeq</B>" folder tab - The subset of protein sequences that have this conserved domain architecture, and that are from the <A HREF="/refseq/">RefSeq</A> database.</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageSequencesSwissProt"></A>"<B>Swiss-Prot</B>" folder tab - The subset of protein sequences that have this conserved domain architecture, and that are from the <!-- UniProt Knowlegebase --><!-- A HREF="https://www.ebi.ac.uk/swissprot/">Swiss-Prot</A --><A HREF="https://www.ebi.ac.uk/uniprot">UniProtKB/Swiss-Prot</A> database.</LI><BR>
|
|
|
|
</UL>
|
|
<BR>
|
|
|
|
<!-- ===== END_SEQUENCES_WITH_THIS_ARCHITECTURE_FOLDER_TABS ===== -->
|
|
|
|
<!-- ====== SEQUENCES_WITH_THIS_ARCHITECTURE_FILTERS ======== -->
|
|
|
|
<LI><A NAME="Filters"></A><A NAME="SummaryPageSequencesFilters"></A><A NAME="SequencesWithThisArchitectureFilters"></A><B>Filters:</B><BR>
|
|
|
|
<!-- =========== MINI_TOC =========== -->
|
|
|
|
<BLOCKQUOTE>
|
|
<A HREF="#SummaryPageSequencesFiltersTags">Tags</A>: <A HREF="#FiltersTagsAnnotated">Annotated</A> | <A HREF="#FiltersTagsBioAssay">BioAssay</A> | <A HREF="#FiltersTagsGene">Gene</A> | <A HREF="#FiltersTagsGeneRepresentative">Gene Representative</A> | <A HREF="#FiltersTagsNRRepresentative">NR Representative</A> | <A HREF="#FiltersTagsPubMed">PubMed</A> | <A HREF="#FiltersTagsReference">Reference</A><BR>
|
|
<A HREF="#SummaryPageSequencesFiltersSource">Source</A> |
|
|
<A HREF="#SummaryPageSequencesFiltersOrganism">Organism</A> |
|
|
<A HREF="#SummaryPageSequencesFiltersDescription">Description</A> |
|
|
<A HREF="#SummaryPageSequencesFiltersGeneSymbol">Gene Symbol</A>
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- =========== END_MINI_TOC =========== -->
|
|
|
|
The "Filters" under "<A HREF="#SummaryPageSequences">sequences with this architecture</A>" enable you to view a number of pre-defined data subsets. Click on the down arrow (<B>V</B>) beside "Filters" to see the complete list and to activate the check box(es) of the desired filter(s). <I>(Some of the commonly used filters are shown as <A HREF="#FolderTabs">folder tabs</A> near the top of the "sequences with this architecture" section.)</I><BR><BR>
|
|
|
|
After a filter is selected, it will remain active unless/until you deactivate its checkbox or dismiss the filter(s) by clicking the red <FONT color="#D70000"><B>X</B></FONT> in the "Filters" tab.<BR><BR>
|
|
|
|
The number and types of filters that appear in a SPARCLE record depend on the set of protein sequences with that architecture, and on the information/data links that are available for those proteins. An example architecture that has a wide variety of filters is <A HREF="/Structure/sparcle/archview.html?archid=11481348">type IIA DNA topoisomerase subunit B (architecture ID 11481348)</A>. Other architectures might have only a small number of filters.<BR>
|
|
The various filters you might see on a page are described below:</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="SummaryPageSequencesFiltersTags"></A><A NAME="FiltersTags"></A><B>Tags</B> - This filter enables you to view the subset of proteins that have been tagged with various attributes.<!--, such as: having links a gene representative, or being the "<A HREF="#FiltersTagsNRRepresentative">NR representative." --> The available tags include: <A HREF="#FiltersTagsAnnotated">Annotated</A>, <A HREF="#FiltersTagsBioAssay">BioAssay</A>, <A HREF="#FiltersTagsGene">Gene</A>, <A HREF="#FiltersTagsGeneRepresentative">Gene Representative</A>, <A HREF="#FiltersTagsNRRepresentative">NR Representative</A>, <A HREF="#FiltersTagsPubMed">PubMed</A>, and <A HREF="#FiltersTagsReference">Reference</A>.</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="FiltersTagsAnnotated"></A><A NAME="FiltersTagAnnotated"></A>The "<B>Annotated</B>" tag retrieves the subset of protein sequences that have this conserved domain architecture, and that either link to <A HREF="/pubmed/">PubMed</A>, <A HREF="/pcassay/">BioAssay</A>, <A HREF="/structure/">Structure</A>, or <A HREF="/omim/">OMIM</A>, or are considered to be "landmark" sequences by <A HREF="https://blast.ncbi.nlm.nih.gov/smartblast/">Smart BLAST</A>.</LI><BR>
|
|
|
|
<UL>
|
|
<LI>The <A HREF="https://blast.ncbi.nlm.nih.gov/smartblast/smartBlast.cgi?CMD=Web&amp;PAGE_TYPE=BlastDocs">SmartBLAST help document</A> includes a section on the "Landmark Database," which describes how the landmark sequences are selected. Excerpt:<BR><BR>
|
|
"The landmark database includes proteomes from 27 genomes spanning a wide taxonomic range. This search set is produced using the best available genomic assemblies for each organism with the following procedure. First, the most recent representative assembly from each organism is identified. Second, all proteins annotated on each assembly are downloaded and compiled into the landmark BLAST database. The result is a taxonomically diverse non-redundant set of proteins supported by genomic assemblies."</LI>
|
|
</UL><BR>
|
|
|
|
<LI><A NAME="FiltersTagsBioAssay"></A><A NAME="FiltersTagBioAssay"></A>The "<B>BioAssay</B>" tag retrieves the subset of protein sequences that have this conserved domain architecture, and that are the targets of <A HREF="/pcassay/">BioAssay</A> experiments.</LI><BR>
|
|
|
|
<UL>
|
|
<LI>These are identified by an automated process that looks at the proteins that have the architecture in question, and finds the subset of proteins whose sequence identifiers are listed as the targets of BioAssay experiments.</LI>
|
|
<LI>The section of this document on <A HREF="#DataProcessing">data processing</A>: <A HREF="#DataProcessingLinks">links from architectures to other data types</A> describes how links are identified between conserved domain architectures in the SPARCLE database and other data types.</LI>
|
|
</UL><BR>
|
|
|
|
<LI><A NAME="FiltersTagsGene"></A><A NAME="FiltersTagGene"></A>The "<B>Gene</B>" tag retrieves the subset of protein sequences that have this conserved domain architecture, and that are linked to a record in the Gene database.<!-- {A protein is linked only if it is explicitly listed on the Gene record, either in the "NCBI Reference Sequences" or the "Related Sequences" section.} --></LI><BR>
|
|
|
|
<UL>
|
|
<LI>This option lists all of the protein sequences that have the architecture in question, and that have links to <A HREF="/gene/">Gene</A> records.</LI>
|
|
<LI>If several protein sequence records have links to the same gene record, all of those protein sequence records will be listed in this view.</LI>
|
|
<LI>For <B>example</B>, if 10 protein sequence records link to 2 genes, the "gene" tag will display all 10 protein sequences.<!-- I>(In contrast, the <A HREF="#FiltersTagGeneRepresentative">"gene representative" tag</A>, described below, displays one representative protein for each gene.)</I --></LI>
|
|
<LI>The section of this document on <A HREF="#DataProcessing">data processing</A>: <A HREF="#DataProcessingLinks">links from architectures to other data types</A> describes how links are identified between conserved domain architectures in the SPARCLE database and other data types.</LI>
|
|
</UL><BR>
|
|
|
|
<LI><A NAME="FiltersTagsGeneRepresentative"></A><A NAME="FiltersTagGeneRepresentative"></A>The "<B>Gene representative</B>" tag shows only one representative protein sequence for each gene that is linked to the architecture, in order to provide a non-redundant view of the genes associated with that architecture.</LI><BR>
|
|
|
|
<UL>
|
|
<LI>For <B>example</B>, if 10 protein sequence records link to 2 genes, the "gene representative" tag will display only 2 proteins -- one representative protein sequence for each gene.<!-- I>(In contrast, the <A HREF="#FiltersTagGene">gene" tag</A>", described above, will display all 10 proteins.)</I --></LI>
|
|
<LI><I>Note: The <A HREF="#FolderTabGene">"Gene" folder tab</A> lists the same subset of protein sequences that is retrieved by the "Gene Representative" tag. However, the displays are slightly different. The "Gene" folder tab provides a gene-centric view that displays the gene ID, gene symbol, and gene description associated with each protein. In contrast, the "Gene Representative" filter tag displays the protein ID and description of each sequence that is linked to a gene. Both views include the source organism, protein length (in amino acids), and an "Actions" column that provides access to the protein sequences in FASTA format and links to other tools/resources.</I></LI>
|
|
</UL><BR>
|
|
|
|
<LI><A NAME="FiltersTagsNRRepresentative"></A><A NAME="NR"></A><A NAME="nr"></A><A NAME="NRrepresentative"></A>An "<B>NR representative</B>" is a protein sequence that has been selected as the representative of a group of identical sequences, for the purpose of creating a <B>protein non-redundant (NR) database</B>. The "NR Representative" tag therefore retrieves a non-redundant list of protein sequences that have this conserved domain architecture. Technical details:</LI><BR>
|
|
|
|
<UL>
|
|
<LI><A NAME="PIG"></A><A NAME="PIGs"></A><A NAME="ProteinIdentityGroup"></A><A NAME="ProteinIdentityGroups"></A>To create a non-redundant protein database, the NCBI data processing pipeline organizes protein sequences into <SPAN style="color:#D70000"><B>protein identity groups (PIGs)</B></SPAN>. A protein identity group contains protein sequences that are identical in length and composition, regardless of taxonomic source (i.e., regardless of <A HREF="/books/NBK21100/#A268">TaxID</A>). Each group is given a stable identification number (<B>PIG ID</B>).</LI>
|
|
<LI><A NAME="PIGRepresentative"></A><A NAME="ProteinIdentityGroupRepresentative"></A><B>One protein sequence from each PIG is selected as the representative</B>. If the PIG includes a <A HREF="/RefSeq/">RefSeq</A> record, that is selected as the representative. If no RefSeq record is present, then a representative is selected from one of the following databases: <A HREF="http://ca.expasy.org/sprot/">Swiss-Prot</A><!-- A HREF="http://www.uniprot.org/">UniProt</A -->, <A HREF="http://pir.georgetown.edu/">PIR</A>, <A HREF="http://www.rcsb.org/pdb/home/home.do">PDB</A>, <A HREF="//ftp.ncbi.nih.gov/ncbi-asn1/protein_fasta/README.asn1.protein_fasta">GenPept</A> (protein translations of nucleotide sequence records in <A HREF="/Genbank/">GenBank</A> that have been annotated with a coding sequence, or <A HREF="/Sitemap/samplerecord.html#CDSB">CDS</A>, feature), or <A HREF="http://www.prf.or.jp/index-e.html">PRF</A>, respectively.<!-- Several sections of the biosystems_help.html file also discuss PIGs; the text in this bullet point was taken from /Structure/biosystems/docs/biosystems_help.html#BSSummaryProteins --></LI>
|
|
</UL><BR>
|
|
|
|
<LI><A NAME="FiltersTagsPubMed"></A>The "<B>PubMed</B>" tag retrieves the subset of protein sequences that have this conserved domain architecture, and that have a link to published literature represented in the <A HREF="/pubmed/">PubMed database</A>.</LI><BR>
|
|
|
|
<!-- UL>
|
|
<LI>________________________</LI>
|
|
</UL><BR -->
|
|
|
|
<LI><A NAME="FiltersTagsReference"></A><A NAME="LandmarkSequences"></A><A NAME="Landmark"></A>The "<B>Reference</B>" tag retrieves the subset of protein sequences that have this conserved domain architecture, and that are considered to be <B>"landmark" sequences</B> by <A HREF="https://blast.ncbi.nlm.nih.gov/smartblast/"><B>Smart BLAST</B></A>. <!-- Lewis, 7/10/2017: Reference is a set of protein sequences identified by the NCBI BLAST group as being "landmark" sequences. BLAST group can explain how they identify "landmark" sequences -- what are the criteria a sequence must meet in order to be considered a "landmark"] --></LI><BR>
|
|
|
|
<UL>
|
|
<LI>The <A HREF="https://blast.ncbi.nlm.nih.gov/smartblast/smartBlast.cgi?CMD=Web&amp;PAGE_TYPE=BlastDocs">SmartBLAST help document</A> includes a section on the "Landmark Database," which describes how the landmark sequences are selected. Excerpt:<BR><BR>
|
|
"The landmark database includes proteomes from 27 genomes spanning a wide taxonomic range. This search set is produced using the best available genomic assemblies for each organism with the following procedure. First, the most recent representative assembly from each organism is identified. Second, all proteins annotated on each assembly are downloaded and compiled into the landmark BLAST database. The result is a taxonomically diverse non-redundant set of proteins supported by genomic assemblies."
|
|
</LI>
|
|
</UL><BR>
|
|
|
|
</UL>
|
|
|
|
<LI><A NAME="SummaryPageSequencesFiltersSource"></A><B>Source</B> - The source database from which a protein sequence record came</LI><BR>
|
|
|
|
<UL>
|
|
<LI>The NCBI <A HREF="/protein">Protein database</A> pulls together sequences from a variety of source databases, such as the protein translations of nucleotide sequence records in the <A HREF="/genbank/">GenBank</A>, <A HREF="http://www.ebi.ac.uk/">European Molecular Biology Laboratory (EMBL)</A>, <A HREF="http://www.ddbj.nig.ac.jp/">DNA Data Bank of Japan (DDBJ)</A>, and NCBI <A HREF="/refseq/">Reference Sequence (RefSeq)</A> databases, as well as the <A HREF="https://www.ebi.ac.uk/uniprot">Swiss-Prot</A>, <A HREF="http://pir.georgetown.edu/">Protein Information Resource (PIR)</A>, <A HREF="https://www.prf.or.jp/index-e.html">Protein Research Foundation (PRF) </A>, and the NCBI <A HREF="/genbank/tpa/">Third Party Annotation (TPA)</A> databases.</LI>
|
|
<LI>You can use the <B>"Filters:Source" check boxes</B> on a conserved domain architecture's summary page to retrieve the subset of proteins that are from the source database(s) of interest.</LI>
|
|
<LI><I>Note: The <B>folder tabs</B> that appear under the blue header for "Sequences with this architecture" provide an <B>alternative way</B> to retrieve proteins from some of the commonly used source databases, such as RefSeq and Swiss-Prot.</I></LI>
|
|
</UL><BR>
|
|
|
|
|
|
<LI><A NAME="SummaryPageSequencesFiltersOrganism"></A><B>Organism</B> - Enter the scientific name of any organism that appears in the list of "<A HREF="#SummaryPageSequences">sequences with this architecture</A>" <!-- or any taxonomic node above the organism name, -->to display only the protein sequences that have this conserved domain architecture, and that come from the organism <!-- or taxonomic node -->you have specified.</LI><BR>
|
|
|
|
<UL>
|
|
<LI><B>Note:</B> You can enter a taxonomic node other than Genus species. However, the system will currently retrieve only the proteins that have been classified down to the specified level of the taxonomic tree, but no deeper.</LI>
|
|
<LI>For <B>example</B>, open the "Sequences with this architecture" table for <A HREF="/Structure/sparcle/archview.html?archid=11481348">architecture ID 11481348</A>: type IIA DNA topoisomerase subunit B.</LI>
|
|
<LI>Enter "<I>Pseudobutyrivibrio ruminis</I>" (without the quotes) in the "Filters:Organism" text box. The system will display only the proteins that have been classified with that exact genus and species.</LI>
|
|
<LI>Now clear/dismiss the filter you just entered (or simply reload the SPARCLE record for <A HREF="/Structure/sparcle/archview.html?archid=11481348">architecture ID 11481348</A>) in order to once again display all of the proteins, before doing the next step below.</LI>
|
|
<LI>Enter the taxonomic node "<I>Clostridiales</I>" (without quotes) in the "Organism" filter. The system will display only the proteins that have been classified down to that node of the taxonomic tree, and no deeper.</LI>
|
|
<LI><I>Note: The Organism filter will be enhanced in the future to allow retrieval by any node in an organism's lineage.</I></LI>
|
|
</UL><BR>
|
|
|
|
<LI><A NAME="SummaryPageSequencesFiltersDescription"></A><B>Description</B> - This filter retrieves the subset of proteins that have this conserved domain architecture, and that have a description (definition line) containing the keyword(s) that you type in the textbox.</LI><BR>
|
|
<UL>
|
|
<LI>For example, open the "Sequences with this architecture" table for <A HREF="/Structure/sparcle/archview.html?archid=12201410">architecture ID 12201410</A>: hybrid sensor histidine kinase/response regulator.</LI>
|
|
<LI>The "Sequences with this architecture" table includes some proteins with the description of "<I>Signal transduction histidine kinase</I>."</LI>
|
|
<LI>To view only those proteins, you can enter a <B>single keyword</B> such as "<I>signal</I>" or a <B>phrase</B> such as "<I>signal transduction</I>" (with or without quotes) in the text box beside the "Description" filter.</LI>
|
|
<LI><B>Note:</B> if you enter two or more terms, they must be <B>adjacent</B> to each other in the description of a protein in order for the protein to be retrieved. That is, if you enter two or more words, the system will search for them as a <B>phrase</B>, whether or not you surround them with quotes.</LI>
|
|
<LI>For <B>example</B>, the protein sequences with the description "<I>Signal transduction histidine kinase</I>" will <I>not</I> be retrieved if you enter the words "<I>signal histidine</I>" (with or without quotes) in the "description" filter.</LI>
|
|
</UL><BR>
|
|
|
|
<LI><A NAME="SummaryPageSequencesFiltersGeneSymbol"></A><B>Gene Symbol</B> - This filter retrieves the subset of proteins that have this conserved domain architecture, and that are linked to genes that have the symbol you specified in the textbox.</LI><BR>
|
|
|
|
<UL>
|
|
<LI>If a <A HREF="/gene/">Gene</A> record lists an official symbol as well as aliases (alternative gene symbols), and that gene is associated with the architecture, you can type any one of those symbols into the textbox to retrieve the subset of protein sequences linked to the gene.</LI>
|
|
</UL><BR>
|
|
|
|
<!-- LI><A NAME="_____"></A><B>__________</B> - __________</LI><BR>
|
|
|
|
<UL>
|
|
<LI>________________________</LI>
|
|
</UL><BR -->
|
|
|
|
</UL>
|
|
|
|
</UL>
|
|
|
|
<!-- ======= END_SEQUENCES_WITH_THIS_ARCHITECTURE_FILTERS ====== -->
|
|
|
|
<!-- ======= SEQUENCES_WITH_THIS_ARCHITECTURE_EMPTY_SET ====== -->
|
|
|
|
<P CLASS="NormalText indent20">
|
|
|
|
<A NAME="SummaryPageSequencesEmptySet"></A><A NAME="SequencesWithThisArchitectureEmptySet"></A><A NAME="EmptySet"></A><I>Note: <B>Empty set</B> (no links to protein sequences): Occasionally, the "<A HREF="#SummaryPageSequences">sequences with this architecture</A>" table <!-- on a conserved domain architecture's summary page -->might display a message that says, "This architecture currently does not link to any protein sequence records." This might be true for either of the following reasons:</P>
|
|
|
|
<BLOCKQUOTE>
|
|
<UL>
|
|
|
|
<LI>The original sequence(s) in which the architecture was found are no longer in the public database (e.g., they might have been found to be erroneous and were therefore withdrawn).<BR>
|
|
-- or -- </LI>
|
|
|
|
<LI>The scoring used by the CDD/CD-Search systems might have been refined, and the sequences that were originally linked to this architecture are now linked to a different architecture that achieves a higher score. (An example of this is provided in the section of this document about <A HREF="#OngoingResearch">ongoing research</A>.)</LI>
|
|
|
|
</UL>
|
|
</BLOCKQUOTE>
|
|
|
|
<P CLASS="NormalText indent20">
|
|
In either case, however, the SPARCLE record for the domain architecture is retained in the database, and its architecture ID is also retained (and not re-used for any other architecture), because it is possible that another sequence in the future will map to the architecture.</I><BR><BR>
|
|
|
|
</P>
|
|
|
|
<!-- ======= END_SEQUENCES_WITH_THIS_ARCHITECTURE_EMPTY_SET ====== -->
|
|
|
|
</TD>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
</TR>
|
|
|
|
</TABLE>
|
|
<BR>
|
|
|
|
<!-- === END_LEVEL_1_TOPIC_SUMMARY_PAGE_SEQUENCES ===== -->
|
|
|
|
<!-- ======= LEVEL_1_TOPIC_CURATED_NAMES_AND_LABELS ========== -->
|
|
|
|
<A NAME="SummaryPageCuratedNamesAndLabels"></A>
|
|
<A NAME="CuratedNamesAndLabels"></A>
|
|
<TABLE WIDTH="100%" BORDER="0" CELLSPACING="0" CELLPADDING="0" BGCOLOR="#FFFFFF">
|
|
|
|
<TR>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
<TD ALIGN="LEFT" VALIGN="TOP">
|
|
|
|
<SPAN CLASS="HeaderText3"><B>Curated Names and Labels</B></SPAN>
|
|
<img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
<BR><BR>
|
|
|
|
<!-- =========== MINI_TOC =========== -->
|
|
|
|
<!-- BLOCKQUOTE>
|
|
<A HREF="#SummaryPageTaxonomicScope">Taxonomic Scope</A> |
|
|
<A HREF="#SummaryPageCuratedName">Name</A> |
|
|
<A HREF="#SummaryPageCuratedLabel">Label</A> |
|
|
<A HREF="#SummaryPageEvidence">Supporting evidence</A>: <A HREF="#SummaryPageEvidenceProteins">Protein sequences</A>, <A HREF="#SummaryPageEvidenceConservedDomains">Conserved domains</A>, <A HREF="#SummaryPageEvidencePublications">Publications</A>, <A HREF="#SummaryPageEvidenceOther">Other</A>
|
|
</BLOCKQUOTE>
|
|
<BR -->
|
|
|
|
<!-- =========== END_MINI_TOC =========== -->
|
|
|
|
<P CLASS="NormalText indent20">
|
|
The <B>Curated Names and Labels</B> section of a conserved domain architecture's summary page lists the architecture's:<BR>
|
|
<A HREF="#SummaryPageTaxonomicScope">Taxonomic Scope</A> |
|
|
<A HREF="#SummaryPageCuratedName">Name</A> |
|
|
<A HREF="#SummaryPageCuratedLabel">Label</A> |
|
|
<A HREF="#SummaryPageEvidence">Supporting evidence</A>: <A HREF="#SummaryPageEvidenceProteins">Protein sequences</A>, <A HREF="#SummaryPageEvidenceConservedDomains">Conserved domains</A>, <A HREF="#SummaryPageEvidencePublications">Publications</A>, <A HREF="#SummaryPageEvidenceOther">Other</A>
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="SummaryPageTaxonomicScope"></A><A NAME="TaxonomicScope"></A><B>Taxonomic Scope</B></LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>The <B>taxonomic scope</B> column indicates the taxonomic node to which the architecture name and label apply.</LI><BR>
|
|
|
|
<LI>By <B>default</B>, conserved domain architectures are associated with the <B>root of the taxonomic tree</B> (i.e., all organisms). When an architecture is associated with the root, it means the name/label of the architecture is not specific to any node of the full taxonomic tree. This is true of most architectures in the SPARCLE database.</LI><BR>
|
|
|
|
<UL>
|
|
<LI>For example, search the SPARCLE database to:<BR>
|
|
<A HREF="/sparcle?term=all%5BFilter%5D%20NOT%20%28%22archaea%22%5BOrganism%5D%20OR%20%22bacteria%22%5BOrganism%5D%20OR%20%22eukaryota%22%5BOrganism%5D%20OR%20%22fungi%22%5BOrganism%5D%20OR%20%22metazoa%22%5BOrganism%5D%20OR%20%22viridiplantae%22%5BOrganism%5D%20OR%20%22viruses%22%5BOrganism%5D%29">retrieve the architectures that have a taxonomic scope of <B>all organisms</B></A></LI><BR>
|
|
</UL>
|
|
|
|
<LI>If the taxonomic classification of an architecture is not root, but is instead a <B>more specific taxonomic node</B>, that means the curator is asserting that the name/label chosen for the architecture is applicable within the specified node, but not necessarily within other taxonomic branches.</LI><BR>
|
|
|
|
<UL>
|
|
<LI>For example, a search of the <A HREF="/sparcle/">SPARCLE</A> database for:<BR><BR>
|
|
|
|
<a HREF="/sparcle?term=guanylate%20cyclase%20AND%20bacteria%5BOrganism%5D"><b>guanylate cyclase AND bacteria[Organism]</b></a><BR><BR>
|
|
will retrieve the architectures that contain the terms "guanylate" and "cyclase" in any field of the SPARCLE architecture record, and whose names and labels are applicable within bacteria but not within other taxonomic nodes.<BR><BR>
|
|
|
|
<a HREF="/sparcle?term=guanylate%20cyclase%20AND%20eukaryota%5BOrganism%5D"><b>guanylate cyclase AND eukaryota[Organism]</b></a><BR><BR>
|
|
will retrieve the architectures that contain the terms "guanylate" and "cyclase" in any field of the SPARCLE architecture record, and whose names and labels are applicable within eukaryota but not within other taxonomic nodes.
|
|
</LI><BR>
|
|
</UL>
|
|
|
|
<LI>The section of this document about <A HREF="#SearchFields">Search fields</A>: <A HREF="#SearchFieldOrganism">[Organism]</A> provides additional information about the taxonomic classification of conserved domain architectures and search tips on how to restrict your search to a specific taxonomic node, if desired.</LI><BR>
|
|
|
|
</UL><BR>
|
|
|
|
<LI><A NAME="SummaryPageCuratedName"></A><A NAME="CuratedNamesAndLabelsName"></A><B>Name</B></LI><BR>
|
|
|
|
<UL>
|
|
<LI>The name of the conserved domain architecture.<BR>
|
|
The name is displayed in two places on a SPARCLE record: near the top of the record (in bold font), and in the "Curated names and labels" section of the record.</LI><BR>
|
|
<LI>As an example, the name of the conserved domain architecture shown in the <A HREF="#SummaryPage">illustration of a sample SPARCLE record</A> is "DNA gyrase subunit B." <I>(You can also see this name in the live SPARCLE record for <A HREF="/Structure/sparcle/archview.html?archid=10647733">architecture ID 10647733</A>.)</I>
|
|
</LI><BR>
|
|
<LI>The name of a conserved domain architecture is either assigned manually by <A HREF="#DataProcessingCurated">curation</A>, or computationally by the <A HREF="#DataProcessingAutonamed">autoname</A> algorithm or the <A HREF="#DataProcessingNamedByDomain">namedByDomain</A> algorithm.</LI><BR>
|
|
</UL><BR>
|
|
|
|
<LI><A NAME="SummaryPageCuratedLabel"></A><A NAME="CuratedNamesAndLabelsLabel"></A><B>Label</B></LI><BR>
|
|
|
|
<UL>
|
|
<LI>The label provides a description of the conserved domain architecture's biological function.<BR>
|
|
The label is displayed in two places on a SPARCLE record: near the top of the record (beneath the bold font that shows the architecture's name), and in the "Curated names and labels" section of the record.</LI><BR>
|
|
<LI>As an example, the label of the conserved domain architecture shown in the <A HREF="#SummaryPage">illustration of a sample SPARCLE record</A> is "DNA gyrase is a type 2 topoisomerase that relaxes supercoils but can also introduce negative supercoils into DNA in an ATP-dependent manner." <I>(You can also see this label in the live SPARCLE record for <A HREF="/Structure/sparcle/archview.html?archid=10647733">architecture ID 10647733</A>.)</I>
|
|
</LI><BR>
|
|
</UL>
|
|
|
|
</UL>
|
|
|
|
</P>
|
|
|
|
<!-- ======= SUMMARY_PAGE_EVIDENCE ========== -->
|
|
|
|
<P CLASS="NormalText indent20">
|
|
|
|
<A NAME="SummaryPageEvidence"></A>
|
|
<A NAME="SummaryPageSupportingEvidence"></A>
|
|
<A NAME="SupportingEvidence"></A>
|
|
<A NAME="Evidence"></A>
|
|
|
|
<UL>
|
|
<LI><B>Supporting Evidence:</B><BR><BR>
|
|
The "Curated Names and Labels: Supporting Evidence" section of a conserved domain architecture's summary page lists the evidence that was used by NCBI <A HREF="#DataProcessingCurated">curators</A>, or by the "<A HREF="#DataProcessingAutonamed">autonamed</A>" or "<A HREF="#DataProcessingNamedByDomain">namedbydomain</A>" algorithms, to assign a name to the architecture. Some types of supporting evidence include:
|
|
</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="SummaryPageEvidenceProteins"></A><A NAME="SummaryPageEvidenceProteinSequences"></A><A NAME="SupportingEvidenceProteins"></A><A NAME="SupportingEvidenceProteinSequences"></A><A NAME="EvidenceProteins"></A><A NAME="EvidenceProteinSequences"></A><B>Protein sequences</B><BR><BR>
|
|
|
|
As described in the <A HREF="#DataProcessing">data processing</A> section of this document, the names of high quality protein sequences are used by NCBI <A HREF="#DataProcessingCurated">curators</A> and by the "<A HREF="#DataProcessingAutonamed">autonamed</A>" algorithm in assigning a name to the conserved domain architecture (if those proteins are representative of the overall group of sequences that have the architecture in question). The Supporting Evidence: Protein Sequences section of a conserved domain architecture's summary page lists the protein sequence records that were used to name the architecture.</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageEvidenceConservedDomains"></A><A NAME="SupportingEvidenceConservedDomains"></A><A NAME="EvidenceConservedDomains"></A><B>Conserved domains</B><BR><BR>
|
|
|
|
As described in the <A HREF="#DataProcessing">data processing</A> section of this document, the names of conserved domain models are used by NCBI <A HREF="#DataProcessingCurated">curators</A> and by the "<A HREF="#DataProcessingNamedByDomain">namedbydomain</A>" algorithm in assigning a name to the conserved domain architecture.<BR><BR>
|
|
|
|
The "Supporting Evidence: Conserved Domains" section of a SPARCLE record might list one or more of the domains that are present in the architecture (i.e., one or more of the domains that are listed in the "<A HREF="#SummaryPageConservedDomains">Conserved domains in this architecture</A>" section of the SPARCLE record). It might also list domain models that are not direct components of the architecture, but that belong to the same <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamily clusters</A> as the components and are useful in helping to name the architecture.<BR><BR>
|
|
|
|
As an example, see the conserved domain architecture for the <A HREF="/Structure/sparcle/archview.html?archid=11530124">PAS and AAA domain-containing protein (architecture ID 11530124)</A>, which was <A HREF="#DataProcessingNamedByDomain">namedByDomain</A>. The "<A HREF="#SummaryPageConservedDomains">Conserved domains in this architecture</A>" section of the SPARCLE record lists the top-scoring domain models (as determined by <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A>) on the proteins that have the architecture:<BR>
|
|
|
|
<BLOCKQUOTE>
|
|
pfam00126: HTH_1 - Bacterial regulatory helix-turn-helix protein, lysR family<BR>
|
|
pfam00158: Sigma54_activat - Sigma-54 interaction domain<BR>
|
|
smart00091: PAS - PAS domain<BR>
|
|
smart00116: CBS - Domain in cystathionine beta-synthase and other proteins<BR></BLOCKQUOTE>
|
|
|
|
The "Supporting Evidence: Conserved Domains" section of that SPARCLE record lists one of the domain models above, as well as three other domain models (with <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">conserved domain accession numbers</A> that begin with a "cd" prefix):<BR>
|
|
|
|
<BLOCKQUOTE>
|
|
cd02205: CBS_pair<BR>
|
|
cd00130: PAS<BR>
|
|
cd00009: AAA<BR>
|
|
pfam00126: HTH_1<BR>
|
|
</BLOCKQUOTE>
|
|
|
|
The domain models with "cd" accessions are not direct components of the architecture (i.e., they are not the top-scoring hits), but they belong to the same <A HREF="../../cdd/cdd_help.shtml#Superfamily">clusters</A> as the component domains and are useful in helping to name the architecture because they are curated domains whose names were carefully selected based on published research about protein functions.</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageEvidencePublications"></A><A NAME="SupportingEvidencePublications"></A><B>Publications</B><BR><BR>
|
|
Published articles that describe the function of proteins that contain the conserved domains in the architecture, and that were used in naming the architecture.</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageEvidenceOther"></A><A NAME="SupportingEvidenceOther"></A><A NAME="EvidenceOther"></A><B>Other</B><BR><BR>
|
|
Other types of evidence, as available, might also influence the name and functional label that is assigned to a conserved domain architecture. An example of additional evidence could be the biological pathway (<A HREF="/biosystems/">biosystem</A>) of which the protein is a part.</LI><BR>
|
|
|
|
<!-- LI><A NAME="_____"></A><B>__________</B> - __________</LI><BR -->
|
|
|
|
</UL>
|
|
|
|
</UL>
|
|
|
|
</P>
|
|
|
|
</TD>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
</TR>
|
|
|
|
</TABLE>
|
|
|
|
<!-- === END_SUMMARY_PAGE_EVIDENCE ===== -->
|
|
|
|
<!-- === END_LEVEL_1_TOPIC_CURATED_NAMES_AND_LABELS ===== -->
|
|
|
|
<!-- ======= LEVEL_1_TOPIC_SUMMARY_PAGE_CONSERVED_DOMAINS ====== -->
|
|
|
|
<A NAME="SummaryPageConservedDomains"></A>
|
|
<A NAME="ConservedDomainsInThisArchitecture"></A>
|
|
<TABLE WIDTH="100%" BORDER="0" CELLSPACING="0" CELLPADDING="0" BGCOLOR="#FFFFFF">
|
|
|
|
<TR>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
<TD ALIGN="LEFT" VALIGN="TOP">
|
|
|
|
<SPAN CLASS="HeaderText3"><B>Conserved domains in this architecture</B></SPAN>
|
|
<img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
<BR><BR>
|
|
|
|
<P CLASS="NormalText indent20">
|
|
|
|
<UL>
|
|
|
|
<LI>Each <A HREF="#ConservedDomainArchitecture">conserved domain architecture</A> reflects the set of conserved domain models that are top-scoring hits (as determined by the <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A> service) on the proteins that possess the architecture, the sequential order of those domains, and the <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types">type of hit</a> each domain has to the proteins. Each architecture is given a unique, stable <A HREF="#ArchitectureID">architecture ID</A>. <I>(As noted in the <A HREF="#Overview">Overview</A> section of this document, it is also possible for a domain architecture to consist of a single conserved domain footprint.)</I></LI><BR>
|
|
|
|
<LI>The "<B>Conserved domains in this architecture</B>" section of a <A HREF="#SummaryPage">SPARCLE record</A> provides a tabular list of the conserved domain models that compose the architecture.</LI><BR>
|
|
|
|
<!-- UL>
|
|
<LI><I>Note: the order in which the domains are listed in the table does not necessarily reflect their N-terminal to C-terminal order on the proteins that contain the architecture.</I></LI>
|
|
<LI><I>Rather, the graphic near the top of the architecture's summary page shows the N-terminal to C-terminal order of the domains.</I></LI>
|
|
</UL -->
|
|
|
|
<LI>The <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">type of accession number</A> in the architecture reflects the <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types">type of hit</A> it has on the proteins that possess the architecture.<BR>
|
|
Accession numbers that begin with the <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix_cl">"cl" prefix</A> indicate a <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_superfamily">superfamily hit</A> (the <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix_cl">"cl" prefix</A> stands for superfamily cluster).<BR>
|
|
All other <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">types of accession numbers</A> indicate <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hits</A>.</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageConservedDomainsNote1"></A>
|
|
<A NAME="ConservedDomainsInThisArchitectureNote1"></A>The order in which the domains are listed in the table does not necessarily reflect their N-terminal to C-terminal order on the proteins that contain the architecture. The graphic near the top of a conserved domain architecture's summary page, however, does show the N-terminal to C-terminal order of the domains (<A HREF="#SummaryPage">illustrated example</A>).</LI><BR>
|
|
|
|
<LI><A NAME="SummaryPageConservedDomainsNote2"></A>
|
|
<A NAME="ConservedDomainsInThisArchitectureNote2"></A><I>Note: one or more of the conserved domains that compose the architecture might also be listed as <A HREF="#Evidence">supporting evidence</A> that was used in assigning the <A HREF="#Name">name</A> to the architecture. However, the supporting evidence might also (or might instead) list related conserved domains, as explained in the section of this document that describes that part of a <A HREF="#SummaryPage">SPARCLE record</A>.</I></LI><BR>
|
|
|
|
</UL>
|
|
|
|
</P>
|
|
|
|
</TD>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
</TR>
|
|
|
|
</TABLE>
|
|
|
|
<!-- === END_LEVEL_1_TOPIC_SUMMARY_PAGE_CONSERVED_DOMAINS ===== -->
|
|
|
|
<!-- ====== LEVEL_1_TOPIC_SUMMARY_PAGE_FUNCTIONAL_SITES ======== -->
|
|
|
|
<A NAME="SummaryPageFunctionalSites"></A>
|
|
<TABLE WIDTH="100%" BORDER="0" CELLSPACING="0" CELLPADDING="0" BGCOLOR="#FFFFFF">
|
|
|
|
<TR>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
<TD ALIGN="LEFT" VALIGN="TOP">
|
|
|
|
<SPAN CLASS="HeaderText3"><B>Functional sites in this architecture</B></SPAN>
|
|
<img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
<BR><BR>
|
|
|
|
<P CLASS="NormalText indent20">
|
|
|
|
<UL>
|
|
|
|
<LI>Functional sites are also referred to as <A HREF="../../cdd/cdd_help.shtml#ConservedFeatures"><B>conserved features/sites</B></A>, and typically describe sites such as <B>catalytic residues</B>, <B>binding sites</B>, or <B>motifs</B> commonly referred to in the literature.</LI><BR>
|
|
|
|
<LI>They are generally identified in <A HREF="../../cdd/cdd_help.shtml#CDSource_NCBI_curated">NCBI-curated domains</A>.</LI><BR>
|
|
|
|
<LI>Functional sites are listed on a <A HREF="#SummaryPage">SPARCLE record</A> <B>only if</B> the proteins possessing that conserved domain architecture have a <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit"><B>specific hit</B></A> to the NCBI-curated domain model in which the conserved features/sites have been annotated.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
</P>
|
|
|
|
</TD>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
</TR>
|
|
|
|
</TABLE>
|
|
<BR>
|
|
|
|
<!-- === END_LEVEL_1_TOPIC_SUMMARY_PAGE_FUNCTIONAL_SITES ===== -->
|
|
|
|
<!-- ======= LEVEL_1_TOPIC_SUMMARY_PAGE_XXXXXXXX ========== -->
|
|
|
|
<A NAME="SummaryPage__________"></A>
|
|
<!-- TABLE WIDTH="100%" BORDER="0" CELLSPACING="0" CELLPADDING="0" BGCOLOR="#FFFFFF">
|
|
|
|
<TR>
|
|
|
|
<TD WIDTH="20" CLASS="NormalText" ALIGN="LEFT" VALIGN="TOP"> </TD>
|
|
|
|
<TD ALIGN="LEFT" VALIGN="TOP">
|
|
|
|
<SPAN CLASS="HeaderText3"><B>_________________</B></SPAN>
|
|
<img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
<BR><BR>
|
|
|
|
<P CLASS="NormalText indent20">
|
|
|
|
<UL>
|
|
<LI>__________</LI>
|
|
<LI>__________</LI>
|
|
<LI>__________</LI>
|
|
<UL>
|
|
<LI>__________</LI>
|
|
<LI>__________</LI>
|
|
<LI>__________</LI>
|
|
</UL>
|
|
</UL>
|
|
|
|
</P>
|
|
|
|
</TD>
|
|
</TR>
|
|
|
|
</TABLE>
|
|
<BR -->
|
|
|
|
<!-- === END_LEVEL_1_TOPIC_SUMMARY_PAGE_XXXXXXXX ===== -->
|
|
|
|
<!-- ====== PAGE_MARGIN_TO_RIGHT_OF_BLUE_EDGE_BOX_WITH_SECTION_TEMPLATE_CONTENTS ====== -->
|
|
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ############# END_BLUE_EDGE_BOX_WITH_SECTION_4_CONTENTS ############ -->
|
|
|
|
<!-- ==================== VERTICAL SPACER ======================= -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
<TR>
|
|
<TD class="WhiteCell NormalText"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ==================== END_VERTICAL SPACER ======================= -->
|
|
|
|
<!-- ########### BEGIN_BLUE_HEADER_SECTION_5 ############# -->
|
|
|
|
<A NAME="DataProcessing"></A>
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="SteelBlueCell"><SPAN class="HeaderText1">Data Processing</SPAN></TD>
|
|
<TD class="SteelBlueCell" WIDTH="15" ALIGN="left" VALIGN="middle"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A></TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ############## END_BLUE_HEADER_SECTION_5 ############ -->
|
|
|
|
<!-- ########## BEGIN_BLUE_EDGE_BOX_WITH_SECTION_5_CONTENTS ########### -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="WhiteCellBlueEdgeAll NormalText">
|
|
|
|
<!-- ============ MINI_TOC_FOR_THIS_SECTION ============== -->
|
|
<BR>
|
|
<BLOCKQUOTE><BLOCKQUOTE>
|
|
|
|
<A HREF="#DataProcessingOverview">data processing <B>overview</B></A> |
|
|
<A HREF="#DataProcessingReviewLevel"><B>three tiers</B> of data:</A> <A HREF="#DataProcessingCurated"><B>curated</B> architectures</A>, <A HREF="#DataProcessingAutonamed"><B>autonamed</B> architectures</A>, <A HREF="#DataProcessingNamedByDomain"><B>named by domain</B> architectures</A> | <!-- A HREF="#DataProcessingComputed"><B>computed</B> name architectures</A -->
|
|
<A HREF="#DataProcessingTypesOfArchitectures"><B>two types</B> of architectures:</A> <A HREF="#DataProcessingTypesOfArchitecturesSuperfamily"><B>superfamily</B> architectures</A>, <A HREF="#DataProcessingTypesOfArchitecturesSubfamily"><B>subfamily</B> architectures</A> | <A HREF="#DataProcessingArchitecturesWithSingleConservedDomainFootprint"><B>single domain</B> architectures</A> | <A HREF="#DataProcessingArchitectureID">each architecture receives a unique and stable <B>architecture ID</B></A> |
|
|
<A HREF="#DataProcessingOngoingResearch"><B>ongoing research</B></A> | <A HREF="#DataProcessingLinks"><B>links</B> from architectures to other data types</A>
|
|
|
|
</BLOCKQUOTE></BLOCKQUOTE>
|
|
|
|
<!-- ========== END_MINI_TOC_FOR_THIS_SECTION ============ -->
|
|
|
|
<!-- =========== LEVEL_1_TOPIC_DATA_PROCESSING_OVERVIEW ============= -->
|
|
|
|
<A NAME="DataProcessingOverview"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Data processing overview</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<!-- IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right"><IMG SRC="images/______" WIDTH="200" HEIGHT="100" BORDER="0" ALT="________" ALIGN="right">
|
|
<IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right" -->
|
|
|
|
<BLOCKQUOTE>
|
|
As the number of publicly available protein sequences continues to grow exponentially, efforts are underway to organize that data in a biologically meaningful way. This includes identifying relationships among proteins with similar composition and function.<BR><BR>
|
|
|
|
It is possible to cluster proteins by sequence similarity; however, it is computationally costly to compare all proteins against each other. So an all versus all comparison is not an efficient strategy.<BR><BR>
|
|
|
|
Conserved domain annotations on proteins, on the other hand, provide a simple alternative strategy for clustering proteins. NCBI already computes conserved domain annotation on protein sequences as part of the standard data processing pipeline, using the <A HREF="/cdd">Conserved Domain Database (CDD)</A> and <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search tool</A>.<BR><BR>
|
|
|
|
Building on that effort, the <A HREF="/Structure/lexington/lexington.cgi">Conserved Domain Architecture Retrieval Tool (CDART)</A> identifies all proteins that have the same domain annotation (the same order of <A HREF="../../cdd/cdd_help.shtml#CDWhat">conserved domains</A>, and the same <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types">type of hit</A> for each domain, i.e., <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific</A> or <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_non_specific_hit">non-specific</A>) and clusters them together in a group.<BR><BR>
|
|
|
|
As noted in the "<A HREF="#Compare">Compare CDD, CDART, and SPARCLE</A>" section of this document, SPARCLE is built upon CDART. Specifically, SPARCLE contains the <B>subset of domain architectures</B> that include at least one conserved domain model that is a <A HREF="/Structure/cdd/cdd_help.shtml#RPSB_hit_type_specific_hit"><B>specific hit</B></A> to at least one protein sequence in the non-redundant ("nr") protein database.<BR><BR>
|
|
|
|
SPARCLE then <B>assigns a name and functional label</B> (a description of the function of the protein family that has the architecture) <B>to each conserved domain architecture</B>. As noted below, names are assigned to the architectures either by a manual <A HREF="#DataProcessingCurated">curation process</A>, or by automated processes that use algorithms to <A HREF="#DataProcessingAutonamed">autoname</A> an architecture, or to <A HREF="#DataProcessingNamedByDomain">name an architecture based on the domains it contains</A>. Curated domain architecture records are supported with, and linked to, evidence from high quality sequence data and literature.<BR><BR>
|
|
|
|
In this way, SPARCLE is used to <B>classify proteins</B>, based on the <B>functional characterization and labeling of protein sequences</B> that have been grouped by their characteristic <!-- A HREF="../../lexington/docs/cdart_help.html#WhatIs" --><A HREF="#DomainArchitecture">conserved domain architecture</A>.<BR><BR>
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- ========= END_LEVEL_1_TOPIC_DATA_PROCESSING_OVERVIEW ======== -->
|
|
|
|
<!-- ======= LEVEL_1_TOPIC_DATA_PROCESSING_THREE_TIERS ========= -->
|
|
|
|
<A NAME="DataProcessingReviewLevel"></A>
|
|
<A NAME="DataProcessingThreeTiers"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Three tiers ("review levels") of conserved domain architectures</B> are present in the <A HREF="/sparcle">SPARCLE</A> database:</SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<BLOCKQUOTE>
|
|
<OL>
|
|
<LI><A HREF="#DataProcessingCurated"><B>curated</B> architectures</A></LI>
|
|
<LI><A HREF="#DataProcessingAutonamed"><B>autonamed</B> architectures</A></LI>
|
|
<LI><A HREF="#DataProcessingNamedByDomain"><B>named by domain</B> architectures</A></LI>
|
|
<!-- LI><A HREF="#DataProcessingComputed"><B>computed</B> name architectures</A></LI -->
|
|
</OL>
|
|
</BLOCKQUOTE>
|
|
|
|
To compare these types of records at a glance, click on the following links to retrieve conserved domain architectures that include the term "kinase" in their <A HREF="#SearchFieldName">[Name]</A>, and whose names were assigned by the method indicated in <A HREF="#SearchFieldReviewLevel">[ReviewLevel]</A>:<BR>
|
|
|
|
<BLOCKQUOTE>
|
|
<UL>
|
|
<LI><a HREF="/sparcle?term=kinase%5Bname%5D%20AND%20curated%5BReviewLevel%5D">kinase[Name] AND curated[ReviewLevel]</a></LI>
|
|
<LI><a HREF="/sparcle?term=kinase%5Bname%5D%20AND%20autonamed%5BReviewLevel%5D">kinase[Name] AND autonamed[ReviewLevel]</a></LI>
|
|
<LI><a HREF="/sparcle?term=kinase%5Bname%5D%20AND%20namedbydomain%5BReviewLevel%5D">kinase[Name] AND namedbydomain[ReviewLevel]</a></LI>
|
|
</UL>
|
|
</BLOCKQUOTE>
|
|
|
|
Below are details about the data processing methods for each subset, including descriptions of the methods used to name the architectures, followed by a note about <A HREF="#DataProcessingOngoingResearch">ongoing research</A>.<BR>
|
|
|
|
</BLOCKQUOTE>
|
|
<BR>
|
|
|
|
<!-- ===== END_LEVEL_1_TOPIC_DATA_PROCESSING_THREE_TIERS ======= -->
|
|
|
|
<!-- =========== LEVEL_1_TOPIC_DATA_PROCESSING_CURATED =========== -->
|
|
|
|
<A NAME="DataProcessingCurated"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Curated architectures</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<!-- IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right"><IMG SRC="images/______" WIDTH="200" HEIGHT="100" BORDER="0" ALT="________" ALIGN="right">
|
|
<IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right" -->
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<!-- B>Refine conserved domain models:</B> The resulting clusters are only as good as the ingredients, and the <A HREF="../../cdd/cdd_help.shtml#CDSource_NCBI_curated">CDD curation</A> project continues to improve the domain models as new sequence data and published research become available, and through closer analysis of existing clusters. For example, when the CDD curators see a cluster of protein sequences in SPARCLE that is functionally diverse and that can be broken up into subclusters with more precise function, they do that by creating the appropriate domain models that will reflect the diverse functions. The refined domain models are then added to the processing pipeline that defines conserved domain architectures and corresponding groups of protein sequences.<BR><BR -->
|
|
|
|
The manual curation process for conserved domain architectures is carried out by the <A HREF="/cdd/">Conserved Domain Database (CDD)</A> <A HREF="../../cdd/cdd_help.shtml#CDSource_NCBI_curated">curators</A> and includes the steps noted below. Various types of evidence that were used by the curators in naming an architecture and describing its biological function are listed in the <A HREF="#SummaryPageEvidence">supporting evidence</A> section of a <A HREF="#SummaryPage">SPARCLE record</A>.<!-- I>(Search tip: to retrieve all curated domain architectures, search the SPARCLE database for: <a HREF="/sparcle?cmd=search&term=curated%5BReviewLevel%5D">curated[ReviewLevel]</a>)</I --><BR><BR>
|
|
|
|
<B>Describe protein function:</B>
|
|
|
|
<UL>
|
|
|
|
<LI>The domain architecture curation process begins by looking at a cluster of proteins that have the same architecture and asking the question, can we describe them functionally?</LI><BR>
|
|
|
|
<LI>The answer depends on how diverse the sequences are and whether scientific experiments have revealed anything about the functions of the proteins in the cluster.</LI><BR>
|
|
|
|
<LI>To arrive at an answer, the curators look at the existing names of <A HREF="/protein">protein sequences</A> in the set and at the available evidence for the function of that set, such as <A HREF="/pubmed">publications</A> linked to individual sequences, associated <A HREF="/structure">3D structures</A>, and other types of evidence the curators might find in additional <A HREF="/gquery/">NCBI databases</A> such as <A HREF="/biosystems">BioSystems</A>, <A HREF="/pcassay">BioAssay</A>, etc.</LI><BR>
|
|
|
|
<LI>Much of the curation process, however, is based on the availability of published literature, 3D structures, and the presence of high quality sequences (e.g., <A HREF="http://www.uniprot.org/uniprot/">Swiss-Prot</A>, <A HREF="/refseq">RefSeq</A>) in the cluster that have been functionally characterized.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<B>Assign a name to the architecture:</B> The curators then make a judgement call in assigning a name to the architecture, with the goal of selecting a name that is representative of the whole cluster of proteins. Below are some <B>examples of situations</B> the curators encounter in the process of naming architectures, and how the names are chosen in each case:<BR>
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="DataProcessingCuratedExample1"></A>The set of protein sequences with the architecture has a <I>SPECIFIC HIT</I> to a conserved domain model that results in a <I>HIGH CONFIDENCE</I> of the architecture's biological function:</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>In the process of examining the group of sequences that have a given architecture, the curators might find that the proteins have a <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hit</A> to one or more conserved domain models within the architecture. This represents a high confidence level for the inferred function of the protein query sequence, and therefore can influence the name of the architecture.</LI><BR>
|
|
|
|
<LI>For example, the query protein sequence <!-- A HREF="/Structure/cdd/wrpsb.cgi?seqinput=945225&mode=rep" --><A HREF="/Structure/cdd/wrpsb.cgi?seqinput=AAA74451&mode=rep">human guanylyl cyclase (AAA74451)</A> has a <A HREF="/Structure/sparcle/archview.html?archid=11570901">"retinal guanylyl cyclase 2" domain architecture</A>. The domain architecture includes a <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hit</A> to the NCBI-curated domain <A HREF="/Structure/cdd/cddsrv.cgi?uid=cd06371">cd06371</A>, which has a <A HREF="#SearchFieldCDDShortName">short name</A> and <A HREF="#SearchFieldCDDTitle">title</A> of: "PBP1_sensory_GC_DEF_like: Ligand-binding domain of membrane guanylyl cyclases (GC-D, GC-E, and GC-F) that are specifically expressed in sensory tissues." The specific hit to the NCBI-curated domain therefore influenced the name that was given to the architecture: "<A HREF="/Structure/sparcle/archview.html?archid=11570901">retinal guanylyl cyclase 2</A>."
|
|
<!-- only retinal guanylyl cyclases from vertebrates have this architecture: /Structure/sparcle/archview.html?archid=11570901 ; note, however, that the taxonomy scope for the architecture is shown as metazoa; that is too broad because not all metazoans have a retina, so the taxonomic scope is just a general/broad tag --></LI><BR>
|
|
|
|
</UL>
|
|
|
|
<LI><A NAME="DataProcessingCuratedExample1b"></A>A subset of high quality protein sequences that have the <I>SAME NAME</I> and <I>ARE REPRESENTATIVE</I> of the whole protein sequence cluster:</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>In the process of examining the group of sequences that have a given architecture, the curators might find a subset of high quality sequences
|
|
that all have the same name, and that are representative of the whole cluster of protein sequences of which they are a part. In that case, the conserved domain architecture is given the same name as the subset of high quality sequences.</LI><BR>
|
|
|
|
<LI>For example, let's say a cluster of ~200 protein sequences includes a subset of five sequences from a curated database such as Swiss-Prot or RefSeq. If those five sequences have the same name, and if they are reliable and representative of the whole cluster, then their name is given to the domain architecture as well.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<LI><A NAME="DataProcessingCuratedExample2"></A>A subset of high quality protein sequences that have the <I>SAME NAME</I> but <I>ARE NOT REPRESENTATIVE</I> of the whole protein sequence cluster:</LI><BR>
|
|
|
|
<UL>
|
|
<LI>In the process of examining the group of sequences that have a given architecture, the curators might find a subset of high quality sequences that all have the same name, but that do not represent the overall cluster (e.g., the subset represents only a specific taxon or other subgroup within the larger cluster). In that case, we cannot conclude that all of the sequences in the cluster will share the same function.</LI><BR>
|
|
|
|
<LI>This is a common situation, and in such a case, the curators try to find a name for the domain architecture that is more generic and that is derived from the types of conserved domain signatures that are present in the protein family.</LI><BR>
|
|
|
|
<LI>For example, let's say a big protein family has a hit to an NAD dependent dehydrogenase, and one of the high quality sequences has been named as an alpha ketoglutarate dehydrogenase. Extrapolating that very specific name to all of the sequences which have the same architecture might be a stretch, because we don't have evidence to support such an extrapolation (e.g., the substrates for the other proteins in the family might not yet be known). So the curators make a judgement call to apply a more general name to the domain architecture, and they might simply call the family a dehydrogenase, rather than an alpha ketoglutarate dehydrogenase.</LI><BR>
|
|
</UL>
|
|
|
|
<LI><A NAME="DataProcessingCuratedExample3"></A>A subset of high quality protein sequences that have <I>DIFFERENT NAMES</I></LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>In the process of examining the group of sequences that have a given architecture, the curators might find a subset of high quality sequences
|
|
that have different names, indicating functional diversity in that family. In such a case, the CDD curators look for commonalities among the high quality protein names and identify generalities that can be applied to the architecture overall.</LI><BR>
|
|
|
|
<LI>For example, if the names of Swiss-Prot records indicate that the proteins are a valine transporter, isoleucine transporter, and threonine transporter, then the curators would apply the general name of "amino acid transporter" to the domain architecture.</LI><BR>
|
|
|
|
<LI>The curators also take naming rules and standards into consideration. They attempt to find compromises among naming standards (e.g., upper case, lower case, dash, no dash, etc.) that are used by UniProt, Swiss-Prot, and RefSeq, and apply those compromises to the names they apply to SPARCLE domain architectures. There are sometimes differences between American and European naming conventions, and the aim is to minimize or erase those differences over time as naming conventions and standards continue to evolve.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<LI><A NAME="DataProcessingCuratedExample4"></A>Seemingly redundant SPARCLE architectures that all have the <I>SAME NAME AND FUNCTIONAL LABEL, but they are in fact FUNCTIONALLY DIFFERENT</I></LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>Sometimes there are seemingly redundant SPARCLE architectures that all have the same name and functional label, but they are in fact functionally different.</LI><BR>
|
|
|
|
<LI>For example, many architectures might have the name "<A HREF="/sparcle?term=%22sensor%20histidine%20kinase%22%5Bname%5D&cmd=DetailsSearch">sensor histidine kinase</A>" but each of those might be functionally different from the others. The architectures contain the same basic domain signature (i.e., a catalytic domain, an accessory domain that is phosphorylated by the kinase, and a PAS domain), but we don't yet know what signaling pathways they are involved in, and some of the architectures might contain an additional domain whose specific function is unknown. In such cases, if the curators do not have the experimental evidence needed to give the domain architectures a more specific name, they apply the general name to the architectures. The architecture names and descriptions are later refined as additional data and experimental evidence become available.</LI>
|
|
|
|
</UL>
|
|
|
|
<!-- LI><A NAME="_________"></A>___Bullet2___</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>Text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text</LI><BR>
|
|
|
|
<LI>Text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text</LI><BR>
|
|
|
|
</UL -->
|
|
|
|
</UL>
|
|
|
|
<I><B>Search tip:</B><BR>
|
|
To retrieve all curated domain architectures, search the <A HREF="/sparcle/">SPARCLE database</A> for:<BR>
|
|
<a HREF="/sparcle?cmd=search&term=curated%5BReviewLevel%5D">curated[ReviewLevel]</a><BR>
|
|
or add that search criterion to a keyword search to limit your retrieval to the desired type of records. For example, a search for:<BR>
|
|
<a HREF="/sparcle?term=kinase%5Bname%5D%20AND%20curated%5BReviewLevel%5D">kinase[Name] AND curated[ReviewLevel]</a><BR>
|
|
will retrieve conserved domain architectures that include the term "kinase" in their name, and whose names were <A HREF="#DataProcessingCurated">assigned manually by NCBI curators</A>.
|
|
</I>
|
|
<BR><BR>
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- ========= END_LEVEL_1_TOPIC_DATA_PROCESSING_CURATED ========= -->
|
|
|
|
<!-- =========== LEVEL_1_TOPIC_DATA_PROCESSING_AUTONAMED =========== -->
|
|
|
|
<A NAME="DataProcessingAutonamed"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Autonamed architectures</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
<BR>
|
|
|
|
<!-- IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right"><IMG SRC="images/______" WIDTH="200" HEIGHT="100" BORDER="0" ALT="________" ALIGN="right">
|
|
<IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right" -->
|
|
|
|
<BLOCKQUOTE>
|
|
<B>Autonamed</B> conserved domain architectures <B>use an algorithm to automatically generate an architecture name based on the frequency of terms</B> that are present in the definition lines of the proteins that have the architecture. Proteins that were used in naming the architecture are listed in the <A HREF="#SummaryPageEvidence">supporting evidence</A> section of a <A HREF="#SummaryPage">SPARCLE record</A>.<!-- I>(Search tip: to retrieve all autonamed domain architectures, search the SPARCLE database for: <a HREF="/sparcle?cmd=search&term=autonamed%5BReviewLevel%5D">autonamed[ReviewLevel]</a>)</I --><BR><BR>
|
|
|
|
The automatically generated name will begin with the phrase <B>"similar to..."</B> followed by a cleaned up definition line (e.g., removal of taxonomy information, etc.) from the set of high quality proteins that were used to generate the name. The algorithm includes:<BR>
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="DataProcessingAutonamedProteinNameAnalysis"></A>Protein name analysis:</LI><BR>
|
|
<UL>
|
|
<LI>The <B>definition lines</B> of all protein sequences that have a given architecture are analyzed.</LI>
|
|
<LI>First, the protein names are <B>tokenized</B> into word terms.</LI>
|
|
<LI>Next, the most popular terms will be selected as representatives to form a <B>voting committee</B>.</LI>
|
|
<LI>The voting committee will vote for the <B>most representative name</B> in this architecture.</LI>
|
|
<!-- LI>Text text text text text text text text text text text text text text.</LI -->
|
|
<!-- LI>Email from Chris L (Jan 18, 2017): ...A domain architecture name is created if it satisfies certain thresholds involving its frequency in the definition lines of proteins that have the architecture.</LI -->
|
|
</UL>
|
|
<BR>
|
|
|
|
<LI><A NAME="DataProcessingAutonamedConsistencyScore"></A>Consistency score:</LI><BR>
|
|
<UL>
|
|
<!-- LI>Email from Lianyi (Jan 25, 2017): The "autonamed" procedure start with the "Protein Name Analysis" for all proteins in a given architecture, where protein name were tokenized into word terms first, then the most popular terms will be selected as representatives to form a voting committee. The voting committee will vote for the most representative name in this architecture. A post-processing step is subsequently followed by calculating a consistency score to determine to what extend the name of this representative protein sharing terms among others. And this score will be used to decide if this computed name will be selected with enough confidence.</LI -->
|
|
<LI>A post-processing step subsequently calculates a <B>consistency score</B> to determine the extent to which the name of the representative protein shares terms with the names of the other proteins.</LI>
|
|
<LI>The consistency score will be used to decide if this computed name can be selected with enough <B>confidence</B>.</LI>
|
|
<!-- LI>Please note that only a <B>small fraction</B> of architectures [(~3%)] can be autonamed in this fashion due to the <B>high confidence level required</B>.</LI -->
|
|
</UL>
|
|
|
|
</UL>
|
|
|
|
Please note that only a <B>small fraction</B> of architectures <!-- (~3%) -->can be autonamed in this fashion due to the <B>high confidence level required</B>.<BR><BR>
|
|
|
|
Additionally, <B>architecture names are recalculated</B> with each release of the <A HREF="/cdd/">Conserved Domain Database (CDD)</A>. This is because new sequence data are continually added to the <A HREF="/protein/">Protein database</A>. As a result, the number of protein sequences that have a given architecture might increase, which in turn increases the set of protein names from which an architecture name is computed.<BR><BR>
|
|
|
|
<I><B>Search tip:</B><BR>
|
|
To retrieve all autonamed domain architectures, search the <A HREF="/sparcle/">SPARCLE database</A> for:<BR>
|
|
<a HREF="/sparcle?cmd=search&term=autonamed%5BReviewLevel%5D">autonamed[ReviewLevel]</a><BR>
|
|
or add that search criterion to a keyword search to limit your retrieval to the desired type of records. For example, a search for:<BR>
|
|
<a HREF="/sparcle?term=kinase%5Bname%5D%20AND%20autonamed%5BReviewLevel%5D">kinase[Name] AND autonamed[ReviewLevel]</a><BR>
|
|
will retrieve conserved domain architectures that include the term "kinase" in their name, and whose names were assigned computationally by the <A HREF="#DataProcessingAutonamed">Autonamed</A> algorithm.
|
|
</I>
|
|
|
|
</BLOCKQUOTE>
|
|
<BR>
|
|
|
|
<!-- ========= END_LEVEL_1_TOPIC_DATA_PROCESSING_AUTONAMED ========= -->
|
|
|
|
<!-- ======== LEVEL_1_TOPIC_DATA_PROCESSING_NAMED_BY_DOMAIN ======== -->
|
|
|
|
<A NAME="DataProcessingNamedByDomain"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>NamedByDomain architectures</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
<BR>
|
|
|
|
<!-- IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right"><IMG SRC="images/______" WIDTH="200" HEIGHT="100" BORDER="0" ALT="________" ALIGN="right">
|
|
<IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right" -->
|
|
|
|
<BLOCKQUOTE>
|
|
<B>NamedByDomain</B> conserved domain architectures <B>use an algorithm to automatically generate an architecture name based on the highest scoring conserved domains</B> that are present in the architecture. Domains that were used in naming the architecture are listed in the <A HREF="#SummaryPageEvidence">supporting evidence</A> section of a <A HREF="#SummaryPage">SPARCLE record</A>.<!-- I>(Search tip: to retrieve all domain architectures that have been named by domain, search the SPARCLE database for: <a HREF="/sparcle?cmd=search&term=namedbydomain%5BReviewLevel%5D">namedbydomain[ReviewLevel]</a>)</I --><BR>
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="DataProcessingNamedByDomainTopTwoDomains"></A>Architectures that aren't <A HREF="#DataProcessingCurated">curated</A>, and couldn't be <A HREF="#DataProcessingAutonamed">autonamed</A>, are assigned a name based on <B>up to two</B> conserved domain models that are present in the architecture.</LI><BR>
|
|
|
|
<UL>
|
|
<LI>While the architecture's <B>name</B> is based on <B>up to two</B> conserved domains, the functional <B>label</B> can be based on <B>up to four</B> of the conserved domains in the architecture.</LI>
|
|
</UL>
|
|
<BR>
|
|
|
|
<LI><A NAME="DataProcessingNamedByDomainEvalue"></A>Sort conserved domains by E-value</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>If an architecture contains more than two conserved domains, then an algorithm is used to select the two highest-scoring conserved domain models, with a priority given to <a href="../../cdd/cdd_help.shtml#CDSource_NCBI_curated">NCBI-curated</A> domain models.</LI><BR>
|
|
|
|
<LI>All of the conserved domain models that appear in the <a href="../../cdd/cdd_help.shtml#ConciseDisplay">concise view</A> of the architecture are scored based on their <a href="../../cdd/cdd_help.shtml#WRPSBExpect">E-value</A><!-- [against protein sequences in the oldest <a href="/Structure/biosystems/docs/biosystems_help.html#PIG"><a href="#PIG">protein identity group (PIG)</A> (i.e., the lowest PIG ID number) that has the architecture in question.] -->.<BR><BR>
|
|
|
|
<A NAME="DataProcessingNamedByDomainEvalueTechnicalNote"></A>
|
|
<I>Technical note: The E-value of a given conserved domain can vary among the proteins that have the architecture in question, because the composition of the protein sequences may vary outside of the conserved domain architecture. To address this issue, the "NamedByDomain" algorithm uses the E-values of the conserved domain models against protein sequences in the oldest <!-- a href="/Structure/biosystems/docs/biosystems_help.html#PIG" --><a href="#PIG">protein identity group ("PIG," described below)</A> that has the architecture in question. That is, the algorithm uses the E-values of the domain models on the proteins that have the lowest/oldest PIG ID number.<BR><BR>
|
|
|
|
<A NAME="DataProcessingNamedByDomainPIG"></A><A NAME="DataProcessingNamedByDomainEvaluePIG"></A>A <SPAN style="color:#D70000">protein identity group (PIG)</SPAN> is a cluster of protein sequences that are identical to each other in composition and length, regardless of their taxonomic source. The PIGs are automatically generated by the data processing pipeline at NCBI, which identifies all proteins that are identical to each other, regardless of <A HREF="/books/NBK21100/#A268">TaxID</A>, places them together in a protein identity group, and gives each PIG a stable identification number (<SPAN style="color:#D70000">PIG ID</SPAN>).<!-- For step (b) above, links are made to all PIG members that have the same <A HREF="/books/NBK21100/#A268">TaxID</A> as the protein GI cited in the source biosystem record. --></I>
|
|
</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<LI><A NAME="DataProcessingNamedByDomainSpecificHits"></A>Prioritize <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hits</A> and <A HREF="../../cdd/cdd_help.shtml#CDSource_NCBI_curated">NCBI-curated</A> domain models</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>Conserved domain models that have <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hits</A> on the proteins are given priority in naming the architecture.<BR><BR>
|
|
|
|
A specific hit represents a very high confidence that the query sequence belongs to the same protein family as the sequences used to create the domain model, and therefore a high confidence level for the inferred function of the protein query sequence.<BR><BR>
|
|
|
|
The <A HREF="../../cdd/cdd_help.shtml#TOC_CDSearch">CD-Search help document</A> provides additional information about <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types">hit types</A>, including <A HREF="../../cdd/cdd_help.shtml#SpecificHit"><!-- FONT color="rgb(215,0,0)" --><SPAN style="color:#D70000"><i>details and an illustration</i></SPAN> about the domain-specific E-value threshold</A> that is used to identify specific hits.</LI><BR>
|
|
|
|
<LI><A HREF="../../cdd/cdd_help.shtml#CDSource_NCBI_curated">NCBI-curated</A> domain models are given priority:<BR><BR>
|
|
|
|
If an <A HREF="../../cdd/cdd_help.shtml#CDSource_NCBI_curated">NCBI-curated</A> domain model, and a domain model from an external <A HREF="../../cdd/cdd_help.shtml#CDSource">source database</A>, both have a bit score that meets or exceeds the <A HREF="../../cdd/cdd_help.shtml#SpecificHitThresholdScore">E-value threshold for a specific hit</A>, then the NCBI-curated domain model is given priority.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<!-- LI><A NAME="_________"></A><B>___Bullet2___</B></LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>Text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text</LI><BR>
|
|
|
|
</UL>
|
|
<BR -->
|
|
|
|
</UL>
|
|
|
|
<I><B>Search tip:</B><BR>
|
|
To retrieve all domain architectures that have been named by domain, search the <A HREF="/sparcle/">SPARCLE database</A> for:<BR>
|
|
<a HREF="/sparcle?cmd=search&term=namedbydomain%5BReviewLevel%5D">namedbydomain[ReviewLevel]</a><BR>
|
|
or add that search criterion to a keyword search to limit your retrieval to the desired type of records. For example, a search for:<BR>
|
|
<a HREF="/sparcle?term=kinase%5Bname%5D%20AND%20namedbydomain%5BReviewLevel%5D">kinase[Name] AND namedbydomain[ReviewLevel]</a><BR>
|
|
will retrieve conserved domain architectures that include the term "kinase" in their name, and whose names were assigned computationally by the <A HREF="#DataProcessingNamedByDomain">NamedByDomain</A> algorithm.
|
|
</I>
|
|
<BR><BR>
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- ===== END_LEVEL_1_TOPIC_DATA_PROCESSING_NAMED_BY_DOMAIN ===== -->
|
|
|
|
<!-- ======== LEVEL_1_TOPIC_DATA_PROCESSING_COMPUTED ======== -->
|
|
|
|
<A NAME="DataProcessingComputed"></A>
|
|
|
|
<!-- P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Computed name architectures</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
<BR -->
|
|
|
|
<!-- IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right"><IMG SRC="images/______" WIDTH="200" HEIGHT="100" BORDER="0" ALT="________" ALIGN="right">
|
|
<IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right" -->
|
|
|
|
<!-- BLOCKQUOTE>
|
|
<B>Computed</B> names were used when importing a set of high-coverage conserved domain architectures early in the project. The names were derived from the defline of an example protein that has the architecture. In an email on January18, 2017, Chris L. said that "Computed... is effectively deprecated and these should disappear."
|
|
</BLOCKQUOTE>
|
|
<BR -->
|
|
|
|
<!-- ===== END_LEVEL_1_TOPIC_DATA_PROCESSING_COMPUTED ===== -->
|
|
|
|
|
|
<!-- ====== LEVEL_1_TOPIC_DATA_PROCESSING_TYPES_OF_ARCHITECTURES ===== -->
|
|
|
|
<A NAME="DataProcessingTypesOfArchitectures"></A>
|
|
<A NAME="TypesOfArchitectures"></A>
|
|
<A NAME="DataProcessingArchitectureTypes"></A>
|
|
<A NAME="DataProcessingArchitectureType"></A>
|
|
<A NAME="ArchitectureTypes"></A>
|
|
<A NAME="ArchitectureType"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Two types of conserved domain architectures</B>:</SPAN>  <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
<UL>
|
|
|
|
<!-- ======== DATA_PROCESSING_SUPERFAMILY_ARCHITECTURES ======== -->
|
|
|
|
<LI><A NAME="DataProcessingTypesOfArchitecturesSuperfamily"></A><A NAME="TypesOfArchitecturesSuperfamily"></A><A NAME="ArchitectureTypeSuperfamily"></A><A NAME="SuperfamilyArchitectures"></A><A NAME="SuperfamilyArchitecture"></A><B>Superfamily architectures</B><BR><BR>
|
|
|
|
Superfamily architectures consist solely of conserved domain <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamilies</A>. This implies a <B>general functional category</B> for the proteins that have that architecture.<BR><BR>
|
|
|
|
That is, each conserved domain footprint in the architecture has an <A HREF="../../cdd/cdd_help.shtml#RPSBWhat">RPS-BLAST</A> <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_superfamily"><B>superfamily hit</B></A> to every protein that has been classified with the architecture. This is designated by the <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix_cl"><B>"cl" prefix</B></A> in the <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">accession number</A> of each conserved domain in the architecture. The "cl" stands for <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamily</A> <B>cl</B>uster. (To see the accession numbers, mouse over the conserved domain footprints in the architecture's graphical display.)<BR><BR>
|
|
|
|
One example of a superfamily architecture is:<BR><BR>
|
|
     N-terminus------[<B>cl21514</B>]-------[<B>cl00388</B>]------C-terminus<BR><BR>
|
|
|
|
Proteins with this architecture have an <A HREF="../../cdd/cdd_help.shtml#RPSBWhat">RPS-BLAST</A> hit to accession number <A HREF="/Structure/cdd/cddsrv.cgi?uid=cl21514">cl21514</A> (TauE Superfamily: Sulfite exporter TauE/SafE), followed by a hit to <A HREF="/Structure/cdd/cddsrv.cgi?uid=cl00388">cl00388</A> (Thioredoxin_like Superfamily: Protein Disulfide Oxidoreductases and Other Proteins with a Thioredoxin fold)<!-- in the <A HREF="/cdd">Conserved Domain Database (CDD)</A -->.<BR><BR>
|
|
|
|
Specifically, the N-terminal region of each protein with this architecture achieved a statistically significant hit to a conserved domain model that belongs to the TauE superfamily, and the C-terminal region achieved a statistically significant hit to a conserved domain model that belongs to the Thioredoxin_like Superfamily. However, neither hit had a high enough score to be considered a <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hit</a>.<BR><BR>
|
|
|
|
As a result, only the superfamily classification is shown for each region of the protein, and is therefore regarded as a superfamily architecture.<BR><BR>
|
|
|
|
<I>Note: Superfamily architectures are currently found only in the <A HREF="/Structure/lexington/lexington.cgi">CDART</A> resource. A <A HREF="#CompareCDART">brief description of CDART</A> is provided in the "<A HREF="#Compare">Compare CDD, CDART, and SPARCLE</A>" section of this document.</I>
|
|
</LI><BR><BR>
|
|
|
|
<!-- ====== END_DATA_PROCESSING_SUPERFAMILY_ARCHITECTURES ====== -->
|
|
|
|
<!-- ======== DATA_PROCESSING_SUBFAMILY_ARCHITECTURES ======== -->
|
|
|
|
<LI><A NAME="DataProcessingTypesOfArchitecturesSubfamily"></A><A NAME="TypesOfArchitecturesSubfamily"></A><A NAME="ArchitectureTypeSubfamily"></A><A NAME="SubfamilyArchitectures"></A><A NAME="SubfamilyArchitecture"></A><B>Subfamily architectures</B><BR><BR>
|
|
|
|
Subfamily architectures <B>either</B> contain a <B>mix</B> of conserved domain <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamilies</A> and <A HREF="../../cdd/cdd_help.shtml#Hierarchy">subfamilies</A>, <B>or</B> consist <B>solely</B> of conserved domain subfamilies.<BR><BR>
|
|
|
|
A subfamily is represented by a conserved domain model that gets a <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hit</a> to the protein query sequence. The <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hits</a> represent a high confidence that the query sequence belongs to the same protein family as the sequences used to create each conserved domain model, and therefore a <B>high confidence level</B> for the inferred function of the protein query sequence.<BR><BR>
|
|
|
|
<!-- Conserved domain models that represent superfamilies verus subfamilies can be distinguished by their <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">accession number prefix</A>. Superfamilies have a <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix_cl"><B>"cl" prefix</B></A> (which stands for <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamily</A> <B>cl</B>uster), while subfamilies have an accession number prefix that is <B>anything other than "cl"</B>.<BR><BR -->
|
|
|
|
To see if a conserved domain is a superfamily or subfamily, mouse over a conserved domain's footprint in the architecture's graphical display. A <B>superfamily</B> will have a <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix_cl"><B>"cl" prefix</B></A> in the accession number; the "cl" stands for <A HREF="../../cdd/cdd_help.shtml#Superfamily">superfamily</A> <B>cl</B>uster. A <B>subfamily</B> will have an <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">accession number <B>prefix other than "cl"</B></A>.<BR><BR>
|
|
|
|
One example of a subfamily architecture that consists solely of subfamilies is:<BR><BR>
|
|
|
|
     N-terminus------[<B>COG0785</B>]-------[<B>cd03012</B>]------C-terminus<BR><BR>
|
|
|
|
Here, the <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">accession number prefixes</A> are "COG" and "cd," indicating that both conserved domains are <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hits</a>. <!-- The specific hits represent a very high confidence that the query sequence belongs to the same protein family as the sequences used to create each conserved domain model, and therefore a high confidence level for the inferred function of the protein query sequence.<BR><BR -->
|
|
|
|
This architecture can be seen, for <B>example</B>, in the <!-- A HREF="/Structure/cdd/wrpsb.cgi?INPUT_TYPE=live&SEQUENCE=P9WG63.1" --><!-- A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=P9WG63">CD-Search results for the query protein sequence P9WG63: Protein DipZ</A -->
|
|
<A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=NP_217390">CD-Search results for the query protein <B>NP_217390</B></A>: integral membrane C-type cytochrome biogenesis protein DipZ [Mycobacterium tuberculosis H37Rv]. In the "<!-- A HREF="../../cdd/cdd_help.shtml#ProteinClassification" --><B>Protein Classification</B><!-- /A -->" section of the CD-Search results, click on the link for "<A HREF="/Structure/sparcle/archview.html?archid=10002697">domain architecture ID 10002697</A>" to open the corresponding SPARCLE record for that conserved domain architecture, if desired.<BR><BR>
|
|
|
|
Whether you view the <A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=NP_217390">CD-Search results for NP_217390</A>, or the SPARCLE record for <A HREF="/Structure/sparcle/archview.html?archid=10002697">domain architecture ID 10002697</A>, you will see that each conserved domain in the architecture achieves a <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hit</a> to the query protein. This can be viewed on the CD-Search results page, in the "Specific Hits" line of the "Graphical Summary." It can also be viewed in the corresponding architecture record, by mousing over the conserved domain cartoons in the architecture's graphic to see that the <A HREF="../../cdd/cdd_help.shtml#CDSource_accession_prefix">accession number</A> of each graphic begins with a prefix other than "cl".<BR><BR>
|
|
|
|
<I>Note: Subfamily architectures are currently found only in the <A HREF="/sparcle">SPARCLE</A> resource. A <A HREF="#CompareSPARCLE">brief description of SPARCLE</A> is provided in the <A HREF="#Compare">Compare CDD, CDART, and SPARCLE</A> section of this document.</I>
|
|
</LI><BR>
|
|
|
|
<!-- ====== END_DATA_PROCESSING_SUBFAMILY_ARCHITECTURES ====== -->
|
|
|
|
</UL>
|
|
|
|
<!-- ======== END_DATA_PROCESSING_TYPES_OF_ARCHITECTURES ======== -->
|
|
|
|
<!-- ========= DATA_PROCESSING_SINGLE_DOMAIN_ARCHITECTURES ========= -->
|
|
|
|
<A NAME="DataProcessingArchitecturesWithSingleConservedDomainFootprint"></A><A NAME="ArchitecturesWithSingleConservedDomainFootprint"></A><A NAME="SingleConservedDomainArchitectures"></A><A NAME="SingleConservedDomainArchitecture"></A><A NAME="SingleDomainArchitectures"></A><A NAME="SingleDomainArchitecture"></A>
|
|
|
|
<B>Architectures with a single conserved domain footprint</B>:<BR>
|
|
|
|
<UL>
|
|
<LI>It is also possible for a domain architecture to consist of a <B>single conserved domain footprint</B>. That footprint can represent either a superfamily architecture or a subfamily architecture.</LI>
|
|
</UL><BR>
|
|
|
|
<!-- ===== END_DATA_PROCESSING_SINGLE_DOMAIN_ARCHITECTURES ========= -->
|
|
|
|
<!-- ============ DATA_PROCESSING_ARCHITECTURE_ID ============ -->
|
|
|
|
<A NAME="DataProcessingArchitectureID"></A>
|
|
<B>Each architecture receives a unique and stable architecture ID</B>:<BR>
|
|
|
|
<UL>
|
|
<LI>Each conserved domain architecture receives a unique and stable <A HREF="#ArchitectureID"><B>architecture ID</B></A>, which reflects the set of conserved domain models that are <B>top-scoring hits</B> (as determined by the <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A> service) on the proteins that possess the architecture, the <B>sequential order</B> of those domains, and the <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types"><B>type of hit</B></a> each domain has to the proteins. Architectures that consist of a single conserved domain footprint also receive an architecture ID.</LI><BR>
|
|
</UL>
|
|
|
|
<!-- =========== END_DATA_PROCESSING_ARCHITECTURE_ID ============ -->
|
|
|
|
<!-- === DATA_PROCESSING_ADDITIONAL_INFO_ABOUT_CONSERVED_DOMAINS === -->
|
|
|
|
<A NAME="DataProcessingAdditionalInfo"></A>
|
|
<B>Additional information about conserved domains</B>:<BR>
|
|
|
|
<UL>
|
|
<LI>The <A HREF="../../cdd/cdd_help.shtml">Conserved Domain Database (CDD) help document</A> provides additional information about <A HREF="../../cdd/cdd_help.shtml#Hierarchy">domain family hierarchies</A>, including superfamilies and subfamilies. It also provides additional information about the companion <A HREF="../../cdd/cdd_help.shtml#CDSearch_help_contents">CD-Search</A> tool, including the <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_types">hit types</a> displayed in <A HREF="../../cdd/cdd_help.shtml#RPSBResults">CD-Search results</A>, such as <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hits</a>, <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_non_specific_hit">non-specific hits</a>, the <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_superfamily">superfamily(ies)</a> to which those hits belong, and <a href="../../cdd/cdd_help.shtml#RPSB_hit_type_multi_domain">multi-domain models</a>. Each superfamily on a CD-Search results page is represented by a cartoon with a distinct <A HREF="../../cdd/cdd_help.shtml#RPSB_HitColors">color/shape combination</A>, in order to distinguish domains from each other.</LI><BR>
|
|
</UL>
|
|
|
|
<!-- == END_DATA_PROCESSING_ADDITIONAL_INFO_ABOUT_CONSERVED_DOMAINS == -->
|
|
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- ==== END_LEVEL_1_TOPIC_DATA_PROCESSING_TYPES_OF_ARCHITECTURES ==== -->
|
|
|
|
<!-- ======== LEVEL_1_TOPIC_DATA_PROCESSING_ONGOING_RESEARCH ======== -->
|
|
|
|
<A NAME="DataProcessingOngoingResearch"></A>
|
|
<A NAME="OngoingResearch"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Ongoing Research</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<!-- IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right"><IMG SRC="images/______" WIDTH="200" HEIGHT="100" BORDER="0" ALT="________" ALIGN="right">
|
|
<IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right" -->
|
|
|
|
<BLOCKQUOTE>
|
|
<!-- I><B>Ongoing research:</B></I>Please note that conserved domain data, and conserved domain architecture annotations on proteins, continue to evolve as new data become available and as research progresses.<BR><BR -->
|
|
|
|
Please note that conserved domain models, architectures, and the resulting protein sequence clusters, continue to evolve as new data become available and as research progresses. As a result, the domain architecture annotated on a protein sequence, and the members of a protein sequence cluster, might change over time.<BR>
|
|
|
|
<UL>
|
|
|
|
<LI>Specifically, the <A HREF="../../cdd/cdd_help.shtml#CDSource_NCBI_curated">CDD curation</A> project refines conserved domain models as new protein sequences and publications become available, and through closer analysis of existing clusters.</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>For example, when the CDD curators see a cluster of protein sequences in SPARCLE that is functionally diverse and that can be broken up into subclusters with more precise function, they do that by creating the appropriate domain models that will reflect the diverse functions. The refined domain models are then added to the data processing pipeline that defines conserved domain architectures and corresponding groups of protein sequences.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
<LI>Additionally, an architecture that is composed of several individual conserved domain models might later be superseded by a multi-domain model that represents the full-length protein.</LI><BR>
|
|
|
|
<UL>
|
|
|
|
<LI>As an example, in January 2017, the protein sequence <A HREF="/protein/NP_387887">NP_387887</A> was <B>initially</B> annotated with <A HREF="/Structure/sparcle/archview.html?archid=10647733">architecture ID 10647733</A> (as shown in the <SPAN style="color:#D70000">illustrated example</SPAN> in the "<A HREF="#InputSequenceIllustration">input sequence data</A>" section of this document). That architecture is named "DNA gyrase subunit B" and includes <B>four distinct conserved domains</B>.</LI><BR>
|
|
|
|
<LI>In March 2017, when a new build of CDD/SPARCLE was released, the conserved domain architecture annotation for NP_387887 was <B>revised</B> to <A HREF="/Structure/sparcle/archview.html?archid=11481348">architecture ID 11481348</A>, which is a <B>multi-domain</B> model that encompasses the four original conserved domains, and which can be seen in the <A HREF="/Structure/cdd/wrpsb.cgi?seqinput=NP_387887">current CD-Search results for NP_387887</A>. That architecture has a more specific and precise name, "type IIA DNA topoisomerase subunit B," and reflects the <B>full length protein model</B>.</LI><BR>
|
|
|
|
<LI><B>To see the four distinct conserved domains that compose the full length protein model</B>, simply change the <A HREF="../../cdd/cdd_help.shtml#GlobalOptions">CD-Search display option</A> on the <A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=16077074">live CD-Search results for NP_387887</A> from "<!-- A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=16077074&mode=rep" --><A HREF="/Structure/cdd/wrpsb.cgi?seqinput=NP_387887&mode=rep">View Concise Results</A>" to "<!-- A HREF="/Structure/cdd/wrpsb.cgi?SEQUENCE=16077074&mode=full" --><A HREF="/Structure/cdd/wrpsb.cgi?seqinput=NP_387887&mode=full">View Full Results</A>" (using the <A HREF="../../cdd/cdd_help.shtml#GlobalOptions">"View" menu</A> near the upper right hand corner of the CD-Search results page). The Full Results display will show the four conserved domains that compose the full length protein model.
|
|
<!-- B>The individual conserved domains that compose the full protein model can be seen by changing the <A HREF="../../cdd/cdd_help.shtml#GlobalOptions">display option</A> on a CD-Search results page</B> from <A HREF="../../cdd/cdd_help.shtml#ConciseDisplay">concise results</A> to <A HREF="../../Structure/cdd/cdd_help.shtml#FullDisplay">full results</A>. (For example, see the <A HREF="/Structure/cdd/wrpsb.cgi?seqinput=NP_387887&mode=full">current full results for NP_387887</A>). --></LI><BR>
|
|
|
|
<LI>As the available data and understanding of conserved domain architectures continue to evolve, the domain architectures that are annotated on proteins may evolve as well, as shown in this example. Comments about the data are welcome and can be sent to the NCBI Support Center/Help Desk, which is accessible as a link in the footer of NCBI web pages<!-- A HREF="mailto:info@ncbi.nlm.nih.gov">info@ncbi.nlm.nih.gov</A -->.</LI><BR>
|
|
|
|
</UL>
|
|
|
|
</UL>
|
|
|
|
In this way, as the available data and understanding of conserved domain architectures continue to evolve, the domain architectures that are annotated on proteins may evolve as well.<BR><BR>
|
|
|
|
Comments about the data are welcome and can be sent to the NCBI Support Center/Help Desk, which is accessible as a link in the footer of NCBI web pages<!-- A HREF="mailto:info@ncbi.nlm.nih.gov">info@ncbi.nlm.nih.gov</A -->.
|
|
</BLOCKQUOTE>
|
|
|
|
<BR>
|
|
|
|
<!-- ===== END_LEVEL_1_TOPIC_DATA_PROCESSING_ONGOING_RESEARCH ===== -->
|
|
|
|
|
|
<!-- ==== LEVEL_1_TOPIC_DATA_PROCESSING_LINKS_TO_OTHER_DATA_TYPES ==== -->
|
|
|
|
<A NAME="DataProcessingLinks"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Links from architectures to other data types</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<!-- IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right"><IMG SRC="images/______" WIDTH="200" HEIGHT="100" BORDER="0" ALT="________" ALIGN="right">
|
|
<IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right" -->
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
The SPARCLE data processing pipeline calculates two types of <B>direct links</B>:<BR>
|
|
|
|
<OL>
|
|
<LI><B>sparcle_protein</B>: each conserved domain architecture in the SPARCLE database links to all protein sequences that have the architecture.
|
|
<!-- The <A HREF="/Structure/cdd/wrpsb.cgi">CD-Search</A> and <A HREF="/Structure/lexington/lexington.cgi">CDART</A> tools are used to identify all protein sequences in the non-redundant (nr) database that contain a given conserved domain architecture. Links are then made from the architecture's record in the SPARCLE database to all corresponding sequences in the protein database.<BR>
|
|
Every conserved domain architecture in SPARCLE database links to all protein sequences in the non-redundant (nr) database that contain the architecture.<BR>
|
|
All proteins in nr that have the sparcle_architecture of interest --></LI><BR>
|
|
<LI><B>sparcle_cdd</B>: each conserved domain architecture in the SPARCLE database links to all of the <A HREF="../../cdd/cdd_help.shtml#CDWhat">conserved domain</A> models (<A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_specific_hit">specific hits</A> and <A HREF="../../cdd/cdd_help.shtml#RPSB_hit_type_superfamily">superfamilies</A>) that compose the architecture. For example, if an architecture contains one specific hit and one superfamily, that SPARCLE record will link to two <A HREF="/cdd/">Conserved Domain Database (CDD)</A> records -- one for the specific hit and one for the superfamily.
|
|
</LI>
|
|
</OL>
|
|
|
|
All other links between SPARCLE and other Entrez databases are <B>indirect</B>, created by a <B>join</B> between the proteins that contain the architecture and the other data types. For example:
|
|
|
|
<UL>
|
|
|
|
<LI>links from <A HREF="/sparcle">SPARCLE</A> architectures to <A HREF="/gene">Gene</A> records are created by a join between the following:<BR>
|
|
<B>sparcle_protein</B>  AND  <B>protein_gene</B>  →  <B>sparcle_gene</B></LI><BR>
|
|
|
|
<LI>links from <A HREF="/sparcle">SPARCLE</A> architectures to <A HREF="/pcassay">BioAssay</A> records are created by a join between the following:<BR>
|
|
<B>sparcle_protein</B>  AND  <B>protein_pcassay_target</B>  →  <B>sparcle_pcassay_target</B></LI><BR>
|
|
|
|
</UL>
|
|
|
|
</BLOCKQUOTE>
|
|
<BR>
|
|
|
|
<!-- === END_LEVEL_1_TOPIC_DATA_PROCESSING_LINKS_TO_OTHER_DATA_TYPES === -->
|
|
|
|
|
|
<!-- ====== PAGE_MARGIN_TO_RIGHT_OF_BLUE_EDGE_BOX_WITH_SECTION_TEMPLATE_CONTENTS ====== -->
|
|
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ############# END_BLUE_EDGE_BOX_WITH_SECTION_5_CONTENTS ############ -->
|
|
|
|
<!-- ==================== VERTICAL SPACER ======================= -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
<TR>
|
|
<TD class="WhiteCell NormalText"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ==================== END_VERTICAL SPACER ======================= -->
|
|
|
|
<!-- ################## BLUE_HEADER_SECTION_88_CHANGE_LOG ######################## -->
|
|
|
|
<A NAME="ChangeLog"></A>
|
|
<A NAME="LogofChanges"></A>
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="SteelBlueCell"><SPAN class="HeaderText1">Log of Changes to SPARCLE</SPAN></TD>
|
|
<TD class="SteelBlueCell" WIDTH="15" ALIGN="left" VALIGN="middle"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A></TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ################## END_BLUE_HEADER_SECTION_88 ######################## -->
|
|
|
|
<!-- ######### BEGIN_BLUE_EDGE_BOX_WITH_SECTION_88_CONTENTS_CHANGE_LOG ########### -->
|
|
|
|
<TABLE class="NormalText" width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="WhiteCellBlueEdgeAll">
|
|
|
|
<!-- ================= CHANGE_LOG_IN_TABLE_FORMAT ==================== -->
|
|
|
|
<BLOCKQUOTE>
|
|
<table width="100%" border="0" cellspacing="0" cellpadding="3" bgcolor="#ffffff">
|
|
|
|
<!-- tr>
|
|
<td width="110" class="NormalText" align="left" valign="top">________</td>
|
|
<td class="NormalText" align="left" valign="top">________</td>
|
|
</tr -->
|
|
|
|
<!-- tr>
|
|
<td width="110" class="NormalText" align="left" valign="top">DD MMM YYYY</td>
|
|
<td class="NormalText" align="left" valign="top">____________________</td>
|
|
</tr -->
|
|
|
|
<tr>
|
|
<td width="110" class="NormalText" align="left" valign="top">12 OCT 2016</td>
|
|
<td class="NormalText" align="left" valign="top">Initial release of the <A HREF="/sparcle"><B>Subfamily Protein Architecture Labeling Engine (SPARCLE)</B></A>.<BR>
|
|
SPARCLE is a resource for the functional characterization and labeling of protein sequences that have been grouped by their characteristic domain architecture. To use SPARCLE, you can either: (1) <A HREF="/Structure/cdd/wrpsb.cgi">enter a query protein sequence into CD-Search</A>, which will display a "<B>Protein Classification</B>" on the results page if the query protein has a hit to a curated domain architecture in the SPARCLE database, or (2) <A HREF="/sparcle">search the SPARCLE database by keyword</A> to retrieve domain architectures that contain the term(s) of interest in their descriptions. With either approach, the corresponding SPARCLE record(s) will display the name and functional label of the architecture, supporting evidence, and links to other proteins with the same architecture. Additional information and illustrated examples are provided on the "<A HREF="sparcle_about.html"><B>About SPARCLE</B></A>" page and in this help document.</td>
|
|
</tr>
|
|
|
|
</table>
|
|
</BLOCKQUOTE>
|
|
<BR>
|
|
|
|
<!-- ================= END_CHANGE_LOG_IN_TABLE_FORMAT ==================== -->
|
|
|
|
<!-- ====== PAGE_MARGIN_TO_RIGHT_OF_BLUE_EDGE_BOX_WITH_SECTION_CONTENTS ====== -->
|
|
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ########## END_BLUE_EDGE_BOX_WITH_SECTION_88_CONTENTS_CHANGE_LOG ########## -->
|
|
|
|
<!-- ==================== VERTICAL SPACER ======================= -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
<TR>
|
|
<TD class="WhiteCell NormalText"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ==================== END_VERTICAL SPACER ======================= -->
|
|
|
|
<!-- ################## BLUE_HEADER_SECTION_99 ######################## -->
|
|
|
|
<A NAME="References"></A>
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="SteelBlueCell"><SPAN class="HeaderText1">References</SPAN></TD>
|
|
<TD class="SteelBlueCell" WIDTH="15" ALIGN="left" VALIGN="middle"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A></TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ################## END_BLUE_HEADER_SECTION_99 ######################## -->
|
|
|
|
|
|
<!-- ############## BEGIN_BLUE_EDGE_BOX_WITH_SECTION_99_CONTENTS ########### -->
|
|
|
|
<TABLE class="NormalText" width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="WhiteCellBlueEdgeAll">
|
|
|
|
<!-- ============ MINI_TOC_FOR_THIS_SECTION ============== -->
|
|
<!-- BR>
|
|
<BLOCKQUOTE><BLOCKQUOTE>
|
|
|
|
| <A HREF="#_____">_________</A> | <A HREF="#_____">_________</A> | <A HREF="#_____">_________</A> | <A HREF="#_____">_________</A> |
|
|
|
|
</BLOCKQUOTE></BLOCKQUOTE -->
|
|
|
|
<!-- ========== END_MINI_TOC_FOR_THIS_SECTION ============ -->
|
|
|
|
<!-- ================= LEVEL_1_REFERENCES_CITING ==================== -->
|
|
|
|
<A NAME="Citing"></A>
|
|
<BR><BR>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Citing SPARCLE:</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0" alt=""><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<!-- ======== SPARCLE_NAR_20170104_HARD_COPY ======= -->
|
|
|
|
<BLOCKQUOTE>
|
|
<TABLE border="0" cellpadding="4" class="ReferenceText">
|
|
|
|
<TR>
|
|
<TD width="18" valign="top" align="left"><A NAME="SPARCLE_201701_NAR"></A>
|
|
<A HREF="/pubmed/27899674"><img border="0" align="left" height="15" width="15" style="margin-right:0.2em" src="../../IMG/PubMed.gif" alt="PubMed record for this article"></A></TD>
|
|
<TD valign="top" align="left"><A HREF="/pubmed/27899674">Marchler-Bauer A, Bo Y, Han L, He J, Lanczycki CJ, Lu S, Chitsaz F, Derbyshire MK, Geer RC, Gonzales NR, Gwadz M, Hurwitz DI, Lu F, Marchler GH, Song JS, Thanki N, Wang Z, Yamashita RA, Zhang D, Zheng C, Geer LY, Bryant SH. <B>CDD/SPARCLE: functional classification of proteins via subfamily domain architectures.</B> <B><I>Nucleic Acids Res.</I></B> <B>2017</B> Jan 4;45(D1):D200-D203. doi: 10.1093/nar/gkw1129. Epub 2016 Nov 29. [PubMed PMID: 27899674]</A> <A HREF="https://academic.oup.com/nar/article-lookup/doi/10.1093/nar/gkw1129">[Full Text at Oxford Academic]</A> <!-- A HREF="/pmc/articles/PMC5210587/">[Full Text in PubMed Central]</A --></TD>
|
|
<TD width="124" align="center" valign="top"><A href="/pmc/articles/PMC5210587/"><img src="/Structure/IMG/full_text/pubmed-pmc.gif" WIDTH="120" HEIGHT="28" BORDER="0" ALT="Click here to read"></A></TD>
|
|
</TR>
|
|
|
|
</TABLE>
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- ======== END_SPARCLE_NAR_20170104_HARD_COPY ======= -->
|
|
|
|
<!-- ======== SPARCLE_NAR_20161129_EPUB ======= -->
|
|
|
|
<!-- BLOCKQUOTE>
|
|
<TABLE border="0" cellpadding="4" class="ReferenceText">
|
|
|
|
<TR>
|
|
<TD width="18" valign="top" align="left"><A NAME="SPARCLE_20161129_NAR_ePub"></A><A HREF="/pubmed/27899674"><img border=0 align=left height=15 width=15 style="margin-right:0.2em" src="../../IMG/PubMed.gif"></A></TD>
|
|
<TD valign="top" align="left" colspan="2"><!-- A HREF="/pubmed/27899674"><A HREF="http://nar.oxfordjournals.org/cgi/content/abstract/gkw1129?ijkey=flTteKSH6X26cio&keytype=ref">Marchler-Bauer A, Yu B, Han L, He J, Lanczycki CJ, Lu S, Chitsaz F, Derbyshire MK, Geer RC, Gonzales NR, Gwadz M, Hurwitz DI, Lu F, Marchler GH, Song JS, Thanki N, Wang Z, Yamashita RA, Zhang D, Zheng C, Geer LY, Bryant SH. CDD/SPARCLE: functional classification of proteins via subfamily domain architectures. <B><I>Nucleic Acids Res.</I> [2017]</B> [Jan;XX(Database issue):DXXX-XX.] Epub 2016 Nov 29.[PubMed PMID: 27899674]</A> <A HREF="http://nar.oxfordjournals.org/cgi/content/full/gkw1129?ijkey=flTteKSH6X26cio&keytype=ref">[Full Text]</A> <A HREF="http://nar.oxfordjournals.org/cgi/reprint/gkw1129?ijkey=flTteKSH6X26cio&keytype=ref">[PDF]</A></TD>
|
|
</TR>
|
|
|
|
</TABLE>
|
|
</BLOCKQUOTE -->
|
|
|
|
<!-- ======== END_SPARCLE_NAR_20161129_EPUB ======= -->
|
|
|
|
<!-- ======== SPARCLE_WEB_CITATION ======= -->
|
|
|
|
<!-- BLOCKQUOTE>
|
|
<TABLE border="0" cellpadding="4" class="ReferenceText">
|
|
|
|
<TR>
|
|
<TD align="left" class="NormalText" colspan="3">To cite <A HREF="/sparcle">SPARCLE</A>, please use the following format, based on <I><A HREF="/books/bv.fcgi?rid=citmed.TOC&depth=2">Citing Medicine</A>: The NLM Style Guide for Authors, Editors, and Publishers</I> (2nd ed., 2007), <A HREF="/books/bv.fcgi?rid=citmed.chapter.57255">Chapter 24</A>: Databases/Retrieval Systems on the Internet.
|
|
</TD>
|
|
</TR>
|
|
|
|
<TR>
|
|
<TD width="18" valign="top" align="left"><A NAME="SPARCLE_web_citation"></A><A NAME="CitingiSPARCLEWebSite"></A><A HREF="/sparcle"><img border=0 align=left height=15 width=15 style="margin-right:0.2em" src="../../IMG/PubMed.gif"></A></TD>
|
|
<TD valign="top" align="left" colspan="2"><A HREF="/sparcle">SPARCLE [Internet]. Bethesda (MD): National Library of Medicine (US), National Center for Biotechnology Information. 2016 - [cited YYYY MMM DD]. Available from: /sparcle</A></TD>
|
|
</TR>
|
|
|
|
</TABLE>
|
|
</BLOCKQUOTE -->
|
|
|
|
<!-- ======== END_SPARCLE_WEB_CITATION ======= -->
|
|
|
|
<!-- ================ LEVEL_1_REFERENCES_ADDITIONAL ================== -->
|
|
|
|
<A NAME="ReferencesAdditional"></A>
|
|
<A NAME="AdditionalReferences"></A>
|
|
|
|
<P class="indent20">
|
|
<SPAN class="HeaderText3"><B>Additional references:</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0" alt=""><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P>
|
|
|
|
<BLOCKQUOTE>
|
|
<TABLE border="0" cellpadding="4" class="ReferenceText">
|
|
|
|
<!-- ======== CDD_SPARCLE_NAR_202001_HARD_COPY ======= -->
|
|
|
|
<!-- TR>
|
|
<TD width="18" valign="top" align="left"><A NAME="CDD_SPARCLE_202001_NAR"></A>
|
|
<A HREF="/pubmed/31777944"><img border=0 align=left height=15 width=15 style="margin-right:0.2em" src="../../IMG/PubMed.gif"></A></TD>
|
|
<TD valign="top" align="left"><A HREF="/pubmed/31777944">Lu S, Wang J, Chitsaz F, Derbyshire MK, Geer RC, Gonzales NR, Gwadz M, Hurwitz DI, Marchler GH, Song JS, Thanki N, Yamashita RA, Yang M, Zhang D, Zheng C, Lanczycki CJ, Marchler-Bauer A. <B>CDD/SPARCLE: the conserved domain database in 2020.</B> <B><I>Nucleic Acids Res.</I></B> <B>2020</B> Jan __;__(D1):D___-D___. doi: 10.1093/nar/gkz991. Epub 2019 Nov 28. [PubMed PMID: 31777944]</A> <A HREF="https://academic.oup.com/nar/advance-article/doi/10.1093/nar/gkz991/5645006">[Full Text at Oxford Academic]</A></TD>
|
|
<TD width="124" align="center" valign="top"><A href="/pmc/articles/______/"><img src="/Structure/IMG/full_text/pubmed-pmc.gif" WIDTH="120" HEIGHT="28" BORDER="0" ALT="Click here to read"></A></TD>
|
|
</TR -->
|
|
|
|
<!-- ======== END_CDD_SPARCLE_NAR_202001_HARD_COPY ======= -->
|
|
|
|
<!-- ======== CDD_SPARCLE_NAR_20191128_EPUB ======= -->
|
|
|
|
<TR>
|
|
<TD width="18" valign="top" align="left"><A NAME="CDD_SPARCLE_201911_NAR"></A>
|
|
<A HREF="/pubmed/31777944"><img border="0" align="left" height="15" width="15" style="margin-right:0.2em" src="../../IMG/PubMed.gif" alt="PubMed record for this article"></A></TD>
|
|
<TD valign="top" align="left">
|
|
<A HREF="/pubmed/31777944?dopt=AbstractPlus">Lu S, Wang J, Chitsaz F, Derbyshire MK, Geer RC, Gonzales NR, Gwadz M, Hurwitz DI, Marchler GH, Song JS, Thanki N, Yamashita RA, Yang M, Zhang D, Zheng C, Lanczycki CJ, Marchler-Bauer A. CDD/SPARCLE: the conserved domain database in 2020. <B><I>Nucleic Acids Res.</I> 2019</B> Nov 28. pii: gkz991. doi: 10.1093/nar/gkz991. [Epub ahead of print] [PubMed PMID: 31777944]</A> <A HREF="https://academic.oup.com/nar/advance-article/doi/10.1093/nar/gkz991/5645006">[Full Text at Oxford Academic]</A><BR><BR>
|
|
|
|
<I>(NOTE: The above reference is for the <B>e-publication ahead of print</B>, and will be updated to reflect the volume, issue, pages, and publication date of the print version, once it becomes available in January 2020.)</I>
|
|
|
|
</TD>
|
|
<TD width="124" align="center" valign="top"><!-- A href="/pmc/articles/__________/"><img src="/Structure/IMG/full_text/pubmed-pmc.gif" WIDTH="120" HEIGHT="28" BORDER="0" ALT="Click here to read"></A --></TD>
|
|
</TR>
|
|
|
|
<!-- ======== END_CDD_SPARCLE_NAR_20191128_EPUB ======= -->
|
|
|
|
<!-- ======== CROSS_REFERENCE_TO_CDD_PUBLICATIONS_PAGE ======= -->
|
|
<TR>
|
|
<TD width="18" valign="top" align="left"><A HREF="../../cdd/docs/cdd_publications.html"><img border="0" align="left" height="15" width="15" style="margin-right:0.2em" src="../../IMG/PubMed.gif" alt="List of CDD publications"></A></TD>
|
|
<TD align="left">A separate page lists <A HREF="../../cdd/docs/cdd_publications.html"><SPAN class="ThumbText"><B>all publications</B></SPAN></A> about NCBI's <A HREF="../../cdd/cdd.shtml"><SPAN class="ThumbText"><B>Conserved Domains and Protein Classification Resources</B></SPAN></A>.<BR><BR>
|
|
</TD>
|
|
<TD width="124" align="center" valign="top"><!-- A href="/pmc/articles/______/"><img src="/Structure/IMG/full_text/pubmed-pmc.gif" WIDTH="120" HEIGHT="28" BORDER="0" ALT="Click here to read"></A --></TD>
|
|
</TR>
|
|
<!-- ====== END_CROSS_REFERENCE_TO_CDD_PUBLICATIONS_PAGE ===== -->
|
|
|
|
</TABLE>
|
|
</BLOCKQUOTE>
|
|
|
|
<!-- ====== PAGE_MARGIN_TO_RIGHT_OF_BLUE_EDGE_BOX_WITH_SECTION_CONTENTS ====== -->
|
|
|
|
</TD>
|
|
</TR>
|
|
</TABLE>
|
|
<!-- BR -->
|
|
|
|
<!-- ############### END_BLUE_EDGE_BOX_WITH_SECTION_99_CONTENTS ############ -->
|
|
|
|
<!-- ########### BEGIN_BLUE_HEADER_SECTION_N ############# -->
|
|
|
|
<A NAME="_________"></A>
|
|
|
|
<!-- TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="SteelBlueCell"><SPAN class="HeaderText1">___________________________</SPAN></TD>
|
|
<TD class="SteelBlueCell" WIDTH="15" ALIGN="left" VALIGN="center"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A></TD>
|
|
</TR>
|
|
</TABLE -->
|
|
|
|
<!-- ############## END_BLUE_HEADER_SECTION_N ############ -->
|
|
|
|
<!-- ########## BEGIN_BLUE_EDGE_BOX_WITH_SECTION_N_CONTENTS ########### -->
|
|
|
|
<!-- TABLE width="100%" border="0" cellspacing="0" cellpadding="0" bgcolor="#F0F8FF">
|
|
<TR>
|
|
<TD class="WhiteCellBlueEdgeAll NormalText" -->
|
|
|
|
<!-- ============ MINI_TOC_FOR_THIS_SECTION ============== -->
|
|
<!-- BR -->
|
|
<!-- BLOCKQUOTE><BLOCKQUOTE>
|
|
|
|
<A HREF="#_____">_________</A> | <A HREF="#_____">_________</A> | <A HREF="#_____">_________</A> | <A HREF="#_____">_________</A>
|
|
|
|
</BLOCKQUOTE></BLOCKQUOTE -->
|
|
|
|
<!-- ========== END_MINI_TOC_FOR_THIS_SECTION ============ -->
|
|
|
|
<!-- ================= LEVEL_1_TOPIC_xxxxxxxxxx ==================== -->
|
|
|
|
<A NAME="__________"></A>
|
|
|
|
<!-- P class="indent20">
|
|
<SPAN class="HeaderText3"><B>___________</B></SPAN> <img SRC="/Structure/IMG/spacer.gif" width="25" height="1" border="0"><A HREF="#Top"><img SRC="/Structure/IMG/arrowup_blue.gif" width="12" height="12" border="0" alt="back to top"></A>
|
|
</P -->
|
|
|
|
<!-- IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right"><IMG SRC="images/______" WIDTH="200" HEIGHT="100" BORDER="0" ALT="________" ALIGN="right">
|
|
<IMG SRC="images/______" WIDTH="20" HEIGHT="10" BORDER="0" ALT="________" ALIGN="right" -->
|
|
|
|
<!-- P class="indent20">
|
|
Text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text.<BR><BR>
|
|
|
|
Text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text.<BR><BR>
|
|
|
|
Text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text.
|
|
</P>
|
|
|
|
<BR>
|
|
|
|
<BLOCKQUOTE>
|
|
|
|
Text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text.
|
|
|
|
<UL>
|
|
|
|
<LI><A NAME="_________"></A><B>___Bullet1___</B></LI>
|
|
<UL>
|
|
<LI>Text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text</LI>
|
|
</UL>
|
|
<BR>
|
|
|
|
<LI><A NAME="_________"></A><B>___Bullet2___</B></LI>
|
|
<UL>
|
|
<LI>Text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text text</LI>
|
|
</UL>
|
|
<BR>
|
|
|
|
</UL>
|
|
|
|
</BLOCKQUOTE -->
|
|
|
|
<!-- ============== END_LEVEL_1_TOPIC_xxxxxxxxxx =============== -->
|
|
|
|
<!-- ====== PAGE_MARGIN_TO_RIGHT_OF_BLUE_EDGE_BOX_WITH_SECTION_TEMPLATE_CONTENTS ====== -->
|
|
|
|
<!-- /TD>
|
|
</TR>
|
|
</TABLE -->
|
|
|
|
<!-- ############# END_BLUE_EDGE_BOX_WITH_SECTION_N_CONTENTS ############ -->
|
|
|
|
<!-- ==================== VERTICAL SPACER ======================= -->
|
|
|
|
<TABLE width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
<TR>
|
|
<TD class="WhiteCell NormalText"> </TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- ==================== END_VERTICAL SPACER ======================= -->
|
|
|
|
<!-- =================== PAGE_FOOTER_AND_DATE_REVISED ================== -->
|
|
|
|
<TABLE style="margin:0px 0px 0px 0px;" width="100%" border="0" cellspacing="0" cellpadding="0">
|
|
<TR>
|
|
<TD VALIGN="top" ALIGN="RIGHT"><SPAN CLASS="HELPBAR1" STYLE="color:#646464">Revised 02 December 2019</SPAN></TD>
|
|
<TD width="25"><IMG SRC="/coreweb/template1/pix/pixel.gif" width="1" height="1" alt="" border=0></TD>
|
|
</TR>
|
|
</TABLE>
|
|
|
|
<!-- =================== END_PAGE_FOOTER_AND_DATE_REVISED ================== -->
|
|
|
|
|
|
<!-- ##################### END_EDITABLE_CONTENT ##################### -->
|
|
|
|
<script type="text/javascript">doc_tmp.getDocFtr('<A HREF="/research/">Computational Biology Branch</A> > <A HREF="/Structure/index.shtml">Structure Group</A> > <A HREF="/Structure/cdd/cdd.shtml">Conserved Domains and Protein Classification</A> > <A HREF="/sparcle/">SPARCLE</A> > Help','other');</script>
|
|
|
|
<!-- script type="text/javascript">doc_tmp.getDocFtr(' > <A HREF="______">__________</A> > <A HREF="______">__________</A> > <A HREF="______">__________</A> > ___currentpage_____', 'other');</script -->
|
|
|
|
</body>
|
|
</html>
|