nih-gov/www.ncbi.nlm.nih.gov/genome/doc/internatprot_nomenguide/index.html

<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">

    <head><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
        <!-- AppResources meta begin -->
        <meta name="paf-app-resources" content="" />
        <!-- AppResources meta end -->

        <!-- TemplateResources meta begin -->
        <meta name="paf_template" content="StdNCol" />

        <!-- TemplateResources meta end -->

        <!-- Page meta begin -->

        <!-- Page meta end -->

        <!-- Logger begin -->
        <meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_app" content="genbank" /><meta xmlns:ncbi-portal="http://ncbi.gov/portal/XSLT/namespace" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" name="ncbi_pdid" content="custom-page" />
        <!-- Logger end -->

        <title>International Protein Nomenclature Guidelines</title>

        <!-- PageFixtures headcontent begin -->

<meta name="cms-local-nav-url" content="https://cms.ncbi.nlm.nih.gov//genbank/_nav" />

        <!-- PageFixtures headcontent end -->

        <!-- AppResources external_resources begin -->
        <script type="text/javascript" src="/core/jig/1.15.6/js/jig.min.js"></script>

        <!-- AppResources external_resources end -->

        <!-- Page headcontent begin -->
        <meta name="subsite" content="genbank" />
<meta name="path" content="genbank/internatprot_nomenguide" />
<meta name="modified" content="2024-01-24T20:07:23Z" />
        <!-- Page headcontent end -->
        <!-- PageFixtures resources begin -->
        <link xmlns="http://www.w3.org/1999/xhtml" type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218191/css/4207974/4206132.css" xml:base="http://127.0.0.1/sites/static/header_footer" />

        <!-- PageFixtures resources end -->
    <link rel="shortcut icon" href="//www.ncbi.nlm.nih.gov/favicon.ico" /><meta name="ncbi_phid" content="CE8B65437C81A3A10000000000CF00A3.m_6" />
<meta name='referrer' content='origin-when-cross-origin'/><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218137/css/4121862/3974050/3917732/251717/4108189/14534/45193/3534283/4128070/3407145/4005757/4062871.css" /><link type="text/css" rel="stylesheet" href="//static.pubmed.gov/portal/portal3rc.fcgi/4218137/css/3529741/3529739.css" media="print" /></head>
    <body class=" custom-page">
        <div class="grid">
            <div class="col twelve_col nomargin shadow">
                <!-- System messages like service outage or JS required; this is handled by the TemplateResources portlet -->
                <div class="sysmessages">
                    <noscript>
	<p class="nojs">
	<strong>Warning:</strong>
	The NCBI web site requires JavaScript to function.
	<a href="/guide/browsers/#enablejs" title="Learn how to enable JavaScript" target="_blank">more...</a>
	</p>
	</noscript>
                </div>
                <!--/.sysmessage-->
                <div class="wrap">
                    <div class="page">
                        <div xmlns:xi="http://www.w3.org/2001/XInclude">
    <div xmlns="http://www.w3.org/1999/xhtml" id="universal_header" xml:base="http://127.0.0.1/sites/static/header_footer">
	<section class="usa-banner">
		<div class="usa-accordion">
			<header class="usa-banner-header">
				<div class="usa-grid usa-banner-inner">
					<img src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/favicons/favicon-57.png" alt="U.S. flag" />
					<p>An official website of the United States government</p>
					<button class="non-usa-accordion-button usa-banner-button" aria-expanded="false" aria-controls="gov-banner-top" type="button">
						<span class="usa-banner-button-text">Here's how you know</span>
					</button>
				</div>
			</header>
			<div class="usa-banner-content usa-grid usa-accordion-content" id="gov-banner-top" aria-hidden="true">
				<div class="usa-banner-guidance-gov usa-width-one-half">
					<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-dot-gov.svg" alt="Dot gov" />
					<div class="usa-media_block-body">
						<p>
							<strong>The .gov means it's official.</strong>
							<br />
							Federal government websites often end in .gov or .mil. Before
							sharing sensitive information, make sure you're on a federal
							government site.
						</p>
					</div>
				</div>
				<div class="usa-banner-guidance-ssl usa-width-one-half">
					<img class="usa-banner-icon usa-media_block-img" src="https://www.ncbi.nlm.nih.gov/coreutils/uswds/img/icon-https.svg" alt="Https" />
					<div class="usa-media_block-body">
						<p>
							<strong>The site is secure.</strong>
							<br />
							The <strong>https://</strong> ensures that you are connecting to the
							official website and that any information you provide is encrypted
							and transmitted securely.
						</p>
					</div>
				</div>
			</div>
		</div>
	</section>
	<div class="usa-overlay"></div>
	<header class="ncbi-header" role="banner" data-section="Header">

		<div class="usa-grid">
			<div class="usa-width-one-whole">

				<div class="ncbi-header__logo">
					<a href="/" class="logo" aria-label="NCBI Logo" data-ga-action="click_image" data-ga-label="NIH NLM Logo">
						<img src="https://www.ncbi.nlm.nih.gov/coreutils/nwds/img/logos/AgencyLogo.svg" alt="NIH NLM Logo" />
					</a>
				</div>

				<div class="ncbi-header__account">
					<a id="account_login" href="https://account.ncbi.nlm.nih.gov" class="usa-button header-button" style="display:none" data-ga-action="open_menu" data-ga-label="account_menu">Log in</a>
					<button id="account_info" class="header-button" style="display:none" aria-controls="account_popup" type="button">
						<span class="fa fa-user" aria-hidden="true">
							<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20px" height="20px">
								<g style="fill: #fff">
									<ellipse cx="12" cy="8" rx="5" ry="6"></ellipse>
									<path d="M21.8,19.1c-0.9-1.8-2.6-3.3-4.8-4.2c-0.6-0.2-1.3-0.2-1.8,0.1c-1,0.6-2,0.9-3.2,0.9s-2.2-0.3-3.2-0.9    C8.3,14.8,7.6,14.7,7,15c-2.2,0.9-3.9,2.4-4.8,4.2C1.5,20.5,2.6,22,4.1,22h15.8C21.4,22,22.5,20.5,21.8,19.1z"></path>
								</g>
							</svg>
						</span>
						<span class="username desktop-only" aria-hidden="true" id="uname_short"></span>
						<span class="sr-only">Show account info</span>
					</button>
				</div>

				<div class="ncbi-popup-anchor">
					<div class="ncbi-popup account-popup" id="account_popup" aria-hidden="true">
						<div class="ncbi-popup-head">
							<button class="ncbi-close-button" data-ga-action="close_menu" data-ga-label="account_menu" type="button">
								<span class="fa fa-times">
									<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 48 48" width="24px" height="24px">
										<path d="M38 12.83l-2.83-2.83-11.17 11.17-11.17-11.17-2.83 2.83 11.17 11.17-11.17 11.17 2.83 2.83 11.17-11.17 11.17 11.17 2.83-2.83-11.17-11.17z"></path>
									</svg>
								</span>
								<span class="usa-sr-only">Close</span></button>
							<h4>Account</h4>
						</div>
						<div class="account-user-info">
							Logged in as:<br />
							<b><span class="username" id="uname_long">username</span></b>
						</div>
						<div class="account-links">
							<ul class="usa-unstyled-list">
								<li><a id="account_myncbi" href="/myncbi/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_myncbi">Dashboard</a></li>
								<li><a id="account_pubs" href="/myncbi/collections/bibliography/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_pubs">Publications</a></li>
								<li><a id="account_settings" href="/account/settings/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_settings">Account settings</a></li>
								<li><a id="account_logout" href="/account/signout/" class="set-base-url" data-ga-action="click_menu_item" data-ga-label="account_logout">Log out</a></li>
							</ul>
						</div>
					</div>
				</div>

			</div>
		</div>
	</header>
	<div role="navigation" aria-label="access keys">
		<a id="nws_header_accesskey_0" href="https://www.ncbi.nlm.nih.gov/guide/browsers/#ncbi_accesskeys" class="usa-sr-only" accesskey="0" tabindex="-1">Access keys</a>
		<a id="nws_header_accesskey_1" href="https://www.ncbi.nlm.nih.gov" class="usa-sr-only" accesskey="1" tabindex="-1">NCBI Homepage</a>
		<a id="nws_header_accesskey_2" href="/myncbi/" class="set-base-url usa-sr-only" accesskey="2" tabindex="-1">MyNCBI Homepage</a>
		<a id="nws_header_accesskey_3" href="#maincontent" class="usa-sr-only" accesskey="3" tabindex="-1">Main Content</a>
		<a id="nws_header_accesskey_4" href="#" class="usa-sr-only" accesskey="4" tabindex="-1">Main Navigation</a>
	</div>
	<section data-section="Alerts">
		<div class="ncbi-alerts-placeholder"></div>
	</section>
</div>
</div>
                        <!--/.header-->
                        <div class="header">
                            <div class="res_logo"><h1 class="res_name"><a href="/genbank/" title="GenBank home">GenBank</a></h1><h2 class="res_tagline">Public nucleic acid sequence repository</h2></div>
                            <div class="search"><form method="get" action="/nuccore/"><div class="search_form"><label for="database" class="offscreen_noflow">Search database</label><select id="database"><optgroup label="Recent"><option value="nuccore" selected="selected">Nucleotide</option><option value="sra">SRA</option><option value="books">Books</option><option value="omim" class="last">OMIM</option></optgroup><optgroup label="All"><option value="gquery">All Databases</option><option value="assembly">Assembly</option><option value="biocollections">Biocollections</option><option value="bioproject">BioProject</option><option value="biosample">BioSample</option><option value="books">Books</option><option value="clinvar">ClinVar</option><option value="cdd">Conserved Domains</option><option value="gap">dbGaP</option><option value="dbvar">dbVar</option><option value="gene">Gene</option><option value="genome">Genome</option><option value="gds">GEO DataSets</option><option value="geoprofiles">GEO Profiles</option><option value="gtr">GTR</option><option value="ipg">Identical Protein Groups</option><option value="medgen">MedGen</option><option value="mesh">MeSH</option><option value="nlmcatalog">NLM Catalog</option><option value="nuccore">Nucleotide</option><option value="omim">OMIM</option><option value="pmc">PMC</option><option value="protein">Protein</option><option value="proteinclusters">Protein Clusters</option><option value="protfam">Protein Family Models</option><option value="pcassay">PubChem BioAssay</option><option value="pccompound">PubChem Compound</option><option value="pcsubstance">PubChem Substance</option><option value="pubmed">PubMed</option><option value="snp">SNP</option><option value="sra">SRA</option><option value="structure">Structure</option><option value="taxonomy">Taxonomy</option><option value="toolkit">ToolKit</option><option value="toolkitall">ToolKitAll</option><option value="toolkitbookgh">ToolKitBookgh</option></optgroup></select><div 
class="nowrap"><label for="term" class="offscreen_noflow" accesskey="/">Search term</label><div class="nowrap"><input type="text" name="term" id="term" title="Search Nucleotide" value="" class="jig-ncbiclearbutton jig-ncbiautocomplete" data-jigconfig="isEnabled:false,disableUrl:'NcbiSearchBarAutoComplCtrl'" autocomplete="off" data-sbconfig="ds:'no',pjs:'no',afs:'yes'" /></div><button id="search" type="submit" class="button_search nowrap" cmd="go">Search</button></div></div></form></div>

                        </div>
                        <div class="nav_and_browser">
    <div class="localnav"><ul class="jig-ncbilocalnav">
<li><a href="#">GenBank</a><ul>
<li><a href="/genbank/">About GenBank</a></li>
<li><a href="/genbank/submit_types">Submission Types</a></li>
<li><a href="/genbank/submit">Submission Tools</a></li>
<li><a href="/genbank/update">Update GenBank Records</a></li>
<li><a href="/nuccore/">Search</a></li>
<li><a href="/BLAST/Blast.cgi?CMD=Web&amp;PAGETYPE=BLASTHome">BLAST</a></li>
<li><a href="/genbank/statistics">Statistics</a></li>
<li><a href="/genbank/samplerecord/">Sample Record</a></li>
<li><a href="/genbank/sequencerevisionhistory/">Revision History</a></li>
<li><a href="/genbank/sequenceids/">Sequence IDs</a></li>
</ul>
</li>
<li><a href="#">Submit</a><ul>
<li><a href="/genbank/submit">Submission Tools</a></li>
<li><a href="/genbank/submit_types">Submission Types</a></li>
<li><a href="/WebSub/?tool=genbank">BankIt</a></li>
<li><a href="/genbank/table2asn">table2asn</a></li>
<li><a href="https://www.ncbi.nlm.nih.gov/sra/docs/sequence-data-processing">Sequence Data Processing</a></li>
</ul>
</li>
<li><a href="#">Genomes</a><ul>
<li><a href="/genbank/genomesubmit">Complete Genome Submission Guide</a></li>
<li><a href="/genbank/genomesubmit_annotation">Prokaryotic Genome Annotation Guide</a></li>
<li><a href="/genbank/eukaryotic_genome_submission_annotation">Eukaryotic Genome Annotation Guide</a></li>
<li><a href="/genbank/examples.wgs">Annotation Examples</a></li>
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/wgs/">Genome Submission Portal</a></li>
</ul>
</li>
<li><a title="Whole Genome Shotgun sequences and submissions" href="#">WGS</a><ul>
<li><a href="/genbank/wgs">About WGS</a></li>
<li><a href="/Traces/wgs">WGS Project List</a></li>
<li><a href="/genbank/wgs.submit">WGS Submission Guide</a></li>
<li><a href="/genbank/wgsfaq/">FAQ</a></li>
<li><a href="https://submit.ncbi.nlm.nih.gov/subs/wgs/">Genome Submission Portal</a></li>
<li><a href="/genbank/eukaryotic_genome_submission_annotation">Eukaryotic Annotation Guide</a></li>
<li><a href="/genbank/genomesubmit_annotation">Prokaryotic Annotation Guide</a></li>
<li><a href="/genbank/asndisc">Discrepancy Report</a></li>
<li><a href="/assembly/agp/AGP_Specification/">AGP format</a></li>
</ul>
</li>
<li><a href="#">Metagenomes</a><ul>
<li><a href="/genbank/metagenome">About Metagenomes</a></li>
<li><a href="/genbank/structuredcomment">Structured Comment</a></li>
</ul>
</li>
<li><a href="#">TPA</a><ul>
<li><a href="/genbank/TPA">About TPA</a></li>
<li><a href="/genbank/tpafaq">FAQ</a></li>
<li><a href="/genbank/TPA-Exp">TPA-Exp</a></li>
<li><a href="/genbank/TPA-Inf">TPA-Inf</a></li>
</ul>
</li>
<li><a href="#">TSA</a><ul>
<li><a href="/genbank/TSA">About TSA</a></li>
<li><a href="/genbank/TSAguide">TSA Submission Guide</a></li>
<li><a href="/genbank/TSAfaq">FAQ</a></li>
</ul>
</li>
<li><a href="#">INSDC</a><ul>
<li><a href="/genbank/collab">About INSDC</a></li>
<li><a href="/genbank/collab/country">Geographic Location Name List</a></li>
<li><a href="/genbank/collab/db_xref">db_xref List</a></li>
<li><a href="http://www.insdc.org/documents/feature_table.html">Feature Table</a></li>
</ul>
</li>
<li><a href="#">Documentation</a><ul>
<li><a href="https://www.ncbi.nlm.nih.gov/sra/docs/sequence-data-processing/">Sequence Data Processing</a></li>
<li><a href="/genbank/submission_brokers">Submission Brokers</a></li>
<li><a href="/genbank/acc_prefix">Accession Number Prefixes</a></li>
<li><a href="/genbank/organelle_submit/">Organelle Submission Guide</a></li>
<li><a href="/genbank/monkeypox_submission/">Monkeypox Submission Guide</a></li>
<li><a href="/genbank/validation/">Common Submission Errors</a> </li>
<li><a href="/genbank/sequencecheck/">Ribosomal Submission Errors</a></li>
<li><a href="/genbank/sequencecheck/virus">Common Sequence Errors</a></li>
<li><a href="https://support.nlm.nih.gov/knowledgebase/category/?id=CAT-01240">Submission FAQs</a></li>
</ul>
</li>
<li><a href="#">Other</a><ul>
<li><a href="/genbank/htgs">About HTGs</a></li>
<li><a href="/genbank/dbest">About EST</a></li>
<li><a href="/genbank/dbgss">About GSS</a></li>
<li><a href="/genbank/tls">About TLS</a></li>
<li><a href="/genbank/tlsguide">Submit TLS</a></li>
</ul>
</li>
</ul></div>
</div>

                        <!-- was itemctrl -->
                        <div class="container">
                            <div id="maincontent" class="content col twelve_col last">
                                <div class="col1">
                                    <h1 id="international-protein-nomenclatu">International Protein Nomenclature Guidelines</h1>


<p><strong>Mission statement</strong></p>


<p>These guidelines have been produced jointly by the European Bioinformatics Institute (EMBL-EBI), the National Center for Biotechnology Information (NCBI), the Protein Information Resource (PIR) and the Swiss Institute of Bioinformatics (SIB). They are intended for use by anyone who wants to name a protein, and aim to promote consistency in protein naming across databases, which aids data retrieval and improves communication.</p>


<p><strong>Table of contents</strong></p>


<div class="toc">
<ul>
<li><a href="#1-introduction">1. Introduction</a></li>
<li><a href="#2-formats-for-protein-names">2. Formats for Protein Names</a><ul>
<li><a href="#a-language">A. Language</a></li>
<li><a href="#b-abbreviations-and-symbols">B. Abbreviations and symbols</a></li>
<li><a href="#c-punctuation">C. Punctuation</a></li>
<li><a href="#d-notation">D. Notation</a></li>
<li><a href="#e-style-and-format">E. Style and format</a></li>
<li><a href="#f-word-usage">F. Word usage</a></li>
</ul>
</li>
<li><a href="#3-choosing-protein-names">3. Choosing Protein Names</a><ul>
<li><a href="#a-sources-of-protein-name-annota">A. Sources of protein name annotation</a></li>
<li><a href="#b-naming-procedure-for-specific-">B. Naming procedure for specific cases</a></li>
</ul>
</li>
</ul>
</div>


<h2 id="1-introduction">1. Introduction</h2>


<p>Consistent protein nomenclature is indispensable for communication, literature searching and entry retrieval. A good protein name is one which is unique, unambiguous, can be attributed to orthologs from other species and follows official gene nomenclature where applicable. The process of associating a name with a protein sequence has various components: sequence function identification/prediction, choosing a name and applying formatting. This document provides guidelines on naming choices and universal formatting. This does not include best practices on methods to be used for sequence function identification/prediction.</p>


<h2 id="2-formats-for-protein-names">2. Formats for Protein Names</h2>


<h3 id="a-language">A. Language</h3>


<ul>
<li>
<p><strong>Use American spelling, not British spelling </strong></p>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>uncharacterized protein </em><em>not</em><em> uncharacterised protein</em></p>
</li>
<li>
<p><em>hemoglobin </em><em>not</em><em> haemoglobin</em></p>
</li>
</ul>
</li>
<li>
<p><strong>Use protein names ending in 'in' (not 'ine') </strong></p>
<p><em>Example: maurocalcin </em><em>not</em><em> maurocalcine</em></p>
</li>
<li>
<p><strong>Avoid diacritics such as accents, umlauts etc.</strong></p>
<p><em>Example: protein spatzle 5 </em><em>not</em><em> protein spätzle 5</em></p>
</li>
<li>
<p><strong>Avoid pluralization for names based on domain and repeat content</strong></p>
<p><em>Example: ankyrin repeat-containing protein </em><em>not</em><em> ankyrin repeats-containing protein</em></p>
</li>
<li>
<p><strong>Avoid common words</strong></p>
<p>Avoid naming proteins with common words, which makes querying difficult, e.g. avoid names such as ‘protein IMPACT’.</p>
</li>
<li>
<p><strong>Avoid duplication</strong></p>
<p>Check if the proposed name for a newly discovered protein is already used for a different protein.</p>
</li>
</ul>


<h3 id="b-abbreviations-and-symbols">B. Abbreviations and symbols</h3>


<ul>
<li>
<p><strong>Avoid using an abbreviation as the complete name</strong></p>
<p><em>Example: acyl carrier protein </em><em>not</em><em> ACP</em></p>
</li>
<li>
<p><strong>An abbreviation may be part of a protein name</strong></p>
<p><em>Example: (3R)-hydroxymyristoyl-ACP dehydratase</em></p>
<p>See below for a list of standard scientific abbreviations.</p>
</li>
<li>
<p><strong>Protein name based on a protein symbol (PS) or gene symbol (GS)</strong></p>
<p>Protein and gene symbols should use the same abbreviation. Some gene and protein symbols are easily recognized by database users in certain research communities and can be used as part of a protein name to provide specification and aid data retrieval.</p>
<p><strong>Prokaryote symbol guidelines</strong></p>
<ul>
<li>
<p>A protein symbol is most commonly used in prokaryote protein names in combination with a functional protein name.</p>
</li>
<li>
<p>The first letter of a protein symbol is capitalized for prokaryotes e.g. RecA.</p>
</li>
<li>
<p>In rare occurrences when there is no functional protein name, the format "protein &lt;PS&gt;" may be used, not "&lt;PS&gt; protein".</p>
</li>
</ul>
<p><em>Example: recombinase RecA</em></p>
<p><strong>Eukaryote symbol guidelines</strong></p>
<ul>
<li>
<p>A gene symbol is commonly used in eukaryote protein names in combination with a functional protein name.</p>
</li>
<li>
<p>Capitalization conventions of gene symbols differ between organism communities and this is reflected in the casing of gene symbols used as part of eukaryotic protein names. For vertebrates, use an all uppercase gene symbol in a protein name. For non-vertebrate eukaryotes, follow the gene casing conventions of the species in question.</p>
</li>
<li>
<p>In the case of conserved genes, if there is no known gene symbol in use in the species already, a known orthologous gene symbol from a species where the symbol was originally defined may be used.</p>
</li>
<li>
<p>In rare occurrences when there is no functional protein name, the format “protein &lt;GS&gt;” may be used, not “&lt;GS&gt; protein”.</p>
<p><em>Examples:</em></p>
<ul>
<li>
<p>Human: <em>tyrosine-protein kinase ABL1</em></p>
</li>
<li>
<p>Mouse: <em>tyrosine-protein kinase ABL1</em></p>
</li>
<li>
<p><em>C. elegans</em>: <em>tyrosine-protein kinase abl-1</em></p>
</li>
<li>
<p><em>D. melanogaster</em>: <em>tyrosine-protein kinase Abl</em></p>
</li>
<li>
<p><em>S. cerevisiae</em>: <em>recombinase RAD51</em></p>
</li>
<li>
<p><em>S. pombe</em>: <em>recombinase rad51</em></p>
</li>
</ul>
</li>
</ul>
</li>
<li>
<p><strong>Prime symbol (')</strong></p>
<ul>
<li>
<p>Use to indicate the cleavage location on a substrate and to distinguish different subunits with the same notation.</p>
</li>
<li>
<p>Use the single quote character (not the backtick) for the prime symbol.</p>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>H(+)-transporting V0 sector ATPase subunit c'</em></p>
</li>
<li>
<p><em>5'-nucleotidase </em><em>not</em><em> 5-prime-nucleotidase</em></p>
</li>
<li>
<p><em>coatomer subunit beta' </em><em>not</em><em> coatomer subunit beta-prime</em></p>
</li>
</ul>
</li>
</ul>
</li>
<li>
<p><strong>Chemical symbols may be part of a protein name</strong></p>
<ul>
<li>
<p>For elements with a single valence type, use the full element name with no valence indicated.</p>
</li>
<li>
<p>For elements that have variable types of valency, use the chemical symbol for the element followed by the valence in parentheses.</p>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>sodium/lithium-exporting P-type ATPase </em><em>not</em><em> Na(+)/Li(+)-exporting P-type ATPase</em></p>
</li>
<li>
<p><em>magnesium transporter </em><em>not</em><em> Mg(2+) transporter</em></p>
</li>
<li>
<p><em>Fe(3+)/Cu(2+)-chelate reductase </em><em>not</em><em> ferric/cupric-chelate reductase </em><em>or not</em><em> Fe(III)/Cu(II)-chelate reductase</em></p>
</li>
</ul>
</li>
</ul>
</li>
<li>
<p><strong>Standard scientific abbreviations may be part of a protein name</strong></p>
<ul>
<li>
<p>Deoxyribonucleic acid: DNA, cDNA, dsDNA, ssDNA</p>
</li>
<li>
<p>Ribonucleic acid: dsRNA, mRNA, miRNA, piRNA, siRNA, snRNA, snoRNA, ssRNA, tRNA, tmRNA, rRNA</p>
</li>
<li>
<p>Mono-, di-, tri-nucleoside phosphates: dAMP, dCMP, dGMP, dTMP, dADP, dCDP, dGDP, dTDP, dATP, dCTP, dGTP, dTTP</p>
</li>
<li>
<p>Cofactors: FAD, FMN, NAD, NADP</p>
</li>
<li>
<p>Classes for transporters that inform about structure (e.g. ABC, MFS, RND, MATE, SMR) rather than substrate (e.g. <strong>not</strong> MDR)</p>
</li>
</ul>
<p><em>Example: rRNA methyltransferase </em><em>not</em><em> ribosomal RNA methyltransferase</em></p>
</li>
</ul>


<h3 id="c-punctuation">C. Punctuation</h3>


<ul>
<li>
<p><strong>Slash</strong></p>
<ul>
<li>
<p>Do not use a backslash: ‘\’.</p>
</li>
<li>
<p>For separating multiple domains or functions, the forward slash ‘/’ or the word ‘and’ may be used.</p>
</li>
</ul>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>adenylyltransferase/ADP-heptose synthase cyclohydrolase </em><em>not</em><em> adenylyltransferase\ADP-heptose synthase cyclohydrolase</em></p>
</li>
<li>
<p><em>WD repeat and FYVE domain-containing protein 3 </em><em>not</em><em> WD repeat\FYVE domain-containing protein 3</em></p>
</li>
</ul>
</li>
<li>
<p><strong>Hyphen</strong></p>
<ul>
<li>
<p><strong>Compound adjective:</strong> a hyphen should be used to form compound modifiers (i.e. two or more words that are acting as a single modifier for a noun)</p>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>Ras GTPase-activating protein </em><em>not</em><em> Ras GTPase activating protein</em></p>
</li>
<li>
<p><em>secretin-binding protein </em><em>not</em><em> secretin binding protein</em></p>
</li>
<li>
<p><em>pyrophosphate-dependent phosphofructokinase </em><em>not</em><em> pyrophosphate dependent phosphofructokinase</em></p>
</li>
</ul>
</li>
<li>
<p><strong>Examples of common modifiers:</strong> activated, activating, adapting, adding, amplified, anchored, anchoring, antagonizing, associated, associating, attracting, binding, blocking, bound, branching, bridging, bundling, capping, complementing, concentrating, conjugating, containing, controlled, controlling, converting, coupled, coupling, decapping, degrading, dependent, depolymerizing, derepressing, derived, deriving, destabilizing, docking, editing, enhanced, enhancing, enriched, exposed, flanking, forming, gated, grabbing, harvesting, independent, induced, inducible, inducing, inhibited, inhibiting, insensitive, interacting, laying, like, linked, linking, metabolizing, modifying, modulating, polymerizing, potentiating, preventing, processing, promoting, recognizing, recruited, recruiting, regulated, regulating, related, released, releasing, remodeling, removing, repressing, required, requiring, resistant, responsive, rich, ripening, scaffolding, sensing, sensitive, signaling, specific, splicing, spreading, stabilized, stabilizing, stacking, stimulated, stimulating, structuring, sulfating, suppressing, trafficking, transformed, transforming, transporting</p>
</li>
<li>
<p><strong>More than one domain/repeat in a name:</strong> if there is more than one domain/repeat, only use a hyphen for the last item preceding "containing", even though this violates conventional grammar.</p>
<p><em>Example: ankyrin repeat and SAM domain-containing protein 6 </em><em>not</em><em> ankyrin repeat- and SAM domain-containing protein 6</em></p>
</li>
</ul>
</li>
<li>
<p><strong>Avoid apostrophes, periods, commas and other undesirable punctuation</strong></p>
<ul>
<li>
<p>Remove trailing periods from names.</p>
</li>
<li>
<p>Avoid use of commas except when their usage is part of accepted chemical names.</p>
<p><em>Example: SGT2 family TPR domain-containing protein </em><em>not</em><em> TPR repeat protein, SGT2 family</em></p>
<p><em><strong>Exception</strong> example: 3-hydroxy-16-methoxy-2,3-dihydrotabersonine N-methyltransferase</em></p>
</li>
<li>
<p>Avoid the semicolon “;” or colon “:” except when it is part of an enzyme name.</p>
<p><em>Example</em>: <em>type I cuticular keratin Ha8 </em><em>not</em><em> “Keratin, type I cuticular Ha8; Hair keratin,type I Ha8; Keratin-38; K38”</em></p>
<p><em><strong>Exception</strong> example: phospholipid:diacylglycerol acyltransferase</em></p>
</li>
<li>
<p>Avoid the percentage sign ‘%’</p>
</li>
<li>
<p>Avoid the at sign ‘@’</p>
</li>
<li>
<p>Avoid the equal sign ‘=’</p>
</li>
</ul>
<p><em>Example: guanine nucleotide-binding protein G(t) subunit alpha-3 </em><em>not</em><em> gustducin:SUBUNIT=alpha</em></p>
</li>
<li>
<p><strong>Avoid autocorrection of protein names</strong></p>
<ul>
<li>Data submitters should not let Microsoft Excel, Word, Outlook, or any other utility with format interpolation and spelling autocorrection touch any protein names, especially those with quotes and double-hyphens.</li>
</ul>
</li>
</ul>


<h3 id="d-notation">D. Notation</h3>


<ul>
<li>
<p><strong>Use Arabic rather than Roman numerals</strong></p>
<p>Use Arabic numbers for notation (e.g. 1, 2, 3, etc.) unless Roman numerals are a widely accepted formal nomenclature like “RNA polymerase II”.</p>
<p><em>Example: caveolin-2 </em><em>not</em><em> caveolin-II</em></p>
<p><em><strong>Exception</strong> example: DNA-directed RNA polymerase II core subunit RPB2</em></p>
</li>
<li>
<p><strong>Specifying different members encoded by a multigene family</strong></p>
<p>Use Arabic numbers to specify the different members encoded by a multigene family. Refrain from inventing new numbers if a notation system for protein/gene family members has been previously published.</p>
</li>
</ul>


<h3 id="e-style-and-format">E. Style and format</h3>


<ul>
<li>
<p><strong>Capitalization</strong></p>
<p>Use lowercase except for acronyms or proper nouns.</p>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>proteasome core particle subunit beta 5 </em><em>not</em><em> Proteasome CORE PARTICLE subunit BETA 5</em></p>
</li>
<li>
<p><em>enolase </em><em>not</em><em> ENOLASE</em></p>
</li>
</ul>
</li>
<li>
<p><strong>Greek letters</strong></p>
<ul>
<li>
<p>Greek letters should be written in full and entirely in lower case when indicating one of a series of proteins e.g. "alpha", "beta", "gamma".</p>
</li>
<li>
<p>In the context of steroid/fatty acid metabolism nomenclature, “Delta” should start with an upper case letter.</p>
</li>
</ul>
</li>
<li>
<p><strong>Usage of the term 'protein' in a name</strong></p>
<ul>
<li>
<p>Avoid if not necessary, especially when the name includes terms such as "factor", "enzyme", "inhibitor" or "regulator".</p>
</li>
<li>
<p>Enzyme names commonly end with 'ase' (aminoacylase, arginase, etc.). Do not append the term ‘protein’ to the enzyme name.</p>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>Fe(3+) uptake regulator </em><em>not</em><em> Fe(3+) uptake regulator protein</em></p>
</li>
<li>
<p><em>ribonuclease </em><em>not</em><em> ribonuclease protein</em></p>
</li>
</ul>
</li>
</ul>
</li>
<li>
<p><strong>Usage of the term ‘enzyme’ in a name</strong></p>
<p>Enzyme names commonly end with ‘ase’ (tautomerase, phosphotransferase, etc.). Do not append the term ‘enzyme’ to the enzyme name.</p>
</li>
<li>
<p><strong>Protein name based on a pathway</strong></p>
<p>Use this format: "&lt;Pathway&gt; synthesis protein &lt;GS&gt;"</p>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>thiamine synthesis protein ThiC</em></p>
</li>
<li>
<p><em>folic acid synthesis protein FOL1 </em><em>not</em><em> trifunctional dihydropteroate synthetase/dihydrohydroxymethylpterin pyrophosphokinase/dihydroneopterin aldolase FOL1</em></p>
</li>
</ul>
</li>
<li>
<p><strong>Transfer enzymes</strong></p>
<p>Transfer enzymes are often indicated with the source and destination substrate separated by a double hyphen (--).</p>
<p><em>Example: formylmethanofuran--tetrahydromethanopterin formyltransferase</em></p>
</li>
<li>
<p><strong>tRNA-charging enzymes</strong></p>
<p>Use this format: &lt;amino acid being attached&gt;--tRNA (tRNA type using the three-letter amino acid code with the first letter capitalized) ligase.</p>
<p><em>Example: tyrosine--tRNA (Tyr) ligase</em></p>
</li>
<li>
<p><strong>Identifier types to avoid</strong></p>
<p>COG ID, EC number, FOG ID, GO terms, cluster identifiers.
  Stable locus tags and stable HMM identifiers should be used only in special situations in which they point to families of proteins, and this is made clear by a qualifier in the protein name such as “family protein” or “domain-containing protein”. They should not be used for naming low copy conserved proteins. A protein name based on a locus tag (e.g. MA_1614) can never be transferred by homology, even to identical proteins, because locus tags indicate a position in one specific genome. All use of locus tags in protein names is discouraged because of the danger that simplistic annotation methods can too easily make overly specific (and therefore incorrect) assertions. The one exception is the use of a locus tag in combination with a “family” qualifier, where the locus tag is frequently used in the literature from an annotation present in the INSDC and frequently used in comparative analyses and it is necessary to distinguish among proteins that otherwise would receive insufficiently informative names, e.g. “BB3110 family autotransporter”. Names based on a Hidden Markov Model (HMM) identifier similarly may be used to improve clarity. These too must be qualified by the terms “family protein” or “domain-containing protein”. See section 3B about Novel proteins of unknown function.</p>
</li>
<li>
<p><strong>Avoid kingdom, genus or species-specific characteristics in a name</strong></p>
<ul>
<li>
<p>Avoid expression, abundance information, disease, phenotype and anatomy-related information.</p>
</li>
<li>
<p>Avoid cellular, subcellular and environmental location. Location information is not always transferable among all organisms and should be applied conservatively.</p>
</li>
<li>
<p>Avoid molecular weight except for ribosomal proteins and well-established historical names, e.g. myosins, clathrins, dyneins.</p>
<p><strong>Exception</strong> examples:</p>
<ul>
<li>
<p><em>Eukaryotes: 60S ribosomal protein subunit L19B</em></p>
</li>
<li>
<p><em>Prokaryotes: 50S ribosomal protein subunit L1</em></p>
</li>
<li>
<p><em>myosin heavy chain 1</em></p>
</li>
</ul>
</li>
<li>
<p>Avoid referencing chromosomal or cytogenetic locations of the gene.</p>
<p><em>Example: methylcytosine dioxygenase TET1 </em><em>not</em><em> ten-eleven translocation-1</em></p>
</li>
<li>
<p>Avoid locus_tag identifiers.</p>
</li>
<li>
<p>Avoid regulatory content such as ‘regulated by’, ‘regulates’.</p>
</li>
<li>
<p>Avoid organism names or abbreviations of species/genus/kingdom of origin or homologous species. An exception to this is adjectival organism names which can be included in rare cases where it will make a name more descriptive and less general.</p>
<p><em><strong>Exception</strong> example: staphylococcal nuclease domain-containing protein 1</em></p>
</li>
</ul>
</li>
</ul>


<h3 id="f-word-usage">F. Word usage</h3>


<ul>
<li>
<p><strong>Avoid linking words and phrases</strong></p>
<ul>
<li>Avoid the following linking words: for, or (as in name1 or name2), of, to, with.</li>
</ul>
<p><em>Example</em>: <em>two-component system sensor histidine kinase </em><em>not</em><em> histidine kinase sensor of two component system</em></p>
<ul>
<li>Avoid the following linking phrases: also known as, together with.</li>
</ul>
</li>
<li>
<p><strong>Other phrases to avoid</strong></p>
<ul>
<li>cell surface, cell surface protein, conserved hypothetical, hypothetical conserved, identified by, identity to, involved in, implicated in, protein domain protein, protein of unknown function, protein hypothetical, protein protein, protein putative, putative putative, questionable protein, related to, signal peptide protein, similar to, surface antigen, surface protein, unknown protein, authentic point mutation, low quality protein, C term(inal), N term(inal), inactivated derivative, conserved uncharacterized, uncharacterized conserved</li>
</ul>
</li>
<li>
<p><strong>Terms to avoid</strong></p>
<ul>
<li>antigen, CDS, conserved, cytoplasmic, deletion, dubious, doubtful, expressed, fragment, frame shift, frameshift, genome, homolog (unless phylogenetically determined), interrupt, KDa, K Da, likely, locus, locus_tag, novel, ORF, partial, possible, potential, predicted, probable, pseudo, pseudogene, secreted, strongly, truncat(ed), under, unique, unnamed, WGS, Xray, X-ray</li>
<li>
<p>Naming proteins as antigens is discouraged but there may be rare exceptions to match widespread community/publication usage.</p>
<p><strong>Exception</strong> example: <em>cellular tumor antigen p53</em></p>
</li>
<li>
<p>Note that use of the term ‘putative’ is acceptable in certain cases - see the topic “Novel proteins of unknown function” in section 3B.</p>
</li>
</ul>
</li>
</ul>


<h2 id="3-choosing-protein-names">3. Choosing Protein Names</h2>


<h3 id="a-sources-of-protein-name-annota">A. Sources of protein name annotation</h3>


<p>Protein names are ideally supported by evidence from expert sources, the literature, HMMs and other protein signatures, and/or domain architectures. NCBI-RefSeq and UniProt aim to store, and publicly report, name source information of curated records which may include the expert database name, individual scientist name, PubMed ID, HMM ID, and curated domain architectures. The current rank of sources for protein naming is: a) expert sources &gt; b) experimental reports &gt; c) HMMs and other signatures &gt; d) domain architectures. Note that BLAST results, FASTA headers and definition lines in database records may contain information such as organism names and other information which should not be included in a protein name. Be aware that sources of functional protein annotation listed below do not necessarily meet all the international protein nomenclature guidelines. In particular, resources may not be available to retroactively update older data.</p>


<p><strong>a) Expert sources of specific and definitive names may include:</strong></p>


<p><strong>Species-specific naming authorities</strong></p>


<ul>
<li>
<p>Established and maintained database authorities such as species-specific nomenclature bodies (some are listed here: <a href="http://www.uniprot.org/docs/nomlist"><em>http://www.uniprot.org/docs/nomlist</em></a>).</p>
</li>
<li>
<p>Avoid names from species-specific authorities that relate to phenotype, anatomical features or any taxon-specific characteristics. In these cases, use the widely recognized gene symbol in combination with a functional name rather than a phenotypical name. For example, ‘minichromosome maintenance complex component 7’ is not applicable to organisms which do not have minichromosomes so to avoid transferring such a protein name, use the gene symbol MCM7 combined with a functional name instead.</p>
</li>
</ul>


<p><em>Example: DNA replication licensing factor MCM7 </em><em>not</em><em> minichromosome maintenance complex component 7</em></p>


<p><strong>Enzyme names from Enzyme Commission (EC)</strong></p>


<ul>
<li>
<p>There is a strong preference to use the preferred EC name when it is a specific and accurate reflection of the main function of the protein and the EC name is neither too general nor too specific to apply to a group of proteins.</p>
</li>
<li>
<p>In contrast, expert curators may override the EC name in certain circumstances such as when the name is not the primary function of the enzyme or they may choose an alternative EC name if the preferred EC name ends with a qualifier in parenthesis or contains two or more sets of brackets/parentheses.</p>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>ABC transporter ATP-binding protein </em><em>not</em><em> ATPase</em></p>
</li>
<li>
<p><em>NADP-dependent isocitrate dehydrogenase IDP3</em> <strong>rather than</strong> <em>isocitrate</em> <em>dehydrogenase (NADP(+)) IDP3</em></p>
</li>
<li>
<p><em>phosphoribosylformimino-5-aminoimidazole carboxamide ribotide isomerase</em> <strong>rather than</strong> <em>1-(5-phosphoribosyl)-5-((5-phosphoribosylamino)methylideneamino)imidazole-4-carboxamide isomerase</em></p>
</li>
</ul>
</li>
<li>
<p>Keep the double hyphen '--' used for transferases and ligases.</p>
<p><em>Example: formylmethanofuran--tetrahydromethanopterin formyltransferase</em></p>
</li>
<li>
<p>Use the following format for enzymes that remove or transfer phosphate groups: "&lt;modified_residues&gt;-protein &lt;activity&gt;".</p>
<p><em>Example: tyrosine-protein phosphatase</em></p>
</li>
</ul>


<p><strong>UniProtKB/Swiss-Prot</strong></p>


<ul>
<li>UniProtKB/Swiss-Prot name of an orthologous or paralogous protein, provided that it meets the guidelines in this document.</li>
</ul>


<p><strong>Other</strong></p>


<ul>
<li>Individual scientists who specialize in a protein family.</li>
</ul>


<p><strong>b) Experimental reports</strong></p>


<ul>
<li>
<p>A recent literature-supported name from a paper that characterized the protein function is likely the most specific and definitive name to apply (with format refinement as needed). The literature may provide a history of names over time.</p>
</li>
<li>
<p>Newer more functionally specific names are preferred over older more general or biosystem-related names.</p>
</li>
</ul>


<p><strong>c) HMMs and other signatures</strong></p>


<ul>
<li>
<p>Equivalogs are homologs that have retained a specific function from their common ancestor, whatever the evolutionary path of each protein. This stands in contrast to the definitions of orthologs (homologs from speciation events only), paralogs (homologs from duplication events), and xenologs (homologs from lateral transfer events), all of which make no assertion about function.</p>
</li>
<li>
<p>An equivalog-type HMM is any HMM that asserts its member proteins share a specific function, and that supplies a descriptive protein name and other attributes for automated pipelines to use during genome annotation.</p>
</li>
<li>
<p>Most TIGRFAM models are designated equivalogs, meaning they assign a specific name to proteins conserved in function from a common ancestral sequence.</p>
<p><em>Example of an equivalog type name versus a general name (see UniProtKB/Swiss-Prot record <a href="https://www.ncbi.nlm.nih.gov/protein/P0A288.1"><em>P0A288</em></a>): peptide chain release factor 1 </em><em>versus</em><em> PCRF domain-containing protein</em></p>
</li>
<li>
<p>To apply names to proteins related to proteins named by equivalog type HMMs, use XXX-like protein or XXX family protein. These synonymous terms will carry the association that, despite obvious sequence similarity to XXX, it may or may not have the same role and function as XXX and thus it might be XXX itself, or something related. Also see the usage of “putative XXX” in section 3B about Novel proteins of unknown function.</p>
<p><em>Example: glycine cleavage protein H-like protein for proteins of the family TIGR03077. These proteins are not bona fide glycine cleavage protein H, which belongs to family TIGR00528.</em></p>
</li>
</ul>


<p><strong>d) Profiles and domain architectures</strong></p>


<ul>
<li>
<p>The domain architecture is defined as the sequential order of conserved domains in a protein sequence. In some cases the architecture consists of a single domain that covers the full length of the protein. Domain architecture names are usually more general than equivalog-type HMM names but provide additional protein naming evidence. A protein name based on a multi-domain architecture is more informative than a protein name based only on domain content.</p>
<p><em>Example: PAS domain-containing sensor histidine kinase</em> (based on a multi-domain architecture).</p>
</li>
<li>
<p>Protein names can be based on a single domain which does not cover the full length protein and may be associated with varied architectures.</p>
<p><em>Example: PAS domain-containing protein</em> (general)</p>
</li>
<li>
<p>Be cautious when parsing domain names. Automatically extracting a name from a domain or profile may end up being uninformative e.g. Pfam accession PF00083, Sugar_tr which results in a protein product called ‘sugar’.</p>
</li>
</ul>


<h3 id="b-naming-procedure-for-specific-">B. Naming procedure for specific cases</h3>


<ul>
<li>
<p><strong>Multifunctional proteins</strong></p>
<ul>
<li>
<p>Multifunctional proteins may catalyze multiple enzymatic reactions such as human protein GNE which has both epimerase and kinase activities or they may be involved in different functions such as <em>Arabidopsis thaliana protein ENO2</em> which acts as an enolase and is also involved in transcription regulation.</p>
</li>
<li>
<p>No need to list all functions.</p>
</li>
<li>
<p>If no other name is applicable, the words bifunctional or multifunctional may be used in combination with the functional names.</p>
</li>
<li>
<p>When using bifunctional, list the functions based on the order of the domains in the sequence and separate them with a forward slash.</p>
</li>
<li>
<p>In rare cases and when no other name is applicable, enzymes with more than two functions may use the format: “multifunctional protein &lt;GS&gt;”.</p>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>bifunctional adenylyltransferase/ADP-heptose synthase cyclohydrolase</em></p>
</li>
<li>
<p><em>fatty acid oxidation complex subunit alpha </em><em>not</em><em> multifunctional enoyl-CoA hydratase/3-hydroxybutyryl-CoA epimerase/3-hydroxyacyl-CoA dehydrogenase</em></p>
</li>
<li>
<p><em>multifunctional proline degradation protein PutA </em><em>not</em><em> multifunctional DNA-binding transcriptional repressor/proline dehydrogenase/1-pyrroline-5-carboxylate dehydrogenase</em></p>
</li>
</ul>
</li>
</ul>
</li>
<li>
<p><strong>Naming proteins based on protein complex membership</strong></p>
<ul>
<li>Protein complex members for well-defined multi-subunit complexes of known composition can be named according to the complex followed by the specific subunit name.</li>
<li>
<p>Use 'subunit', not 'chain' or 'component', for members of protein complexes.
    The exception is historical cases where ‘chain’ is exclusively used e.g. myosins, clathrins, dyneins.</p>
<p><em><strong>Exception</strong> example: myosin heavy chain 1</em></p>
</li>
<li>
<p>If the 'type' of subunit is known, then 'type' goes first where ‘type’ can be catalytic, ATP-binding, regulatory etc.</p>
<p><em>Example:</em> <em>26S proteasome non-ATPase regulatory subunit 1</em></p>
</li>
<li>
<p>If a subunit has a designator, then that follows the term ‘subunit’, e.g. subunit 1, subunit A, subunit AbcD, subunit alpha. The preference for designator use is: number &gt; letter &gt; gene symbol &gt; Greek letter spelled out.</p>
<p><em>Example: F1Fo ATP synthase subunit alpha </em><em>not</em><em> F1Fo ATP synthase alpha subunit</em></p>
</li>
<li>
<p>An abbreviation may be part of a protein complex name.</p>
<p><em>Example: (3R)-hydroxymyristoyl-ACP dehydratase</em></p>
</li>
<li>
<p>Avoid 'large subunit' or 'small subunit' when possible, but well-established historical names are an exception.</p>
<p><em><strong>Exception</strong> example: 2,3-diketo-L-gulonate TRAP transporter large permease</em></p>
</li>
</ul>
</li>
<li>
<p><strong>Inactive proteins</strong></p>
<ul>
<li>Inactive proteins do not refer to pseudogenes. Inactive versions of proteins refer to proteins with altered catalytic residues or inability to undergo autocatalytic cleavage, resulting in loss of expected activity. Reserve the usage of “inactive” in a protein name for such cases.</li>
</ul>
<p><em>Example: inactive glutathione hydrolase 2</em></p>
</li>
<li>
<p><strong>Novel proteins of unknown function</strong></p>
<p>Where no functional information is available, any of the following methods may be used to name a protein.</p>
<ul>
<li>
<p><strong>where domains, repeats or motifs associated with a variety of architectures are observed:</strong> Use the format ‘xxx domain-containing protein’ but avoid transferring ‘xxx domain-containing protein’ names based on a BLAST search. Use a protein signature search instead.</p>
<p><em>Example: PAS domain-containing protein</em></p>
</li>
<li>
<p><strong>where sequence similarity to a defined protein family is observed:</strong> Avoid asserting the function of the family. Use a general name such as 'XXX family protein'. Proteins given the name "XXX family protein" might be XXX itself, or something related. The name "XXX family protein" may be thought of as an unspecific and temporary name that will be replaced when more specific annotation becomes available.</p>
<p><em>Example: flavodoxin family protein</em></p>
</li>
<li>
<p><strong>where a known family protein has a predicted activity:</strong> In general, use of the word ‘putative’ should be avoided. In this specific case, prefix the activity with 'putative', not the whole protein name. The term 'putative' should be located before the activity that it refers to. "putative XXX" should be used when "XXX" is considered the most likely prediction, but the reasoning used to perform the annotation carries with it enough doubt that the disclaimer is useful. The term should not be used in an automated fashion simply to mean "protein showing low-scoring homology to XXX".</p>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>radical SAM family putative peptide maturase </em><em>not</em><em> putative radical SAM family peptide maturase </em><em>or not</em><em> radical SAM family peptide maturase, putative</em></p>
</li>
<li>
<p><em>putative acetylornithine deacetylase </em><em>not</em><em> predicted acetylornithine deacetylase </em><em>not</em><em> possible acetylornithine deacetylase </em><em>not</em><em> probable acetylornithine deacetylase </em><em>not</em><em> potential acetylornithine deacetylase </em><em>not</em><em> hypothetical acetylornithine deacetylase</em></p>
</li>
</ul>
</li>
<li>
<p><strong>where a full length protein HMM or other signature match associated with a single architecture (equivalog type signatures) is observed:</strong> Use the HMM name or other protein family signature name to name the protein, conforming to the rules of this document. Caution: Protein signature identifiers and the signatures themselves are not stable and may change, requiring review and renaming of proteins named using this method.</p>
<p><em>Example: TIGR01212 family radical SAM protein</em></p>
</li>
<li>
<p><strong>where no domain or motif is observed:</strong> If a gene symbol or protein symbol has been published for this protein, use the protein &lt;GS&gt; or protein &lt;PS&gt; format. Otherwise, use the default name ‘hypothetical protein’ or ‘uncharacterized protein’ (all lowercase) with no further specifications.</p>
<p><em>Examples:</em></p>
<ul>
<li>
<p><em>hypothetical protein </em><em>not</em><em> hypothetical protein, conserved</em></p>
</li>
<li>
<p><em>uncharacterized protein </em><em>not</em><em> uncharacterized protein conserved in archaea</em></p>
</li>
<li>
<p><em>protein XYZ1</em></p>
</li>
</ul>
</li>
</ul>
</li>
</ul>


<p>Last updated: 02-MAR-2020</p>
                                </div>
                                <!--/.col1-->
                                <div class="col2">

                                </div>
                                <!--/.col2-->
                                <div class="col3">

                                </div>
                                <!--/.col3-->
                                <div class="col4">

                                </div>
                                <!--/.col4-->
                                <div class="col5">

                                </div>
                                <div class="col6">

                                </div>
                                <div class="col7">

                                </div>
                                <div class="col8">

                                </div>
                                <div class="col9">

                                </div>
                            </div><!--/.content-->
                        </div><!--/.container-->
                        <div id="NCBIFooter_dynamic">
    <div class="breadcrumbs">You are here:
            <span id="breadcrumb_text"><a href="/guide/">NCBI</a></span></div>
    <a id="help-desk-link" class="help_desk" href="https://support.ncbi.nlm.nih.gov/ics/support/default.asp?Time=2025-03-05T05:21:03-05:00&amp;Snapshot=%2Fprojects%2Fstaticsites%2Fgenbank%2Fgenbank@2.21&amp;Host=portal104&amp;ncbi_phid=CE8B65437C81A3A10000000000CF00A3&amp;ncbi_session=CE8B5AF87C7FFCB1_0191SID&amp;from=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fgenbank%2Finternatprot_nomenguide%2F&amp;Ncbi_App=genbank&amp;Page=custom-page&amp;style=classic&amp;deptID=28049" target="_blank">Support Center</a>
    <noscript><img alt="" src="/stat?jsdisabled=true&amp;ncbi_app=genbank&amp;ncbi_db=&amp;ncbi_pdid=custom-page&amp;ncbi_phid=CE8B65437C81A3A10000000000CF00A3" /></noscript>
</div>


<div xmlns:xi="http://www.w3.org/2001/XInclude">
    <div xmlns="http://www.w3.org/1999/xhtml" class="footer" id="footer" xml:base="http://127.0.0.1/sites/static/header_footer">
	<section class="icon-section">
		<div id="icon-section-header" class="icon-section_header">Follow NCBI</div>
		<div class="grid-container container">
			<div class="icon-section_container">
				<a class="footer-icon" id="footer_twitter" href="https://twitter.com/ncbi" aria-label="Twitter">
					<svg xmlns="http://www.w3.org/2000/svg" width="40" height="40" viewBox="0 0 40 40" fill="none">
						<title>Twitter</title>
						<g id="twitterx1008">
							<path id="path1008" d="M6.06736 7L16.8778 20.8991L6.00001 32.2H10.2L18.6 23.1L25.668 32.2H34L22.8 17.5L31.9 7H28.4L20.7 15.4L14.401 7H6.06898H6.06736ZM9.66753 8.73423H12.9327L29.7327 30.4658H26.5697L9.66753 8.73423Z" fill="#5B616B"></path>
						</g>
					</svg>
				</a>
				<a class="footer-icon" id="footer_facebook" href="https://www.facebook.com/ncbi.nlm" aria-label="Facebook"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
					<title>Facebook</title>
					<path class="cls-11" d="M210.5,115.12H171.74V97.82c0-8.14,5.39-10,9.19-10h27.14V52l-39.32-.12c-35.66,0-42.42,26.68-42.42,43.77v19.48H99.09v36.32h27.24v109h45.41v-109h35Z">
					</path>
				</svg></a>
				<a class="footer-icon" id="footer_linkedin" href="https://www.linkedin.com/company/ncbinlm" aria-label="LinkedIn"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
						<title>LinkedIn</title>
						<path class="cls-11" d="M101.64,243.37H57.79v-114h43.85Zm-22-131.54h-.26c-13.25,0-21.82-10.36-21.82-21.76,0-11.65,8.84-21.15,22.33-21.15S101.7,78.72,102,90.38C102,101.77,93.4,111.83,79.63,111.83Zm100.93,52.61A17.54,17.54,0,0,0,163,182v61.39H119.18s.51-105.23,0-114H163v13a54.33,54.33,0,0,1,34.54-12.66c26,0,44.39,18.8,44.39,55.29v58.35H198.1V182A17.54,17.54,0,0,0,180.56,164.44Z">
						</path>
					</svg></a>
				<a class="footer-icon" id="footer_github" href="https://github.com/ncbi" aria-label="GitHub"><svg xmlns="http://www.w3.org/2000/svg" data-name="Layer 1" viewBox="0 0 300 300">
					<defs>
						<style>
							.cls-11,
							.cls-12 {
							fill: #737373;
							}

							.cls-11 {
							fill-rule: evenodd;
							}
						</style>
					</defs>
					<title>GitHub</title>
					<path class="cls-11" d="M151.36,47.28a105.76,105.76,0,0,0-33.43,206.1c5.28,1,7.22-2.3,7.22-5.09,0-2.52-.09-10.85-.14-19.69-29.42,6.4-35.63-12.48-35.63-12.48-4.81-12.22-11.74-15.47-11.74-15.47-9.59-6.56.73-6.43.73-6.43,10.61.75,16.21,10.9,16.21,10.9,9.43,16.17,24.73,11.49,30.77,8.79,1-6.83,3.69-11.5,6.71-14.14C108.57,197.1,83.88,188,83.88,147.51a40.92,40.92,0,0,1,10.9-28.39c-1.1-2.66-4.72-13.42,1-28,0,0,8.88-2.84,29.09,10.84a100.26,100.26,0,0,1,53,0C198,88.3,206.9,91.14,206.9,91.14c5.76,14.56,2.14,25.32,1,28a40.87,40.87,0,0,1,10.89,28.39c0,40.62-24.74,49.56-48.29,52.18,3.79,3.28,7.17,9.71,7.17,19.58,0,14.15-.12,25.54-.12,29,0,2.82,1.9,6.11,7.26,5.07A105.76,105.76,0,0,0,151.36,47.28Z">
					</path>
					<path class="cls-12" d="M85.66,199.12c-.23.52-1.06.68-1.81.32s-1.2-1.06-.95-1.59,1.06-.69,1.82-.33,1.21,1.07.94,1.6Zm-1.3-1">
					</path>
					<path class="cls-12" d="M90,203.89c-.51.47-1.49.25-2.16-.49a1.61,1.61,0,0,1-.31-2.19c.52-.47,1.47-.25,2.17.49s.82,1.72.3,2.19Zm-1-1.08">
					</path>
					<path class="cls-12" d="M94.12,210c-.65.46-1.71,0-2.37-.91s-.64-2.07,0-2.52,1.7,0,2.36.89.65,2.08,0,2.54Zm0,0"></path>
					<path class="cls-12" d="M99.83,215.87c-.58.64-1.82.47-2.72-.41s-1.18-2.06-.6-2.7,1.83-.46,2.74.41,1.2,2.07.58,2.7Zm0,0">
					</path>
					<path class="cls-12" d="M107.71,219.29c-.26.82-1.45,1.2-2.64.85s-2-1.34-1.74-2.17,1.44-1.23,2.65-.85,2,1.32,1.73,2.17Zm0,0">
					</path>
					<path class="cls-12" d="M116.36,219.92c0,.87-1,1.59-2.24,1.61s-2.29-.68-2.3-1.54,1-1.59,2.26-1.61,2.28.67,2.28,1.54Zm0,0">
					</path>
					<path class="cls-12" d="M124.42,218.55c.15.85-.73,1.72-2,1.95s-2.37-.3-2.52-1.14.73-1.75,2-2,2.37.29,2.53,1.16Zm0,0"></path>
				</svg></a>
				<a class="footer-icon" id="footer_blog" href="https://ncbiinsights.ncbi.nlm.nih.gov/" aria-label="Blog">
					<svg xmlns="http://www.w3.org/2000/svg" id="Layer_1" data-name="Layer 1" viewBox="0 0 40 40">
						<defs><style>.cls-1{fill:#737373;}</style></defs>
						<title>NCBI Insights Blog</title>
						<path class="cls-1" d="M14,30a4,4,0,1,1-4-4,4,4,0,0,1,4,4Zm11,3A19,19,0,0,0,7.05,15a1,1,0,0,0-1,1v3a1,1,0,0,0,.93,1A14,14,0,0,1,20,33.07,1,1,0,0,0,21,34h3a1,1,0,0,0,1-1Zm9,0A28,28,0,0,0,7,6,1,1,0,0,0,6,7v3a1,1,0,0,0,1,1A23,23,0,0,1,29,33a1,1,0,0,0,1,1h3A1,1,0,0,0,34,33Z"></path>
					</svg>
				</a>
			</div>
		</div>
	</section>

	<section class="container-fluid bg-primary">
		<div class="container pt-5">
			<div class="row mt-3">
				<div class="col-lg-3 col-12">
					<p><a class="text-white" href="https://www.nlm.nih.gov/socialmedia/index.html">Connect with NLM</a></p>
					<ul class="list-inline social_media">
						<li class="list-inline-item"><a href="https://twitter.com/NLM_NIH" aria-label="Twitter" target="_blank" rel="noopener noreferrer">
							<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
								<title>Twitter</title>
								<g id="twitterx1009" clip-path="url(#clip0_65276_3946)">
									<path id="Vector_Twitter" d="M17.5006 34.6565C26.9761 34.6565 34.6575 26.9751 34.6575 17.4996C34.6575 8.02416 26.9761 0.342773 17.5006 0.342773C8.02514 0.342773 0.34375 8.02416 0.34375 17.4996C0.34375 26.9751 8.02514 34.6565 17.5006 34.6565Z" fill="#205493" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
									<path id="path1009" d="M8.54811 8.5L16.2698 18.4279L8.50001 26.5H11.5L17.5 20L22.5486 26.5H28.5L20.5 16L27 8.5H24.5L19 14.5L14.5007 8.5H8.54927H8.54811ZM11.1197 9.73873H13.4519L25.4519 25.2613H23.1926L11.1197 9.73873Z" fill="white"></path>
								</g>
								<defs>
									<clipPath id="clip0_65276_3946">
										<rect width="35" height="35" fill="white"></rect>
									</clipPath>
								</defs>
							</svg>
						</a></li>
						<li class="list-inline-item"><a href="https://www.facebook.com/nationallibraryofmedicine" aria-label="Facebook" rel="noopener noreferrer" target="_blank">
							<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
								<title>Facebook</title>
								<g id="Facebook" clip-path="url(#clip0_1717_1086)">
									<path id="Vector_Facebook" d="M15.1147 29.1371C15.1147 29.0822 15.1147 29.0296 15.1147 28.9747V18.9414H11.8183C11.6719 18.9414 11.6719 18.9414 11.6719 18.8018C11.6719 17.5642 11.6719 16.3289 11.6719 15.0937C11.6719 14.9793 11.7062 14.9518 11.816 14.9518C12.8683 14.9518 13.9206 14.9518 14.9751 14.9518H15.1215V14.8329C15.1215 13.8057 15.1215 12.774 15.1215 11.7492C15.1274 10.9262 15.3148 10.1146 15.6706 9.37241C16.1301 8.38271 16.9475 7.60378 17.9582 7.19235C18.6492 6.90525 19.3923 6.76428 20.1405 6.7783C21.0029 6.79202 21.8653 6.83091 22.7278 6.86065C22.8879 6.86065 23.048 6.89496 23.2082 6.90182C23.2974 6.90182 23.3271 6.94071 23.3271 7.02993C23.3271 7.54235 23.3271 8.05477 23.3271 8.5649C23.3271 9.16882 23.3271 9.77274 23.3271 10.3767C23.3271 10.4819 23.2974 10.5139 23.1921 10.5116C22.5379 10.5116 21.8814 10.5116 21.2271 10.5116C20.9287 10.5184 20.6316 10.5528 20.3395 10.6146C20.0822 10.6619 19.8463 10.7891 19.6653 10.9779C19.4842 11.1668 19.3672 11.4078 19.3307 11.6669C19.2857 11.893 19.2612 12.1226 19.2575 12.3531C19.2575 13.1904 19.2575 14.0299 19.2575 14.8695C19.2575 14.8946 19.2575 14.9198 19.2575 14.9564H23.0229C23.1807 14.9564 23.183 14.9564 23.1624 15.1074C23.0778 15.7662 22.9885 16.425 22.9039 17.0816C22.8322 17.6321 22.7636 18.1827 22.698 18.7332C22.6729 18.9437 22.6797 18.9437 22.4693 18.9437H19.2644V28.8992C19.2644 28.9793 19.2644 29.0593 19.2644 29.1394L15.1147 29.1371Z" fill="white"></path>
									<path id="Vector_2_Facebook" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
								</g>
								<defs>
									<clipPath id="clip0_1717_1086">
										<rect width="35" height="35" fill="white"></rect>
									</clipPath>
								</defs>
							</svg>
						</a></li>
						<li class="list-inline-item"><a href="https://www.youtube.com/user/NLMNIH" aria-label="Youtube" target="_blank" rel="noopener noreferrer">
							<svg xmlns="http://www.w3.org/2000/svg" width="35" height="35" viewBox="0 0 36 35" fill="none">
								<title>Youtube</title>
								<g id="YouTube" clip-path="url(#clip0_1717_1101)">
									<path id="Vector_Youtube" d="M26.2571 11.4791C25.9025 11.1589 25.5709 10.9576 24.228 10.834C22.5512 10.6785 20.2797 10.6556 18.564 10.6533H16.4365C14.7208 10.6533 12.4493 10.6785 10.7725 10.834C9.43196 10.9576 9.09798 11.1589 8.7434 11.4791C7.81464 12.321 7.6202 14.6268 7.59961 16.8938C7.59961 17.3178 7.59961 17.741 7.59961 18.1635C7.62706 20.4121 7.82837 22.686 8.7434 23.521C9.09798 23.8412 9.42967 24.0425 10.7725 24.1661C12.4493 24.3216 14.7208 24.3445 16.4365 24.3468H18.564C20.2797 24.3468 22.5512 24.3216 24.228 24.1661C25.5686 24.0425 25.9025 23.8412 26.2571 23.521C27.1722 22.6929 27.3735 20.451 27.4009 18.2206C27.4009 17.7402 27.4009 17.2599 27.4009 16.7795C27.3735 14.5491 27.1699 12.3072 26.2571 11.4791ZM15.5604 20.5311V14.652L20.561 17.5001L15.5604 20.5311Z" fill="white"></path>
									<path id="Vector_2_Youtube" d="M17.5006 34.657C26.9761 34.657 34.6575 26.9756 34.6575 17.5001C34.6575 8.02465 26.9761 0.343262 17.5006 0.343262C8.02514 0.343262 0.34375 8.02465 0.34375 17.5001C0.34375 26.9756 8.02514 34.657 17.5006 34.657Z" stroke="white" stroke-width="1.0" stroke-miterlimit="10"></path>
								</g>
								<defs>
									<clipPath id="clip0_1717_1101">
										<rect width="35" height="35" fill="white"></rect>
									</clipPath>
								</defs>
							</svg>
						</a></li>
					</ul>
				</div>
				<div class="col-lg-3 col-12">
					<p class="address_footer text-white">National Library of Medicine<br />
						<a href="https://www.google.com/maps/place/8600+Rockville+Pike,+Bethesda,+MD+20894/@38.9959508,-77.101021,17z/data=!3m1!4b1!4m5!3m4!1s0x89b7c95e25765ddb:0x19156f88b27635b8!8m2!3d38.9959508!4d-77.0988323" class="text-white" target="_blank" rel="noopener noreferrer">8600 Rockville Pike<br />
							Bethesda, MD 20894</a></p>
				</div>
				<!-- Policy links: web policies, FOIA, and the HHS vulnerability disclosure policy. -->
				<div class="col-lg-3 col-12 centered-lg">
					<p><a class="text-white" href="https://www.nlm.nih.gov/web_policies.html">Web Policies</a><br />
						<a class="text-white" href="https://www.nih.gov/institutes-nih/nih-office-director/office-communications-public-liaison/freedom-information-act-office">FOIA</a><br />
						<a class="text-white" id="vdp" href="https://www.hhs.gov/vulnerability-disclosure-policy/index.html">HHS Vulnerability Disclosure</a></p>
				</div>
				<!-- Help, accessibility, and careers links. -->
				<div class="col-lg-3 col-12 centered-lg">
					<p><a class="supportLink text-white" href="https://support.nlm.nih.gov/">Help</a><br />
						<a class="text-white" href="https://www.nlm.nih.gov/accessibility.html">Accessibility</a><br />
						<a class="text-white" href="https://www.nlm.nih.gov/careers/careers.html">Careers</a></p>
				</div>
			</div>
			<!-- Bottom row of related government site links (NLM, NIH, HHS, USA.gov). -->
			<div class="row">
				<div class="col-lg-12 centered-lg">
					<!-- aria-label gives this navigation landmark an accessible name so it can be
					     told apart from any other nav regions on the page. -->
					<nav class="bottom-links" aria-label="Related government sites">
						<ul class="mt-3">
							<li>
								<!-- Explicit https:// (was protocol-relative) to match the sibling links. -->
								<a class="text-white" href="https://www.nlm.nih.gov/">NLM</a>
							</li>
							<li>
								<a class="text-white" href="https://www.nih.gov/">NIH</a>
							</li>
							<li>
								<a class="text-white" href="https://www.hhs.gov/">HHS</a>
							</li>
							<li>
								<a class="text-white" href="https://www.usa.gov/">USA.gov</a>
							</li>
						</ul>
					</nav>
				</div>
			</div>
		</div>
	</section>
	<!-- Combined instrumentation script served by the portal; NOTE(review): the module names in the URL suggest analytics/page-start logging — confirm. -->
	<script type="text/javascript" src="/portal/portal3rc.fcgi/rlib/js/InstrumentOmnitureBaseJS/InstrumentNCBIConfigJS/InstrumentNCBIBaseJS/InstrumentPageStarterJS.js?v=1"> </script>
	<!-- NOTE(review): hfjs2.js is presumably the shared header/footer behavior script, judging by its name and placement — confirm. -->
	<script type="text/javascript" src="/portal/portal3rc.fcgi/static/js/hfjs2.js"> </script>
</div>
</div>
                        <!--/.footer-->
                        <p class="last-updated small">Last updated: 2024-01-24T20:07:23Z</p>
                    </div>
                    <!--/.page-->
                </div>
                <!--/.wrap-->
                <span class="PAFAppResources"></span>


            </div><!-- /.twelve_col -->
        </div>
        <!-- /.grid -->


        <!-- usually for JS scripts at page bottom -->
        <span class="pagefixtures"></span>


<!-- CE8B5AF87C7FFCB1_0191SID /projects/staticsites/genbank/genbank@2.21 portal104 v4.1.r689238 Tue, Oct 22 2024 16:10:51 -->
<span id="portal-csrf-token" style="display:none" data-token="CE8B5AF87C7FFCB1_0191SID"></span>

<script type="text/javascript" src="//static.pubmed.gov/portal/portal3rc.fcgi/4218137/js/3879255/4121861/1490097/4087685.js" snapshot="genbank"></script></body>
</html>