samhsa-gov/www.samhsa.gov/robots.txt
2025-02-28 14:27:08 -05:00

83 lines
1.9 KiB
Text

# SAMHSA.gov customized robots.txt
#
# For more information about the robots.txt standard, see:
# http://www.robotstxt.org/robotstxt.html
#
# All Allow/Disallow rules in this file belong to a single group that applies
# to every crawler.
User-agent: *
# Sitemap URL. Per the sitemaps.org protocol this record is independent of the
# User-agent line, so its position inside the group does not scope it.
Sitemap: https://samhsa.gov/sitemap.xml
##
## SAMHSA.gov specific rules
##
# Some content is exposed under raw /node/... paths; we do not want it indexed.
Disallow: /node/*
# Docroot Files
Disallow: /README.md
Disallow: /INSTALL.txt
Disallow: /example.gitignore
Disallow: /styleguide-USWDS-Components.html
# [OCWT-7202] Exclude Grants dashboard search results.
# Only URLs carrying a query string are excluded; both the form without and
# the form with a trailing slash before the "?" are covered.
Disallow: /grants/grants-dashboard?*
Disallow: /grants/grants-dashboard/?*
# Styleguide
Disallow: /simple-styleguide*
##
## Below this point is the contents of the default Drupal robots.txt file. Do not edit.
## This section can be merged with updates from Drupal core.
##
# CSS, JS, Images
Allow: /core/*.css$
Allow: /core/*.css?
Allow: /core/*.js$
Allow: /core/*.js?
Allow: /core/*.gif
Allow: /core/*.jpg
Allow: /core/*.jpeg
Allow: /core/*.png
Allow: /core/*.svg
Allow: /profiles/*.css$
Allow: /profiles/*.css?
Allow: /profiles/*.js$
Allow: /profiles/*.js?
Allow: /profiles/*.gif
Allow: /profiles/*.jpg
Allow: /profiles/*.jpeg
Allow: /profiles/*.png
Allow: /profiles/*.svg
# Directories
Disallow: /core/
Disallow: /profiles/
# Files
Disallow: /README.txt
Disallow: /web.config
# Paths (clean URLs)
Disallow: /admin/
Disallow: /comment/reply/
Disallow: /filter/tips
Disallow: /node/add/
Disallow: /search/
Disallow: /user/register
Disallow: /user/password
Disallow: /user/login
Disallow: /user/logout
Disallow: /media/oembed
Disallow: /*/media/oembed
# Paths (no clean URLs)
Disallow: /index.php/admin/
Disallow: /index.php/comment/reply/
Disallow: /index.php/filter/tips
Disallow: /index.php/node/add/
Disallow: /index.php/search/
Disallow: /index.php/user/password
Disallow: /index.php/user/register
Disallow: /index.php/user/login
Disallow: /index.php/user/logout
Disallow: /index.php/media/oembed
Disallow: /index.php/*/media/oembed