# ═══════════════════════════════════════════════════════════════
# robots.txt — Learning.Coupons
# Primary domain : https://learning.coupons (canonical)
# Sitemap        : https://learning.coupons/sitemap.xml
# Last updated   : 2026-07-02
# Spec           : https://www.rfc-editor.org/rfc/rfc9309 (RFC 9309)
#
# How to read this file:
#   * One directive per line; a group is one or more User-agent
#     lines followed by its Allow/Disallow rules.
#   * A crawler obeys ONLY the most specific group that names it
#     (RFC 9309 §2.2.1). A bot with its own group does NOT inherit
#     anything from the "*" group, so every bot that should follow
#     the shared rules is listed in the shared group below.
# ═══════════════════════════════════════════════════════════════

# ---------------------------------------------------------------
# DEFAULT + INDEXING CRAWLERS
# Everything is crawlable except private/technical paths and
# tracking-parameter URL variants.
# Search-engine and AI-search indexers are named explicitly (clean
# signal) but share this group so the Disallow rules below actually
# apply to them. No blank lines inside the group: some parsers
# treat a blank line as the end of a group.
# ---------------------------------------------------------------
User-agent: *
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-News
User-agent: Bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Baiduspider
User-agent: YandexBot
User-agent: PerplexityBot
User-agent: OAI-SearchBot
User-agent: Applebot
Allow: /
Disallow: /admin/
Disallow: /private/
Disallow: /tmp/
Disallow: /cache/
Disallow: /api/
Disallow: /*.json$
# Block tracking-parameter URL variants ("*" and "$" wildcards are
# part of RFC 9309 and honoured by the major search engines).
Disallow: /*?*utm_source=
Disallow: /*?*utm_medium=
Disallow: /*?*utm_campaign=
Disallow: /*?*fbclid=
Disallow: /*?*gclid=
Disallow: /*?*msclkid=
# "ref=" and "source=" are anchored to a parameter boundary ("?" or
# "&") so they do not also match unrelated parameters whose names
# merely end in those letters (e.g. "pref=", "href=", "resource=").
Disallow: /*?ref=
Disallow: /*&ref=
Disallow: /*?source=
Disallow: /*&source=
Disallow: /*?*sessionid=
Disallow: /*?*PHPSESSID=
Disallow: /*?*replytocom=

# ---------------------------------------------------------------
# AD CRAWLERS — full access
# AdsBot-Google ignores the "*" group and must be named explicitly.
# These bots fetch ad landing pages, whose URLs normally carry
# gclid/utm parameters, so they are intentionally NOT subject to
# the tracking-parameter rules above.
# ---------------------------------------------------------------
User-agent: AdsBot-Google
Allow: /

User-agent: AdsBot-Google-Mobile
Allow: /

User-agent: Mediapartners-Google
Allow: /

# ---------------------------------------------------------------
# SOCIAL / MESSAGING CRAWLERS — needed for OG / Twitter previews
# Shared links frequently include utm_*/fbclid parameters, so these
# fetchers keep full access to make sure link previews still render.
# ---------------------------------------------------------------
User-agent: Twitterbot
Allow: /

User-agent: facebookexternalhit
Allow: /

User-agent: Facebot
Allow: /

User-agent: LinkedInBot
Allow: /

User-agent: WhatsApp
Allow: /

User-agent: Slackbot
Allow: /

User-agent: Slackbot-LinkExpanding
Allow: /

User-agent: Discordbot
Allow: /

User-agent: TelegramBot
Allow: /

User-agent: Pinterest
Allow: /

User-agent: Pinterestbot
Allow: /

User-agent: redditbot
Allow: /

# ---------------------------------------------------------------
# AI SEARCH (citation-driving) — ALLOW for visibility
# These power AI search results that LINK BACK to the site.
# The indexing bots (PerplexityBot, OAI-SearchBot, Applebot) are
# listed in the default group above so they follow the shared rules.
# ChatGPT-User fetches a page on behalf of a user request, so it
# keeps full access like the link-preview fetchers.
# ---------------------------------------------------------------
User-agent: ChatGPT-User
Allow: /

# ---------------------------------------------------------------
# AI / LLM TRAINING SCRAPERS — BLOCK (no SEO benefit, takes content)
# One group per bot for maximum compatibility with simple parsers.
# ---------------------------------------------------------------
User-agent: GPTBot
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: anthropic-ai
Disallow: /

User-agent: Claude-Web
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Meta-ExternalAgent
Disallow: /

User-agent: Meta-ExternalFetcher
Disallow: /

User-agent: FacebookBot
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: Amazonbot
Disallow: /

User-agent: Diffbot
Disallow: /

User-agent: ImagesiftBot
Disallow: /

User-agent: Omgili
Disallow: /

User-agent: Omgilibot
Disallow: /

User-agent: PetalBot
Disallow: /

User-agent: cohere-ai
Disallow: /

User-agent: Timpibot
Disallow: /

User-agent: ICC-Crawler
Disallow: /

User-agent: VelenPublicWebCrawler
Disallow: /

User-agent: Webzio-Extended
Disallow: /

User-agent: NovaAct
Disallow: /

User-agent: AwarioRssBot
Disallow: /

User-agent: AwarioSmartBot
Disallow: /

# ---------------------------------------------------------------
# AGGRESSIVE SEO SCRAPERS — BLOCK (drain bandwidth, no value)
# ---------------------------------------------------------------
User-agent: DataForSeoBot
Disallow: /

User-agent: SemrushBot
Disallow: /

User-agent: SemrushBot-SA
Disallow: /

User-agent: AhrefsBot
Disallow: /

User-agent: AhrefsSiteAudit
Disallow: /

User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: BLEXBot
Disallow: /

User-agent: SEOkicks
Disallow: /

User-agent: serpstatbot
Disallow: /

# ---------------------------------------------------------------
# SITEMAP
# Must be an absolute URL (sitemaps.org protocol). The Sitemap line
# is an extension record outside any group (RFC 9309 §2.2.4), so it
# applies regardless of user-agent.
# ---------------------------------------------------------------
Sitemap: https://learning.coupons/sitemap.xml