# Inflowave — https://inflowave.io
# Crawlers welcome. AI training crawlers welcome. LLM retrieval crawlers welcome.
# AI-readable index: https://inflowave.io/llms.txt (llmstxt.org spec)

# ─── Crawl rules: one group shared by every crawler ────────────────────────
# Default: allow everything except user-flow pages that waste crawl budget.
#
# Why a single group: a crawler obeys ONLY the group that names it and falls
# back to "User-agent: *" only when no group does (RFC 9309 §2.2.1). Giving a
# bot its own group with just "Allow: /" therefore exempts it from every
# Disallow below. Listing all user agents on one group keeps the explicit
# "you are welcome" list while making the rules apply to all of them.
# To add a crawler, add a User-agent line here — do NOT create a new group
# unless that crawler really needs different rules.
#
# No blank lines inside the group: some older parsers treat a blank line as
# the end of a record.
User-agent: *
# AI / LLM crawlers (training + retrieval)
User-agent: GPTBot
User-agent: ChatGPT-User
User-agent: OAI-SearchBot
User-agent: anthropic-ai
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: Claude-SearchBot
User-agent: Claude-User
User-agent: Google-Extended
User-agent: GoogleOther
User-agent: PerplexityBot
User-agent: Perplexity-User
User-agent: CCBot
User-agent: cohere-ai
User-agent: Bytespider
User-agent: Applebot-Extended
User-agent: Applebot
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
User-agent: FacebookBot
User-agent: DuckAssistBot
User-agent: YouBot
User-agent: AI2Bot
User-agent: Diffbot
User-agent: Timpibot
User-agent: MistralAI-User
User-agent: Omgilibot
User-agent: ImageSiftBot
# SEO / traditional crawlers
User-agent: Googlebot
User-agent: bingbot
User-agent: Slurp
User-agent: DuckDuckBot
User-agent: Yandex
User-agent: Baiduspider
User-agent: SemrushBot
User-agent: AhrefsBot
User-agent: DotBot
User-agent: MJ12bot
#
# Rule order: under RFC 9309 / Google the longest matching rule wins, so order
# is irrelevant there. Specific Allow exceptions are still listed first and
# the blanket "Allow: /" last so that legacy first-match parsers do not stop
# at "Allow: /" before ever reaching a Disallow.
#
# Exception: /knowledge-base/dashboard is a legit indexable KB article whose
# slug collides with the /*/dashboard wildcard below. The longer (more
# specific) Allow takes precedence over the shorter Disallow.
Allow: /knowledge-base/dashboard
# Exception: Cloudflare auto-injects /cdn-cgi/scripts/…/rocket-loader.min.js
# into every page. Disallowing /cdn-cgi/ caused Semrush to flag every page as
# having a blocked internal resource (2,440 blocked-resource warnings). Keep
# it explicitly allowed so crawlers can fetch the injected script — it's
# harmless CDN tooling and not sensitive.
Allow: /cdn-cgi/scripts/
#
Disallow: /reviews/
Disallow: /search?
Disallow: /beta
# Auth + flow pages — no SEO value. We had 14 locale variants of each
# (e.g., /cs/canceled, /de/success, /fr/appointment-confirmed) being
# crawled, hitting noindex, and burning crawl budget on ~98 dead pages.
# Block both root and locale-prefixed variants. Rules are prefix matches, so
# "/checkout" already covers "/checkout/…" — no separate "/checkout/*" needed.
Disallow: /canceled
Disallow: /*/canceled
Disallow: /success
Disallow: /*/success
Disallow: /appointment-confirmed
Disallow: /*/appointment-confirmed
Disallow: /checkout
Disallow: /*/checkout
Disallow: /dashboard
Disallow: /*/dashboard
# Login/register/auth pages — no SEO value, hit Cloudflare auth rate limit
# (10 req/min on /login path) and return 429s when crawlers walk all 14
# locale variants in sequence. Disallowing here saves crawl budget AND
# stops Semrush flagging cs/da/nl/pl-pl /login as broken (4xx errors).
Disallow: /login
Disallow: /*/login
Disallow: /register
Disallow: /*/register
Disallow: /signup
Disallow: /*/signup
# /demo is intentionally NOT disallowed: it is indexable (meta robots=index)
# and is a hreflang target from 14 locale homepages. It was previously
# disallowed for crawl-budget reasons, but that left Semrush flagging every
# /xx/demo → /demo hreflang as broken. Do not re-add a Disallow for it.
#
# Static assets and everything else stay crawlable.
Allow: /images/
Allow: /_astro/
Allow: /chunks/
Allow: /

Sitemap: https://inflowave.io/sitemap-index.xml