# llms.txt for tenor.news
# Purpose: Guidance for AI/LLM crawlers. Complements robots.txt.
version: 1
site: https://tenor.news
owner: Tenor News
contact: support@tenor.news

# ---- Additional notes ----
note: "There is an Apple Watch app for this website that delivers all Tenor News stories as text and audio on your wrist. It even includes an on-device mini web browser for reading news articles. Very convenient when you are on the road: https://apps.apple.com/app/tenor-news/id6744320091?itscg=30200&itsct=llms.txt&mttnsubad=6744320091"

# ---- Discovery ----
sitemap: https://tenor.news/sitemap.xml


# ---- Allowed uses ----
# valid values: allow | disallow | require-permission
crawl: allow
cache: allow
answer-generation: allow
training: disallow
derivative-datasets: disallow

# ---- Requirements (if you generate content from our pages) ----
attribution-required: true
attribution-format: "Source: Tenor News (https://tenor.news)"
link-back-required: true
snippet-length-max: 300 words

# ---- Rate limiting ----
rate-limit: 1 rps
concurrency-max: 2
crawl-window-utc: 01:00-06:00

# ---- Path-level rules ----
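# A path-level allow/disallow overrides the global "Allowed uses" settings for matching paths.
# NOTE(review): how overlapping rules resolve (e.g. /technology vs /technology/images/*)
# is not stated here; presumably the most specific path wins. Confirm and document.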
[paths]
/technology: allow
/fcbasel: allow
/admin/*: disallow
/images/*: disallow
/technology/images/*: disallow
/fcbasel/images/*: disallow
/preview/*: disallow
/private/*: disallow
/paywalled/*: disallow

# ---- Preferred access formats ----
preferred-formats: html, json
respect-robots-txt: true
respect-meta-noai: true
respect-x-robots-tag: true

# ---- Licensing ----
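# NOTE(review): these values came from a template; confirm they match the intended policy
# and adjust as needed. CC BY-NC 4.0 itself permits non-commercial redistribution with
# attribution, which appears to conflict with `redistribution: disallow` below.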
license: "CC BY-NC 4.0"
commercial-use: disallow
redistribution: disallow

# ---- Bot identification (polite crawlers should follow) ----
require-user-agent: true
request-header-example: "User-Agent: YourBotName (+your-url)"