{
  "version": "1.0.0",
  "last_updated": "2025-01-01",
  "total_bots": 53,
  "source": "LLM Bot Tracker by Hueston",
  "license": "MIT",
  "website": "https://llmbottracker.com",
  "bots": [
    {
      "id": "gptbot",
      "name": "GPTBot",
      "operator": "OpenAI",
      "category": "AI Training",
      "description": "Crawls web content for training GPT models and improving AI capabilities",
      "user_agent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0; +https://openai.com/gptbot)",
      "respects_robots_txt": true,
      "documentation_url": "https://platform.openai.com/docs/gptbot",
      "purpose": ["training", "model_improvement"]
    },
    {
      "id": "oai-searchbot",
      "name": "OAI-SearchBot",
      "operator": "OpenAI",
      "category": "AI Search",
      "description": "Powers real-time web search capabilities for ChatGPT responses",
      "user_agent": "Mozilla/5.0 (compatible; OAI-SearchBot/1.0; +https://openai.com/searchbot)",
      "respects_robots_txt": true,
      "documentation_url": "https://platform.openai.com/docs/searchbot",
      "purpose": ["realtime_search", "content_retrieval"]
    },
    {
      "id": "chatgpt-user",
      "name": "ChatGPT-User",
      "operator": "OpenAI",
      "category": "AI Browser",
      "description": "Fetches web content when users interact with ChatGPT's browsing feature",
      "user_agent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko); compatible; ChatGPT-User/1.0; +https://openai.com/bot",
      "respects_robots_txt": true,
      "documentation_url": "https://platform.openai.com/docs",
      "purpose": ["user_requested", "browsing"]
    },
    {
      "id": "claudebot",
      "name": "ClaudeBot",
      "operator": "Anthropic",
      "category": "AI Training",
      "description": "Crawls web content for training Claude AI models",
      "user_agent": ["Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; ClaudeBot/1.0; +claudebot@anthropic.com)", "ClaudeBot/1.0; +https://www.anthropic.com"],
      "respects_robots_txt": "mixed",
      "documentation_url": "https://www.anthropic.com",
      "purpose": ["training", "model_improvement"]
    },
    {
      "id": "claude-web",
      "name": "Claude-Web",
      "operator": "Anthropic",
      "category": "AI Browser",
      "description": "Fetches content for Claude's real-time web browsing during conversations",
      "user_agent": "Varies (API-driven, session-specific; often contains claude-web or claudebot)",
      "respects_robots_txt": "mixed",
      "documentation_url": "https://www.anthropic.com",
      "purpose": ["user_requested", "browsing"]
    },
    {
      "id": "anthropic-ai",
      "name": "anthropic-ai",
      "operator": "Anthropic",
      "category": "AI Training",
      "description": "Bulk model training crawler for Anthropic's AI systems",
      "user_agent": "anthropic-ai",
      "respects_robots_txt": "mixed",
      "documentation_url": "https://www.anthropic.com",
      "purpose": ["training", "bulk_collection"]
    },
    {
      "id": "perplexitybot",
      "name": "PerplexityBot",
      "operator": "Perplexity AI",
      "category": "AI Search",
      "description": "Indexes web content for Perplexity's AI-powered search engine",
      "user_agent": "Mozilla/5.0 (compatible; PerplexityBot/1.0; +https://www.perplexity.ai/robot)",
      "respects_robots_txt": true,
      "documentation_url": "https://docs.perplexity.ai",
      "purpose": ["search_index", "content_retrieval"]
    },
    {
      "id": "perplexity-user",
      "name": "Perplexity-User",
      "operator": "Perplexity AI",
      "category": "AI Browser",
      "description": "Human-triggered visits when users search on Perplexity",
      "user_agent": "Mozilla/5.0 (compatible; Perplexity-User/1.0; +https://www.perplexity.ai/robot) or Perplexity-User",
      "respects_robots_txt": false,
      "documentation_url": "https://docs.perplexity.ai",
      "purpose": ["user_requested", "search"]
    },
    {
      "id": "mistralai-user",
      "name": "MistralAI-User",
      "operator": "Mistral AI",
      "category": "AI Browser",
      "description": "Fetches content for Mistral's AI models and chat applications",
      "user_agent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; MistralAI-User/1.0; +https://docs.mistral.ai/robots)",
      "respects_robots_txt": true,
      "documentation_url": "https://docs.mistral.ai",
      "purpose": ["user_requested", "chat"]
    },
    {
      "id": "mistral-le-chat",
      "name": "Mistral Le Chat Agent",
      "operator": "Mistral AI",
      "category": "AI Browser",
      "description": "Powers Mistral's conversational AI chat assistant",
      "user_agent": "Mistral Le Chat AI Agent 1.0",
      "respects_robots_txt": true,
      "documentation_url": "https://docs.mistral.ai",
      "purpose": ["chat", "conversation"]
    },
    {
      "id": "applebot-extended",
      "name": "Applebot-Extended",
      "operator": "Apple",
      "category": "AI Integration",
      "description": "Powers search technology integrated into Spotlight, Siri, and Safari",
      "user_agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Applebot/0.2",
      "respects_robots_txt": true,
      "documentation_url": "https://support.apple.com/en-us/HT204683",
      "purpose": ["search", "ai_features"]
    },
    {
      "id": "ccbot",
      "name": "CCBot",
      "operator": "Common Crawl",
      "category": "Data Collection",
      "description": "Crawls the web for Common Crawl, which releases monthly snapshots anyone can download",
      "user_agent": "CCBot/2.0 (+http://commoncrawl.org/faq/)",
      "respects_robots_txt": true,
      "documentation_url": "http://commoncrawl.org/faq/",
      "purpose": ["public_dataset", "research"]
    },
    {
      "id": "amazonbot",
      "name": "Amazonbot",
      "operator": "Amazon",
      "category": "AI Services",
      "description": "Crawls web content for Amazon's product search and AI services",
      "user_agent": "Amazonbot/1.0 (+https://www.amazon.com/bot.html)",
      "respects_robots_txt": true,
      "documentation_url": "https://www.amazon.com/bot.html",
      "purpose": ["product_search", "ai_services"]
    },
    {
      "id": "pangubot",
      "name": "PanguBot",
      "operator": "Huawei",
      "category": "AI Services",
      "description": "Indexes content for Huawei's AI and cloud services",
      "user_agent": "Mozilla/5.0 (compatible; PanguBot/1.0; +https://www.huaweicloud.com)",
      "respects_robots_txt": true,
      "documentation_url": "https://www.huaweicloud.com",
      "purpose": ["cloud_services", "ai_features"]
    },
    {
      "id": "petalbot",
      "name": "PetalBot",
      "operator": "Huawei",
      "category": "AI Search",
      "description": "Powers Huawei's Petal Search engine and AI features",
      "user_agent": "PetalBot (+https://aspiegel.com/petalbot)",
      "respects_robots_txt": true,
      "documentation_url": "https://aspiegel.com/petalbot",
      "purpose": ["search", "ai_features"]
    },
    {
      "id": "duckassistbot",
      "name": "DuckAssistBot",
      "operator": "DuckDuckGo",
      "category": "AI Search",
      "description": "Enables DuckDuckGo's AI-powered search assistance features",
      "user_agent": "DuckAssistBot/1.0",
      "respects_robots_txt": true,
      "documentation_url": "https://duckduckgo.com",
      "purpose": ["search_assist", "ai_features"]
    },
    {
      "id": "meta-externalagent",
      "name": "meta-externalagent",
      "operator": "Meta",
      "category": "AI Training",
      "description": "Crawls content for Meta's AI training and services",
      "user_agent": "meta-externalagent/1.0",
      "respects_robots_txt": true,
      "documentation_url": "https://about.meta.com",
      "purpose": ["training", "ai_services"]
    },
    {
      "id": "facebookexternalhit",
      "name": "facebookexternalhit",
      "operator": "Meta",
      "category": "Social Media",
      "description": "Fetches link previews and content for Facebook sharing",
      "user_agent": "facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)",
      "respects_robots_txt": true,
      "documentation_url": "http://www.facebook.com/externalhit_uatext.php",
      "purpose": ["link_preview", "sharing"]
    },
    {
      "id": "apis-google",
      "name": "APIs-Google",
      "operator": "Google",
      "category": "AI Services",
      "description": "General-purpose crawler for various Google API services",
      "user_agent": "APIs-Google (+https://developers.google.com/webmasters/APIs-Google)",
      "respects_robots_txt": true,
      "documentation_url": "https://developers.google.com/webmasters/APIs-Google",
      "purpose": ["api_services", "various"]
    },
    {
      "id": "google-extended",
      "name": "Google-Extended",
      "operator": "Google",
      "category": "AI Training",
      "description": "Controls access to Gemini (formerly Bard AI) and Vertex AI, Google's machine learning platform",
      "user_agent": "Google-Extended",
      "respects_robots_txt": true,
      "documentation_url": "https://developers.google.com/search/docs/crawling-indexing/google-extended",
      "purpose": ["training", "vertex_ai"]
    },
    {
      "id": "gemini-deep-research",
      "name": "Gemini-Deep-Research",
      "operator": "Google",
      "category": "AI Research",
      "description": "Collects and scans resources for Google Gemini's Deep Research feature",
      "user_agent": "Gemini-Deep-Research",
      "respects_robots_txt": true,
      "documentation_url": "https://gemini.google.com",
      "purpose": ["deep_research", "analysis"]
    },
    {
      "id": "microsoft-copilot",
      "name": "Microsoft Copilot",
      "operator": "Microsoft",
      "category": "AI Assistant",
      "description": "Powers Microsoft's AI assistant across Bing, Edge, and Office products",
      "user_agent": "No single string; often uses browser-like UA (Edge/WebView) with payload tied to Copilot/AI",
      "respects_robots_txt": true,
      "documentation_url": "https://copilot.microsoft.com",
      "purpose": ["ai_assistant", "productivity"]
    },
    {
      "id": "bytespider",
      "name": "Bytespider",
      "operator": "ByteDance",
      "category": "AI Training",
      "description": "Downloads training data for ByteDance's LLMs including Doubao",
      "user_agent": "Bytespider",
      "respects_robots_txt": true,
      "documentation_url": "https://bytedance.com",
      "purpose": ["training", "llm_development"]
    },
    {
      "id": "cohere-ai",
      "name": "Cohere-Ai",
      "operator": "Cohere",
      "category": "AI Training",
      "description": "Collects training data to improve Cohere's NLP models for enterprise AI",
      "user_agent": "Mozilla/5.0 (compatible; Cohere-AI/1.0; +https://cohere.com/) or cohere-ai",
      "respects_robots_txt": true,
      "documentation_url": "https://cohere.com",
      "purpose": ["training", "enterprise_ai"]
    },
    {
      "id": "cohere-command",
      "name": "Cohere-Command",
      "operator": "Cohere",
      "category": "AI Services",
      "description": "Powers Cohere's command-based AI model interactions",
      "user_agent": "Cohere-Command",
      "respects_robots_txt": true,
      "documentation_url": "https://cohere.com",
      "purpose": ["command_ai", "interactions"]
    },
    {
      "id": "youbot",
      "name": "YouBot",
      "operator": "You.com",
      "category": "AI Search",
      "description": "Indexes content for You.com's AI-powered search engine",
      "user_agent": "YouBot",
      "respects_robots_txt": true,
      "documentation_url": "https://you.com",
      "purpose": ["search", "ai_powered"]
    },
    {
      "id": "xai-bot",
      "name": "xAI-Bot",
      "operator": "xAI",
      "category": "AI Training",
      "description": "Crawls web content for Elon Musk's xAI company and Grok AI",
      "user_agent": "xAI-Bot",
      "respects_robots_txt": true,
      "documentation_url": "https://x.ai",
      "purpose": ["training", "grok_ai"]
    },
    {
      "id": "deepseekbot",
      "name": "DeepSeekBot",
      "operator": "DeepSeek",
      "category": "AI Training",
      "description": "Collects data for DeepSeek's open-source AI models",
      "user_agent": "DeepseekBot",
      "respects_robots_txt": true,
      "documentation_url": "https://deepseek.com",
      "purpose": ["training", "open_source"]
    },
    {
      "id": "huggingface-bot",
      "name": "HuggingFace-Bot",
      "operator": "HuggingFace",
      "category": "AI Platform",
      "description": "Indexes content for HuggingFace's AI model repository and services",
      "user_agent": "HuggingFace-Bot",
      "respects_robots_txt": true,
      "documentation_url": "https://huggingface.co",
      "purpose": ["model_repository", "ai_services"]
    },
    {
      "id": "character-ai",
      "name": "Character-AI",
      "operator": "Character.AI",
      "category": "AI Characters",
      "description": "Fetches content for Character.AI's conversational AI characters",
      "user_agent": "Character-AI",
      "respects_robots_txt": true,
      "documentation_url": "https://character.ai",
      "purpose": ["ai_characters", "conversation"]
    },
    {
      "id": "groq-bot",
      "name": "Groq-Bot",
      "operator": "Groq",
      "category": "AI Infrastructure",
      "description": "Collects data for Groq's high-performance AI inference platform",
      "user_agent": "Groq-Bot",
      "respects_robots_txt": true,
      "documentation_url": "https://groq.com",
      "purpose": ["inference", "ai_platform"]
    },
    {
      "id": "together-bot",
      "name": "Together-Bot",
      "operator": "Together AI",
      "category": "AI Platform",
      "description": "Powers Together AI's decentralized AI compute platform",
      "user_agent": "Together-Bot",
      "respects_robots_txt": true,
      "documentation_url": "https://together.ai",
      "purpose": ["decentralized_ai", "compute"]
    },
    {
      "id": "replicate-bot",
      "name": "Replicate-Bot",
      "operator": "Replicate",
      "category": "AI Platform",
      "description": "Supports Replicate's AI model deployment and hosting platform",
      "user_agent": "Replicate-Bot",
      "respects_robots_txt": true,
      "documentation_url": "https://replicate.com",
      "purpose": ["model_hosting", "deployment"]
    },
    {
      "id": "timpibot",
      "name": "TimpiBot",
      "operator": "Timpi",
      "category": "Decentralized Search",
      "description": "Indexes content for Timpi's decentralized search engine",
      "user_agent": "TimpiBot",
      "respects_robots_txt": true,
      "documentation_url": "https://timpi.io",
      "purpose": ["decentralized_search", "indexing"]
    },
    {
      "id": "webzio-extended",
      "name": "Webzio-Extended",
      "operator": "Webz.io",
      "category": "Data Collection",
      "description": "Crawls web content that Webz.io can sell to LLM companies",
      "user_agent": "Webzio-Extended",
      "respects_robots_txt": true,
      "documentation_url": "https://webz.io",
      "purpose": ["data_collection", "resale"]
    },
    {
      "id": "iboubot",
      "name": "IbouBot",
      "operator": "Unknown",
      "category": "Unknown",
      "description": "AI-related crawler with undocumented purpose",
      "user_agent": "IbouBot",
      "respects_robots_txt": true,
      "documentation_url": null,
      "purpose": ["unknown"]
    },
    {
      "id": "imagesiftbot",
      "name": "ImagesiftBot",
      "operator": "The Hive",
      "category": "Image AI",
      "description": "Reverse image search tool associated with The Hive's image generation models",
      "user_agent": "ImagesiftBot",
      "respects_robots_txt": true,
      "documentation_url": "https://thehive.ai",
      "purpose": ["image_search", "image_generation"]
    },
    {
      "id": "kangaroo-bot",
      "name": "Kangaroo Bot",
      "operator": "Unknown",
      "category": "Unknown",
      "description": "AI-related crawler with undocumented purpose",
      "user_agent": "Kangaroo Bot",
      "respects_robots_txt": true,
      "documentation_url": null,
      "purpose": ["unknown"]
    },
    {
      "id": "diffbot",
      "name": "Diffbot",
      "operator": "Diffbot",
      "category": "Knowledge Graph",
      "description": "Extracts structured data from web pages using AI for knowledge graph building",
      "user_agent": "Diffbot",
      "respects_robots_txt": true,
      "documentation_url": "https://diffbot.com",
      "purpose": ["knowledge_graph", "data_extraction"]
    },
    {
      "id": "ai2bot",
      "name": "AI2Bot",
      "operator": "Allen Institute",
      "category": "AI Research",
      "description": "Crawls content for Allen Institute for AI's research projects",
      "user_agent": "AI2Bot",
      "respects_robots_txt": true,
      "documentation_url": "https://allenai.org",
      "purpose": ["research", "academic"]
    },
    {
      "id": "brightbot",
      "name": "Brightbot",
      "operator": "Bright Data",
      "category": "Data Collection",
      "description": "Web data collection for AI training and business intelligence",
      "user_agent": "Brightbot",
      "respects_robots_txt": true,
      "documentation_url": "https://brightdata.com",
      "purpose": ["data_collection", "business_intelligence"]
    },
    {
      "id": "firecrawlagent",
      "name": "FirecrawlAgent",
      "operator": "Firecrawl",
      "category": "Data Conversion",
      "description": "Converts websites into LLM-ready markdown or structured data",
      "user_agent": "FirecrawlAgent",
      "respects_robots_txt": true,
      "documentation_url": "https://firecrawl.dev",
      "purpose": ["data_conversion", "llm_ready"]
    },
    {
      "id": "runpod-bot",
      "name": "RunPod-Bot",
      "operator": "RunPod",
      "category": "AI Infrastructure",
      "description": "Supports RunPod's GPU cloud platform for AI workloads",
      "user_agent": "RunPod-Bot",
      "respects_robots_txt": true,
      "documentation_url": "https://runpod.io",
      "purpose": ["gpu_cloud", "ai_workloads"]
    },
    {
      "id": "bigsur-ai",
      "name": "bigsur.ai",
      "operator": "BigSur AI",
      "category": "AI Services",
      "description": "AI-related crawler for BigSur's machine learning services",
      "user_agent": "bigsur.ai",
      "respects_robots_txt": true,
      "documentation_url": "https://bigsur.ai",
      "purpose": ["ml_services", "ai_platform"]
    },
    {
      "id": "crawlspace",
      "name": "Crawlspace",
      "operator": "Unknown",
      "category": "Unknown",
      "description": "AI-related crawler with undocumented purpose",
      "user_agent": "Crawlspace",
      "respects_robots_txt": true,
      "documentation_url": null,
      "purpose": ["unknown"]
    },
    {
      "id": "devin",
      "name": "Devin",
      "operator": "Cognition AI",
      "category": "AI Developer",
      "description": "Powers Devin, the AI software engineer assistant",
      "user_agent": "Devin",
      "respects_robots_txt": true,
      "documentation_url": "https://cognition.ai",
      "purpose": ["ai_developer", "coding_assistant"]
    },
    {
      "id": "cotoyogi",
      "name": "Cotoyogi",
      "operator": "Unknown",
      "category": "Unknown",
      "description": "AI-related crawler with undocumented purpose",
      "user_agent": "Cotoyogi",
      "respects_robots_txt": true,
      "documentation_url": null,
      "purpose": ["unknown"]
    },
    {
      "id": "andibot",
      "name": "Andibot",
      "operator": "Andi",
      "category": "AI Search",
      "description": "Crawls content for Andi's conversational search engine",
      "user_agent": "Andibot",
      "respects_robots_txt": true,
      "documentation_url": "https://andisearch.com",
      "purpose": ["conversational_search", "ai_search"]
    },
    {
      "id": "chatgpt-browser",
      "name": "ChatGPT-Browser",
      "operator": "OpenAI",
      "category": "Legacy",
      "description": "Legacy crawler for ChatGPT's browsing capabilities",
      "user_agent": "ChatGPT-Browser",
      "respects_robots_txt": true,
      "documentation_url": "https://openai.com",
      "purpose": ["legacy", "browsing"]
    },
    {
      "id": "bard-ai",
      "name": "Bard-Ai",
      "operator": "Google",
      "category": "Legacy",
      "description": "Legacy crawler for Google's Bard AI (now Gemini)",
      "user_agent": "Bard-Ai",
      "respects_robots_txt": true,
      "documentation_url": "https://bard.google.com",
      "purpose": ["legacy", "replaced_by_gemini"]
    },
    {
      "id": "gemini-ai",
      "name": "Gemini-Ai",
      "operator": "Google",
      "category": "AI Assistant",
      "description": "Powers Google's Gemini AI assistant and chat features",
      "user_agent": "Gemini-Ai",
      "respects_robots_txt": true,
      "documentation_url": "https://gemini.google.com",
      "purpose": ["ai_assistant", "chat"]
    },
    {
      "id": "facebookbot",
      "name": "FacebookBot",
      "operator": "Meta",
      "category": "Social Media",
      "description": "General crawler for Facebook's content indexing and AI features",
      "user_agent": "FacebookBot",
      "respects_robots_txt": true,
      "documentation_url": "https://developers.facebook.com",
      "purpose": ["content_indexing", "ai_features"]
    },
    {
      "id": "linkedinbot",
      "name": "LinkedInBot",
      "operator": "LinkedIn",
      "category": "Professional Network",
      "description": "Indexes content for LinkedIn's professional networking and AI features",
      "user_agent": "LinkedInBot",
      "respects_robots_txt": true,
      "documentation_url": "https://linkedin.com",
      "purpose": ["professional_networking", "ai_features"]
    }
  ],
  "categories": {
    "AI Training": "Bots that collect data to train language models",
    "AI Search": "Bots that power real-time AI search results",
    "AI Browser": "Bots triggered by user interactions with AI assistants",
    "AI Research": "Bots that perform deep research and analysis",
    "AI Assistant": "Bots powering AI assistant interactions",
    "AI Services": "General AI service crawlers",
    "AI Integration": "Crawlers supporting AI features integrated into existing products",
    "AI Platform": "Platform-specific AI crawlers",
    "AI Infrastructure": "Infrastructure and compute platform crawlers",
    "AI Characters": "AI character and personality bots",
    "AI Developer": "Developer and coding assistant bots",
    "Data Collection": "General data collection bots",
    "Data Conversion": "Data transformation and conversion bots",
    "Social Media": "Social media platform crawlers",
    "Professional Network": "Professional networking crawlers",
    "Decentralized Search": "Decentralized search engine crawlers",
    "Knowledge Graph": "Knowledge graph building crawlers",
    "Image AI": "Image-related AI crawlers",
    "Legacy": "Deprecated or replaced bots",
    "Unknown": "Bots with undocumented purposes"
  },
  "endpoints": {
    "full_list": "https://llmbottracker.com/api/bots.json",
    "by_operator": "https://llmbottracker.com/api/bots/{operator}.json",
    "by_category": "https://llmbottracker.com/api/bots/category/{category}.json",
    "latest_changes": "https://llmbottracker.com/api/bots/changes.json"
  },
  "mcp_config": {
    "name": "llm-bot-tracker",
    "version": "1.0.0",
    "description": "Access the comprehensive AI bot database",
    "resources": [
      {
        "uri": "https://llmbottracker.com/api/bots.json",
        "name": "AI Bot Database",
        "mimeType": "application/json",
        "description": "Complete list of 53 AI crawlers with metadata, user agents, and categorization"
      }
    ]
  }
}
