Scraping - AnyCrawl Docs

Turn a URL into structured data and LLM-ready data

Scrape

AnyCrawl scrapes a URL and turns it into structured data and LLM-ready data. It supports multiple engines, including Cheerio, Playwright, Puppeteer, and more. It also supports multiple output formats, including HTML, Markdown, JSON, and more.

Authorization

Authorization (required): Bearer <token>

JWT token for API authentication

In: header

Request Body

application/json (required)

template_id (string, required)

Template ID to use

variables (object)

Template variables

url (string)

Optional URL when using template

Format: "uri"

Response Body

Scraping response format (HTTP 200) - can contain either successful or failed scraping results

TypeScript Definitions

Use the response body type in TypeScript.

success (boolean, required)

Indicates the scraping request was successful

Value in: true

data (object, required)

Standard error response format for validation errors

TypeScript Definitions

Use the response body type in TypeScript.

success (boolean, required)

Indicates the request failed

Value in: false

error (string, required)

Error message

details (object, required)

Validation error details

Unauthorized response format for authentication errors

TypeScript Definitions

Use the response body type in TypeScript.

success (boolean, required)

Indicates the request failed due to authentication issues

Value in: false

error (string, required)

Authentication error message

Payment required response format with credit information

TypeScript Definitions

Use the response body type in TypeScript.

success (boolean, required)

Indicates the request failed due to insufficient credits

Value in: false

error (string, required)

Error message

current_credits (number, required)

Current credit balance of the user

Internal server error response format

TypeScript Definitions

Use the response body type in TypeScript.

success (boolean, required)

Indicates the request failed due to server error

Value in: false

error (string, required)

Server error message

message (string, required)

Detailed error message describing what went wrong

# POST a template-based scrape request to the AnyCrawl API.
# Replace <token> with your API token before running.
curl --request POST "https://api.anycrawl.dev/v1/scrape" \
  --header "Authorization: Bearer <token>" \
  --header "Content-Type: application/json" \
  --data '{
    "template_id": "string",
    "variables": {
      "property1": null,
      "property2": null
    },
    "url": "http://example.com"
  }'

// Request payload: the template to run, its variables, and an optional URL.
const body = JSON.stringify({
  "template_id": "string",
  "variables": {
    "property1": null,
    "property2": null
  },
  "url": "http://example.com"
})

// fetch defaults to GET, and a GET request may not carry a body, so the
// method is set explicitly. Content-Type tells the API the body is JSON.
fetch("https://api.anycrawl.dev/v1/scrape", {
  method: "POST",
  headers: {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
  },
  body
})

package main

import (
  "fmt"
  "net/http"
  "io/ioutil"
  "strings"
)

// main sends a template-based scrape request to the AnyCrawl API as an
// HTTP POST with a JSON body, then prints the response and its body.
// Any error is printed and the program returns without going further.
func main() {
  url := "https://api.anycrawl.dev/v1/scrape"
  payload := strings.NewReader(`{
    "template_id": "string",
    "variables": {
      "property1": null,
      "property2": null
    },
    "url": "http://example.com"
  }`)

  req, err := http.NewRequest("POST", url, payload)
  if err != nil {
    fmt.Println("building request:", err)
    return
  }
  req.Header.Add("Authorization", "Bearer <token>")
  req.Header.Add("Content-Type", "application/json")

  res, err := http.DefaultClient.Do(req)
  if err != nil {
    // res is nil on error, so it must not be touched (or deferred) here.
    fmt.Println("sending request:", err)
    return
  }
  defer res.Body.Close()

  // The response bytes get their own name: the request payload is a
  // *strings.Reader and cannot be re-declared with := as a []byte.
  data, err := ioutil.ReadAll(res.Body)
  if err != nil {
    fmt.Println("reading response body:", err)
    return
  }

  fmt.Println(res)
  fmt.Println(string(data))
}

import requests

url = "https://api.anycrawl.dev/v1/scrape"
# Request payload. Python spells JSON's null as None; requests serializes
# None back to null when the dict is passed via the json= argument.
body = {
  "template_id": "string",
  "variables": {
    "property1": None,
    "property2": None
  },
  "url": "http://example.com"
}
response = requests.request("POST", url, json = body, headers = {
  "Authorization": "Bearer <token>",
  "Content-Type": "application/json"
})

print(response.text)

{
  "success": true,
  "data": {
    "url": "https://httpstat.us/200",
    "status": "completed",
    "jobId": "7a2e165d-8f81-4be6-9ef7-23222330a396",
    "title": "",
    "html": "200 OK",
    "markdown": "200 OK",
    "metadata": [],
    "cachedAt": "2026-02-08T12:34:56.000Z",
    "maxAge": 172800000,
    "timestamp": "2025-05-25T07:56:44.162Z"
  }
}

{
  "success": false,
  "error": "Validation error",
  "details": {
    "issues": [
      {
        "field": "engine",
        "message": "Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'",
        "code": "invalid_enum_value"
      }
    ],
    "messages": [
      "Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'"
    ]
  }
}

{
  "success": false,
  "error": "Invalid API key"
}

{
  "success": false,
  "error": "Insufficient credits",
  "current_credits": -2
}

{
  "success": false,
  "error": "Internal server error",
  "message": "Job 0ae56ed9-d9a9-4998-aea9-2ff5b51b2e4e timed out after 30000 seconds"
}