AnyCrawl

Crawl

Crawl a site and aggregate per-page scraping outputs

Create Crawl task

Start a site crawl job. The job runs asynchronously and returns a job_id for polling.

POST
/v1/crawl

Authorization

AuthorizationRequiredBearer <token>

JWT token for API authentication

In: header

Request Body

application/json (required)
urlRequiredstring

Seed URL to start crawling

Format: "uri"
engineRequiredstring

The scraping engine used for each crawled page

Value in: "playwright" | "cheerio" | "puppeteer"
proxystring
Format: "uri"
formatsarray<string>
Default: ["markdown"]
timeoutnumber
Default: 300000
Minimum: 1000
Maximum: 600000
wait_fornumber
Minimum: 1
Maximum: 60000
retryboolean
Default: false
include_tagsarray<string>
exclude_tagsarray<string>
json_optionsobject
extract_sourcestring
Default: "markdown"
Value in: "html" | "markdown"
scrape_optionsobject

Per-page scraping options applied during crawling

exclude_pathsarray<string>

Glob patterns or path prefixes to exclude from crawling

include_pathsarray<string>

Glob patterns or path prefixes to include for crawling (applied after exclusion rules)

scrape_pathsarray<string>

Glob patterns or path prefixes for content extraction. Only URLs matching these patterns will have content extracted and saved. If not specified, all included URLs will be scraped (default behavior)

max_depthnumber

Maximum crawl depth from the seed URL

Default: 10
Minimum: 1
Maximum: 50
strategystring

Crawl scope strategy

Default: "same-domain"
Value in: "all" | "same-domain" | "same-hostname" | "same-origin"
limitnumber

Maximum number of pages to crawl

Default: 100
Minimum: 1
Maximum: 50000

Response Body

Crawl job creation response (HTTP 200)

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the crawl job was accepted and queued

Value in: true
dataRequiredobject

Standard error response format for validation errors

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed

Value in: false
errorRequiredstring

Error message

detailsRequiredobject

Validation error details

Unauthorized response format for authentication errors

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed due to authentication issues

Value in: false
errorRequiredstring

Authentication error message

Payment required response format with credit information

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed due to insufficient credits

Value in: false
errorRequiredstring

Error message

current_creditsRequirednumber

Current credit balance of the user

Internal server error response format

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed due to server error

Value in: false
errorRequiredstring

Server error message

messageRequiredstring

Detailed error message describing what went wrong

curl -X POST "https://api.anycrawl.dev/v1/crawl" \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d '{
    "url": "https://anycrawl.dev",
    "engine": "cheerio"
  }'
// JSON payload for the crawl job: the seed URL and the per-page scraping engine.
const body = JSON.stringify({
  "url": "https://anycrawl.dev",
  "engine": "cheerio"
})

// fetch() defaults to GET, which cannot carry a body, so the method is set
// explicitly. Content-Type tells the API that the body is JSON.
fetch("https://api.anycrawl.dev/v1/crawl", {
  method: "POST",
  headers: {
    "Authorization": "Bearer <token>",
    "Content-Type": "application/json"
  },
  body
})
package main

import (
  "fmt"
  "net/http"
  "io/ioutil"
  "strings"
)

// main submits a crawl job to POST /v1/crawl and prints the raw response
// followed by the response body. Any failure is reported and ends the program
// early instead of being silently ignored.
func main() {
  url := "https://api.anycrawl.dev/v1/crawl"
  // Request payload: seed URL plus the engine used for each crawled page.
  payload := strings.NewReader(`{
    "url": "https://anycrawl.dev",
    "engine": "cheerio"
  }`)

  req, err := http.NewRequest(http.MethodPost, url, payload)
  if err != nil {
    fmt.Println("building request:", err)
    return
  }
  req.Header.Add("Authorization", "Bearer <token>")
  req.Header.Add("Content-Type", "application/json")

  res, err := http.DefaultClient.Do(req)
  if err != nil {
    // res is nil when Do fails, so stop before touching res.Body.
    fmt.Println("sending request:", err)
    return
  }
  defer res.Body.Close()

  // A distinct name is used for the request payload above so that this
  // declaration introduces a new variable and the program compiles.
  body, err := ioutil.ReadAll(res.Body)
  if err != nil {
    fmt.Println("reading response body:", err)
    return
  }

  fmt.Println(res)
  fmt.Println(string(body))
}
import requests

# Create a crawl job: send the seed URL and the per-page engine as JSON.
endpoint = "https://api.anycrawl.dev/v1/crawl"
payload = {
  "url": "https://anycrawl.dev",
  "engine": "cheerio"
}
request_headers = {
  "Authorization": "Bearer <token>",
  "Content-Type": "application/json"
}

reply = requests.request("POST", endpoint, json=payload, headers=request_headers)

# Print the raw response body returned by the API.
print(reply.text)
{
  "success": true,
  "data": {
    "job_id": "7a2e165d-8f81-4be6-9ef7-23222330a396",
    "status": "created",
    "message": "Crawl job has been queued for processing"
  }
}
{
  "success": false,
  "error": "Validation error",
  "details": {
    "issues": [
      {
        "field": "engine",
        "message": "Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'",
        "code": "invalid_enum_value"
      }
    ],
    "messages": [
      "Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'"
    ]
  }
}
{
  "success": false,
  "error": "Invalid API key"
}
{
  "success": false,
  "error": "Insufficient credits",
  "current_credits": -2
}
{
  "success": false,
  "error": "Internal server error",
  "message": "Job 0ae56ed9-d9a9-4998-aea9-2ff5b51b2e4e timed out after 30000 seconds"
}

Check Crawl status

Get the current status of a crawl job

GET
/v1/crawl/{jobId}/status

Authorization

AuthorizationRequiredBearer <token>

JWT token for API authentication

In: header

Path Parameters

jobIdRequiredstring

The crawl job ID

Format: "uuid"

Response Body

Crawl job status response (HTTP 200)

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean
Value in: true
messageRequiredstring

Status message

dataRequiredobject

Standard error response format for validation errors

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed

Value in: false
errorRequiredstring

Error message

detailsRequiredobject

Validation error details

Unauthorized response format for authentication errors

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed due to authentication issues

Value in: false
errorRequiredstring

Authentication error message

Internal server error response format

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed due to server error

Value in: false
errorRequiredstring

Server error message

messageRequiredstring

Detailed error message describing what went wrong

curl -X GET "https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08/status" \
  -H "Authorization: Bearer <token>"
// Query the status endpoint of a crawl job; GET is spelled out for clarity.
fetch("https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08/status", {
  method: "GET",
  headers: { "Authorization": "Bearer <token>" }
})
package main

import (
  "fmt"
  "net/http"
  "io/ioutil"
)

// main requests the status of a crawl job and prints the raw response followed
// by the response body. Any failure is reported and ends the program early
// instead of being silently ignored.
func main() {
  url := "https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08/status"

  req, err := http.NewRequest(http.MethodGet, url, nil)
  if err != nil {
    fmt.Println("building request:", err)
    return
  }
  req.Header.Add("Authorization", "Bearer <token>")

  res, err := http.DefaultClient.Do(req)
  if err != nil {
    // res is nil when Do fails, so stop before touching res.Body.
    fmt.Println("sending request:", err)
    return
  }
  defer res.Body.Close()

  body, err := ioutil.ReadAll(res.Body)
  if err != nil {
    fmt.Println("reading response body:", err)
    return
  }

  fmt.Println(res)
  fmt.Println(string(body))
}
import requests

# Ask the status endpoint how far the given crawl job has progressed.
status_url = "https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08/status"
request_headers = {
  "Authorization": "Bearer <token>"
}

reply = requests.request("GET", status_url, headers=request_headers)

# Print the raw response body returned by the API.
print(reply.text)
{
  "success": true,
  "message": "Job status retrieved successfully",
  "data": {
    "job_id": "453bd7d7-5355-4d6d-a38e-d9e7eb218c3f",
    "status": "pending",
    "start_time": "2025-05-25T07:56:44.162Z",
    "expires_at": "2025-05-26T07:56:44.162Z",
    "credits_used": 0,
    "total": 120,
    "completed": 30,
    "failed": 2
  }
}
{
  "success": false,
  "error": "Validation error",
  "details": {
    "issues": [
      {
        "field": "engine",
        "message": "Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'",
        "code": "invalid_enum_value"
      }
    ],
    "messages": [
      "Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'"
    ]
  }
}
{
  "success": false,
  "error": "Invalid API key"
}
{
  "success": false,
  "error": "Internal server error",
  "message": "Job 0ae56ed9-d9a9-4998-aea9-2ff5b51b2e4e timed out after 30000 seconds"
}

Get Crawl results

Get crawl results (paginated via skip query param).

GET
/v1/crawl/{jobId}

Authorization

AuthorizationRequiredBearer <token>

JWT token for API authentication

In: header

Path Parameters

jobIdRequiredstring

The crawl job ID

Format: "uuid"

Query Parameters

skipinteger

Number of results to skip (page offset)

Minimum: 0

Response Body

Crawl job results (paginated) response (HTTP 200)

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean
Value in: true
statusRequiredstring
Value in: "pending" | "completed" | "failed" | "cancelled"
totalRequirednumber
completedRequirednumber
creditsUsedRequirednumber
nextstring | null

Next page URL if more results are available

Format: "uri"
dataRequiredarray<unknown>

Array of per-page scraping results produced by the crawl

Standard error response format for validation errors

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed

Value in: false
errorRequiredstring

Error message

detailsRequiredobject

Validation error details

Unauthorized response format for authentication errors

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed due to authentication issues

Value in: false
errorRequiredstring

Authentication error message

Internal server error response format

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed due to server error

Value in: false
errorRequiredstring

Server error message

messageRequiredstring

Detailed error message describing what went wrong

curl -X GET "https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08?skip=0" \
  -H "Authorization: Bearer <token>"
// Fetch one page of crawl results; `skip` in the query string is the page offset.
fetch("https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08?skip=0", {
  method: "GET",
  headers: { "Authorization": "Bearer <token>" }
})
package main

import (
  "fmt"
  "net/http"
  "io/ioutil"
)

// main requests one page of crawl results (offset given by the skip query
// parameter) and prints the raw response followed by the response body. Any
// failure is reported and ends the program early instead of being silently
// ignored.
func main() {
  url := "https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08?skip=0"

  req, err := http.NewRequest(http.MethodGet, url, nil)
  if err != nil {
    fmt.Println("building request:", err)
    return
  }
  req.Header.Add("Authorization", "Bearer <token>")

  res, err := http.DefaultClient.Do(req)
  if err != nil {
    // res is nil when Do fails, so stop before touching res.Body.
    fmt.Println("sending request:", err)
    return
  }
  defer res.Body.Close()

  body, err := ioutil.ReadAll(res.Body)
  if err != nil {
    fmt.Println("reading response body:", err)
    return
  }

  fmt.Println(res)
  fmt.Println(string(body))
}
import requests

# Retrieve one page of crawl results; `skip` in the query string is the offset.
results_url = "https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08?skip=0"
request_headers = {
  "Authorization": "Bearer <token>"
}

reply = requests.request("GET", results_url, headers=request_headers)

# Print the raw response body returned by the API.
print(reply.text)
{
  "success": true,
  "status": "pending",
  "total": 120,
  "completed": 30,
  "creditsUsed": 12,
  "next": "https://api.anycrawl.dev/v1/crawl/7a2e165d-8f81-4be6-9ef7-23222330a396?skip=100",
  "data": [
    null
  ]
}
{
  "success": false,
  "error": "Validation error",
  "details": {
    "issues": [
      {
        "field": "engine",
        "message": "Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'",
        "code": "invalid_enum_value"
      }
    ],
    "messages": [
      "Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'"
    ]
  }
}
{
  "success": false,
  "error": "Invalid API key"
}
{
  "success": false,
  "error": "Internal server error",
  "message": "Job 0ae56ed9-d9a9-4998-aea9-2ff5b51b2e4e timed out after 30000 seconds"
}

Cancel crawl

Cancel a pending crawl job

DELETE
/v1/crawl/{jobId}

Authorization

AuthorizationRequiredBearer <token>

JWT token for API authentication

In: header

Path Parameters

jobIdRequiredstring

The crawl job ID

Format: "uuid"

Response Body

Crawl job cancellation response (HTTP 200)

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean
Value in: true
messageRequiredstring
dataRequiredobject

Standard error response format for validation errors

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed

Value in: false
errorRequiredstring

Error message

detailsRequiredobject

Validation error details

Unauthorized response format for authentication errors

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed due to authentication issues

Value in: false
errorRequiredstring

Authentication error message

Internal server error response format

TypeScript Definitions

Use the response body type in TypeScript.

successRequiredboolean

Indicates the request failed due to server error

Value in: false
errorRequiredstring

Server error message

messageRequiredstring

Detailed error message describing what went wrong

curl -X DELETE "https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08" \
  -H "Authorization: Bearer <token>"
// Cancel the crawl job. fetch() defaults to GET, which on this URL returns
// the crawl results, so the DELETE method must be set explicitly.
fetch("https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08", {
  method: "DELETE",
  headers: {
    "Authorization": "Bearer <token>"
  }
})
package main

import (
  "fmt"
  "net/http"
  "io/ioutil"
)

// main sends a DELETE request to cancel a crawl job and prints the raw
// response followed by the response body. Any failure is reported and ends the
// program early instead of being silently ignored.
func main() {
  url := "https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08"

  req, err := http.NewRequest(http.MethodDelete, url, nil)
  if err != nil {
    fmt.Println("building request:", err)
    return
  }
  req.Header.Add("Authorization", "Bearer <token>")

  res, err := http.DefaultClient.Do(req)
  if err != nil {
    // res is nil when Do fails, so stop before touching res.Body.
    fmt.Println("sending request:", err)
    return
  }
  defer res.Body.Close()

  body, err := ioutil.ReadAll(res.Body)
  if err != nil {
    fmt.Println("reading response body:", err)
    return
  }

  fmt.Println(res)
  fmt.Println(string(body))
}
import requests

# Cancel the crawl job by issuing DELETE against its job URL.
job_url = "https://api.anycrawl.dev/v1/crawl/497f6eca-6276-4993-bfeb-53cbbbba6f08"
request_headers = {
  "Authorization": "Bearer <token>"
}

reply = requests.request("DELETE", job_url, headers=request_headers)

# Print the raw response body returned by the API.
print(reply.text)
{
  "success": true,
  "message": "Job cancelled successfully",
  "data": {
    "job_id": "453bd7d7-5355-4d6d-a38e-d9e7eb218c3f",
    "status": "cancelled"
  }
}
{
  "success": false,
  "error": "Validation error",
  "details": {
    "issues": [
      {
        "field": "engine",
        "message": "Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'",
        "code": "invalid_enum_value"
      }
    ],
    "messages": [
      "Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'"
    ]
  }
}
{
  "success": false,
  "error": "Invalid API key"
}
{
  "success": false,
  "error": "Internal server error",
  "message": "Job 0ae56ed9-d9a9-4998-aea9-2ff5b51b2e4e timed out after 30000 seconds"
}