Scraping
Turn a URL into structured data and LLM-ready data
Scrape
AnyCrawl scrapes a URL and turns it into structured data and LLM-ready data. It supports multiple engines, including Cheerio, Playwright, Puppeteer, and more. It also supports multiple output formats, including HTML, Markdown, JSON, and more.
Authorization
Authorization (required): Bearer <token> — JWT token for API authentication
In: header
Request Body
application/json (required)
template_id (required, string): Template ID to use
variables (object): Template variables
url (string): Optional URL when using template
Format: "uri"
Response Body
Scraping response format (HTTP 200) - can contain either successful or failed scraping results
TypeScript Definitions
Use the response body type in TypeScript.
success (required, boolean): Indicates the scraping request was successful. Value: true
data (required, object)
Standard error response format for validation errors
TypeScript Definitions
Use the response body type in TypeScript.
success (required, boolean): Indicates the request failed. Value: false
error (required, string): Error message
details (required, object): Validation error details
Unauthorized response format for authentication errors
TypeScript Definitions
Use the response body type in TypeScript.
success (required, boolean): Indicates the request failed due to authentication issues. Value: false
error (required, string): Authentication error message
Payment required response format with credit information
TypeScript Definitions
Use the response body type in TypeScript.
success (required, boolean): Indicates the request failed due to insufficient credits. Value: false
error (required, string): Error message
current_credits (required, number): Current credit balance of the user
Internal server error response format
TypeScript Definitions
Use the response body type in TypeScript.
success (required, boolean): Indicates the request failed due to server error. Value: false
error (required, string): Server error message
message (required, string): Detailed error message describing what went wrong
curl -X POST "https://api.anycrawl.dev/v1/scrape" \
-H "Authorization: Bearer <token>" \
-H "Content-Type: application/json" \
-d '{
"template_id": "string",
"variables": {
"property1": null,
"property2": null
},
"url": "http://example.com"
}'
const body = JSON.stringify({
"template_id": "string",
"variables": {
"property1": null,
"property2": null
},
"url": "http://example.com"
})
fetch("https://api.anycrawl.dev/v1/scrape", {
method: "POST",
headers: {
"Authorization": "Bearer <token>",
"Content-Type": "application/json"
},
body
})
package main
import (
"fmt"
"net/http"
"io/ioutil"
"strings"
)
func main() {
url := "https://api.anycrawl.dev/v1/scrape"
body := strings.NewReader(`{
"template_id": "string",
"variables": {
"property1": null,
"property2": null
},
"url": "http://example.com"
}`)
req, _ := http.NewRequest("POST", url, body)
req.Header.Add("Authorization", "Bearer <token>")
req.Header.Add("Content-Type", "application/json")
res, _ := http.DefaultClient.Do(req)
defer res.Body.Close()
respBody, _ := ioutil.ReadAll(res.Body)
fmt.Println(res)
fmt.Println(string(respBody))
}
import requests
url = "https://api.anycrawl.dev/v1/scrape"
body = {
"template_id": "string",
"variables": {
"property1": None,
"property2": None
},
"url": "http://example.com"
}
response = requests.request("POST", url, json = body, headers = {
"Authorization": "Bearer <token>",
"Content-Type": "application/json"
})
print(response.text)
{
"success": true,
"data": {
"url": "https://httpstat.us/200",
"status": "completed",
"jobId": "7a2e165d-8f81-4be6-9ef7-23222330a396",
"title": "",
"html": "200 OK",
"markdown": "200 OK",
"metadata": [],
"cachedAt": "2026-02-08T12:34:56.000Z",
"maxAge": 172800000,
"timestamp": "2025-05-25T07:56:44.162Z"
}
}
{
"success": false,
"error": "Validation error",
"details": {
"issues": [
{
"field": "engine",
"message": "Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'",
"code": "invalid_enum_value"
}
],
"messages": [
"Invalid enum value. Expected 'playwright' | 'cheerio' | 'puppeteer', received 'cheeri1o'"
]
}
}
{
"success": false,
"error": "Invalid API key"
}
{
"success": false,
"error": "Insufficient credits",
"current_credits": -2
}
{
"success": false,
"error": "Internal server error",
"message": "Job 0ae56ed9-d9a9-4998-aea9-2ff5b51b2e4e timed out after 30000 seconds"
}