API Reference
Endpoint Examples
- Account Summary
- Results
- Scrapers
- Scraping Runs
- AI Scraper
- X (Twitter) Scraper
- Google Scraper
- Job Board Scraper
- Facebook Marketplace Scraper
- PDF Scraper
- Realestate.com.au Scraper
- Shopee Scraper
PDF Scraper
Create and run a PDF scraper
This endpoint allows you to create and run a PDF scraper.
POST /api/scrapers/pdf/create-and-run
curl --request POST \
--url https://app.mrscraper.com/api/scrapers/pdf/create-and-run \
--header 'Authorization: Bearer <token>' \
--header 'Content-Type: application/json' \
--data '{
"name": "PDF Scraper",
"unique": true,
"expected_pdf": 200,
"keywords": [
{
"keyword": "Architectural plan PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
},
{
"keyword": "Floor plan PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
},
{
"keyword": "Building layout PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
}
]
}'
{
"message": "Scraping queued successfully",
"scraper": {
"id": 3669,
"name": "Test PDF Scraper",
"url": [
"Default"
],
"urls": [
"Default"
],
"scheduled": false,
"schedule": null,
"created_at": "2024-10-15T07:38:15.000000Z",
"updated_at": "2024-10-15T07:38:15.000000Z"
},
"results": [
{
"scraping_run_id": 375965,
"user_id": 5573,
"scraper_name": "Test PDF Scraper",
"scrapped_url": "Architectural plan PDF",
"scraped_url": "Architectural plan PDF",
"scraper_id": 3669,
"status": "running",
"updated_at": "2024-10-15T07:38:15.000000Z",
"created_at": "2024-10-15T07:38:15.000000Z",
"id": 1169956,
"scraper": {
"id": 3669,
"user_id": 5573,
"sharing": false,
"share_uuid": null,
"name": "Test PDF Scraper",
"type": "pdf",
"urls": "Default",
"pdf_urls": "Default",
"ai_prompt": null,
"ai_scope": null,
"headers": null,
"cookies": null,
"user_agent": null,
"disabled_resources": null,
"delay": 0,
"html_wanted": false,
"screenshot_wanted": false,
"screenshot_type": null,
"locale": null,
"scheduled": false,
"cron": null,
"cron_timezone": "UTC",
"paginate": 0,
"pagination_type": null,
"infinite_pagination_type": null,
"infinite_pagination_seconds": null,
"infinite_pagination_text": null,
"infinite_pagination_css_selector": null,
"infinite_pagination_n_selector": null,
"load_more_selector": null,
"pagination_query_parameter": null,
"pagination_next_page_selector": null,
"pagination_limit_type": null,
"pagination_max_page": null,
"max_next_page": null,
"pagination_max_variable": null,
"created_at": "2024-10-15T07:38:15.000000Z",
"updated_at": "2024-10-15T07:38:15.000000Z",
"cron_minutes": "*",
"cron_minutes_n_detail": null,
"cron_minutes_x_detail": null,
"cron_hours": "*",
"cron_hours_n_detail": null,
"cron_hours_x_detail": null,
"cron_day_of_month": "*",
"cron_day_of_month_x_detail": null,
"cron_month": "*",
"cron_month_x_detail": null,
"cron_day_of_week": "*",
"cron_day_of_week_x_detail": null,
"click_action_enabled": false,
"click_action_selector": null,
"click_action_wait": "no",
"workflow": [
{
"type": "options",
"data": {
"keywords": [
{
"type": "keyword",
"data": {
"keyword": "Architectural plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Floor plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Building layout PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
}
]
}
}
],
"version": 2,
"proxy_type": "rotation",
"proxy_host": null,
"proxy_port": null,
"proxy_username": null,
"proxy_password": null,
"parsers": null,
"deleted_at": null,
"external_auth": null
}
}
]
}
Authorizations
You can retrieve your token from the API Tokens section of your profile page. See https://docs.mrscraper.com/documentation/api-token for details.
Body
application/json
Response
200 - application/json
The response is of type object.
curl --request POST \
--url https://app.mrscraper.com/api/scrapers/pdf/create-and-run \
--header 'Authorization: Bearer <token>' \
--header 'Content-Type: application/json' \
--data '{
"name": "PDF Scraper",
"unique": true,
"expected_pdf": 200,
"keywords": [
{
"keyword": "Architectural plan PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
},
{
"keyword": "Floor plan PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
},
{
"keyword": "Building layout PDF",
"prompt": "Return '\''true'\'' if the most of the image are architectural sketches, plan, blueprint, drawing. Return '\''false'\'' if most of the image are just text documents or other"
}
]
}'
{
"message": "Scraping queued successfully",
"scraper": {
"id": 3669,
"name": "Test PDF Scraper",
"url": [
"Default"
],
"urls": [
"Default"
],
"scheduled": false,
"schedule": null,
"created_at": "2024-10-15T07:38:15.000000Z",
"updated_at": "2024-10-15T07:38:15.000000Z"
},
"results": [
{
"scraping_run_id": 375965,
"user_id": 5573,
"scraper_name": "Test PDF Scraper",
"scrapped_url": "Architectural plan PDF",
"scraped_url": "Architectural plan PDF",
"scraper_id": 3669,
"status": "running",
"updated_at": "2024-10-15T07:38:15.000000Z",
"created_at": "2024-10-15T07:38:15.000000Z",
"id": 1169956,
"scraper": {
"id": 3669,
"user_id": 5573,
"sharing": false,
"share_uuid": null,
"name": "Test PDF Scraper",
"type": "pdf",
"urls": "Default",
"pdf_urls": "Default",
"ai_prompt": null,
"ai_scope": null,
"headers": null,
"cookies": null,
"user_agent": null,
"disabled_resources": null,
"delay": 0,
"html_wanted": false,
"screenshot_wanted": false,
"screenshot_type": null,
"locale": null,
"scheduled": false,
"cron": null,
"cron_timezone": "UTC",
"paginate": 0,
"pagination_type": null,
"infinite_pagination_type": null,
"infinite_pagination_seconds": null,
"infinite_pagination_text": null,
"infinite_pagination_css_selector": null,
"infinite_pagination_n_selector": null,
"load_more_selector": null,
"pagination_query_parameter": null,
"pagination_next_page_selector": null,
"pagination_limit_type": null,
"pagination_max_page": null,
"max_next_page": null,
"pagination_max_variable": null,
"created_at": "2024-10-15T07:38:15.000000Z",
"updated_at": "2024-10-15T07:38:15.000000Z",
"cron_minutes": "*",
"cron_minutes_n_detail": null,
"cron_minutes_x_detail": null,
"cron_hours": "*",
"cron_hours_n_detail": null,
"cron_hours_x_detail": null,
"cron_day_of_month": "*",
"cron_day_of_month_x_detail": null,
"cron_month": "*",
"cron_month_x_detail": null,
"cron_day_of_week": "*",
"cron_day_of_week_x_detail": null,
"click_action_enabled": false,
"click_action_selector": null,
"click_action_wait": "no",
"workflow": [
{
"type": "options",
"data": {
"keywords": [
{
"type": "keyword",
"data": {
"keyword": "Architectural plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Floor plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Building layout PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
}
]
}
}
],
"version": 2,
"proxy_type": "rotation",
"proxy_host": null,
"proxy_port": null,
"proxy_username": null,
"proxy_password": null,
"parsers": null,
"deleted_at": null,
"external_auth": null
}
}
]
}