This endpoint allows you to create and run a PDF scraper.
You can retrieve your token from the API Tokens section of your profile page; see /documentation/api-token for details.
The name of the scraping task.
"PDF Scraper"
The keywords used to search for PDF files. Each entry has a "keyword" and a "prompt".
[
{
"keyword": "Architectural plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
},
{
"keyword": "Floor plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
},
{
"keyword": "Building layout PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
]
Option to exclude previously scraped PDF files.
true
The total number of PDF files expected in the results. Alternative keywords are added automatically as needed to reach this number.
200
"Scraping queued successfully"
{
"id": 3669,
"name": "PDF Scraper",
"url": ["Default"],
"urls": ["Default"],
"scheduled": false,
"schedule": null,
"created_at": "2024-10-15T07:38:15.000000Z",
"updated_at": "2024-10-15T07:38:15.000000Z"
}
[
{
"scraping_run_id": 375965,
"user_id": 26,
"scraper_name": "PDF Scraper",
"scrapped_url": "Architectural plan PDF",
"scraped_url": "Architectural plan PDF",
"scraper_id": 3669,
"status": "running",
"updated_at": "2024-09-20T06:50:17.000000Z",
"created_at": "2024-09-20T06:50:16.000000Z",
"id": 1169956,
"scraper": {
"id": 3669,
"user_id": 26,
"sharing": false,
"share_uuid": null,
"name": "PDF Scraper",
"type": "pdf",
"urls": "Default",
"pdf_urls": "Default",
"ai_prompt": null,
"ai_scope": null,
"headers": null,
"cookies": null,
"user_agent": null,
"disabled_resources": null,
"delay": 0,
"html_wanted": false,
"screenshot_wanted": false,
"screenshot_type": null,
"locale": null,
"scheduled": false,
"cron": null,
"cron_timezone": "UTC",
"paginate": 0,
"pagination_type": null,
"infinite_pagination_type": null,
"infinite_pagination_seconds": null,
"infinite_pagination_text": null,
"infinite_pagination_css_selector": null,
"infinite_pagination_n_selector": null,
"load_more_selector": null,
"pagination_query_parameter": null,
"pagination_next_page_selector": null,
"pagination_limit_type": null,
"pagination_max_page": null,
"max_next_page": null,
"pagination_max_variable": null,
"created_at": "2024-09-20T06:50:16.000000Z",
"updated_at": "2024-09-20T06:50:16.000000Z",
"cron_minutes": "*",
"cron_minutes_n_detail": null,
"cron_minutes_x_detail": null,
"cron_hours": "*",
"cron_hours_n_detail": null,
"cron_hours_x_detail": null,
"cron_day_of_month": "*",
"cron_day_of_month_x_detail": null,
"cron_month": "*",
"cron_month_x_detail": null,
"cron_day_of_week": "*",
"cron_day_of_week_x_detail": null,
"click_action_enabled": false,
"click_action_selector": null,
"click_action_wait": "no",
"workflow": [
{
"type": "options",
"data": {
"keywords": [
{
"type": "keyword",
"data": {
"keyword": "Architectural plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Floor plan PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
},
{
"type": "keyword",
"data": {
"keyword": "Building layout PDF",
"prompt": "Return 'true' if the most of the image are architectural sketches, plan, blueprint, drawing. Return 'false' if most of the image are just text documents or other"
}
}
]
}
}
],
"version": 2,
"proxy_type": "rotation",
"proxy_host": null,
"proxy_port": null,
"proxy_username": null,
"proxy_password": null,
"parsers": null,
"deleted_at": null
}
}
]