$API_TOKEN=<YOUR_API_TOKEN>
$cat > input.json << 'EOF'
<{
< "runMode": "DEVELOPMENT",
< "startUrls": [
< {
< "url": "https://crawlee.dev/js"
< }
< ],
< "respectRobotsTxtFile": true,
< "linkSelector": "a[href]",
< "globs": [
< {
< "glob": "https://crawlee.dev/js/*/*"
< }
< ],
< "pseudoUrls": [],
< "excludes": [
< {
< "glob": "/**/*.{png,jpg,jpeg,pdf}"
< }
< ],
< "pageFunction": "// The function accepts a single argument: the \"context\" object.\n// For a complete list of its properties and functions,\n// see https://apify.com/apify/web-scraper#page-function \nasync function pageFunction(context) {\n // This statement works as a breakpoint when you're trying to debug your code. Works only with Run mode: DEVELOPMENT!\n // debugger; \n\n // jQuery is handy for finding DOM elements and extracting data from them.\n // To use it, make sure to enable the \"Inject jQuery\" option.\n const $ = context.jQuery;\n const pageTitle = $('title').first().text();\n const h1 = $('h1').first().text();\n const first_h2 = $('h2').first().text();\n const random_text_from_the_page = $('p').first().text();\n\n\n // Print some information to Actor log\n context.log.info(`URL: ${context.request.url}, TITLE: ${pageTitle}`);\n\n // Manually add a new page to the queue for scraping.\n await context.enqueueRequest({ url: 'http://www.example.com' });\n\n // Return an object with the data extracted from the page.\n // It will be stored to the resulting dataset.\n return {\n url: context.request.url,\n pageTitle,\n h1,\n first_h2,\n random_text_from_the_page\n };\n}",
< "proxyConfiguration": {
< "useApifyProxy": true
< },
< "initialCookies": [],
< "waitUntil": [
< "networkidle2"
< ],
< "preNavigationHooks": "// We need to return array of (possibly async) functions here.\n// The functions accept two arguments: the \"crawlingContext\" object\n// and \"gotoOptions\".\n[\n async (crawlingContext, gotoOptions) => {\n // ...\n },\n]\n",
< "postNavigationHooks": "// We need to return array of (possibly async) functions here.\n// The functions accept a single argument: the \"crawlingContext\" object.\n[\n async (crawlingContext) => {\n // ...\n },\n]",
< "breakpointLocation": "NONE",
< "customData": {}
<}
<EOF
$curl "https://api.apify.com/v2/acts/apify~web-scraper/runs?token=$API_TOKEN" \
< -X POST \
< -d @input.json \
< -H 'Content-Type: application/json'