# Post a JSON input document to the runs endpoint of the
# apify~puppeteer-scraper Actor.
#
# Usage: replace the placeholder below with a real API token, then run this
# snippet in a POSIX shell. It creates (or overwrites) ./input.json in the
# current directory.

# API token placeholder. It is single-quoted so the angle brackets are kept as
# literal text instead of being parsed as shell redirections.
API_TOKEN='<YOUR_API_TOKEN>'

# Write the Actor input to input.json. The heredoc delimiter is quoted ('EOF')
# so the shell performs no expansion inside the body: the backticks, `${...}`
# template placeholders and `\n` escapes in the embedded JavaScript strings
# reach the file unchanged.
cat > input.json << 'EOF'
{
    "startUrls": [
        {
            "url": "https://crawlee.dev/js"
        }
    ],
    "globs": [
        {
            "glob": "https://crawlee.dev/js/*/*"
        }
    ],
    "pseudoUrls": [],
    "excludes": [
        {
            "glob": "/**/*.{png,jpg,jpeg,pdf}"
        }
    ],
    "linkSelector": "a",
    "respectRobotsTxtFile": true,
    "pageFunction": "async function pageFunction(context) {\n const { page, request, log } = context;\n const title = await page.title();\n log.info(`URL: ${request.url} TITLE: ${title}`);\n return {\n url: request.url,\n title\n };\n}",
    "proxyConfiguration": {
        "useApifyProxy": true
    },
    "initialCookies": [],
    "waitUntil": [
        "networkidle2"
    ],
    "preNavigationHooks": "// We need to return array of (possibly async) functions here.\n// The functions accept two arguments: the \"crawlingContext\" object\n// and \"gotoOptions\".\n[\n async (crawlingContext, gotoOptions) => {\n const { page } = crawlingContext;\n // ...\n },\n]",
    "postNavigationHooks": "// We need to return array of (possibly async) functions here.\n// The functions accept a single argument: the \"crawlingContext\" object.\n[\n async (crawlingContext) => {\n const { page } = crawlingContext;\n // ...\n },\n]",
    "customData": {}
}
EOF

# POST the input file as the request body. The token travels in the query
# string, so the URL is double-quoted to let $API_TOKEN expand while keeping
# `?` and `~` away from shell globbing/tilde handling.
# NOTE(review): the Apify API docs also describe an `Authorization: Bearer`
# header, which keeps the token out of URLs and logs - consider switching.
curl "https://api.apify.com/v2/acts/apify~puppeteer-scraper/runs?token=$API_TOKEN" \
    -X POST \
    -d @input.json \
    -H 'Content-Type: application/json'