# Start a run of the "apify~jsdom-scraper" Actor through the Apify API (v2).
#
# Steps:
#   1. Store the API token in a shell variable.
#   2. Write the Actor input to ./input.json.
#   3. POST that file to the Actor's "runs" endpoint.

# Replace the placeholder with your own token. The value is quoted so the
# shell does not interpret "<" and ">" as redirection operators.
API_TOKEN='<YOUR_API_TOKEN>'

# The heredoc delimiter is quoted ('EOF') so the shell performs no expansion
# inside the body: the embedded JavaScript contains backticks and quotes that
# must reach the file unchanged.
cat > input.json << 'EOF'
{
    "startUrls": [
        {
            "url": "https://crawlee.dev/js"
        }
    ],
    "respectRobotsTxtFile": true,
    "globs": [
        {
            "glob": "https://crawlee.dev/js/*/*"
        }
    ],
    "pseudoUrls": [],
    "excludes": [
        {
            "glob": "/**/*.{png,jpg,jpeg,pdf}"
        }
    ],
    "linkSelector": "a[href]",
    "pageFunction": "async function pageFunction(context) {\n const { window, request, log } = context;\n\n // The \"window\" property contains the JSDOM object which is useful\n // for querying DOM elements and extracting data from them.\n const pageTitle = window.document.title;\n\n // The \"request\" property contains various information about the web page loaded. \n const url = request.url;\n \n // Use \"log\" object to print information to Actor log.\n log.info('Page scraped', { url, pageTitle });\n\n // Return an object with the data extracted from the page.\n // It will be stored to the resulting dataset.\n return {\n url,\n pageTitle\n };\n}",
    "proxyConfiguration": {
        "useApifyProxy": true
    },
    "initialCookies": [],
    "additionalMimeTypes": [],
    "preNavigationHooks": "// We need to return array of (possibly async) functions here.\n// The functions accept two arguments: the \"crawlingContext\" object\n// and \"requestAsBrowserOptions\" which are passed to the `requestAsBrowser()`\n// function the crawler calls to navigate..\n[\n async (crawlingContext, requestAsBrowserOptions) => {\n // ...\n }\n]",
    "postNavigationHooks": "// We need to return array of (possibly async) functions here.\n// The functions accept a single argument: the \"crawlingContext\" object.\n[\n async (crawlingContext) => {\n // ...\n },\n]",
    "customData": {}
}
EOF

# Send the input as the request body.
#   * The token travels in the Authorization header instead of the URL query
#     string, so it is not exposed as part of the request URL.
#   * --data-binary posts the file exactly as written (plain --data strips
#     newlines and carriage returns) and makes curl issue a POST.
curl "https://api.apify.com/v2/acts/apify~jsdom-scraper/runs" \
    --header "Authorization: Bearer ${API_TOKEN}" \
    --header 'Content-Type: application/json' \
    --data-binary @input.json