Actor picture

Probe Page Resources

jancurn/probe-page-resources

Sequentially loads a list of URLs in headless Chrome and analyzes HTTP resources requested by each page. Source code at https://github.com/jancurn/act-probe-page-resources

No credit card required

Author's avatar Jan Čurn
  • Modified
  • Users: 27
  • Runs: 839
Actor picture
Probe Page Resources

Dockerfile

# This is a template for a Dockerfile used to build and run acts in the Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-puppeteer-chrome

# Next, copy just package.json and package-lock.json, since they should be
# the only files that affect "npm install" in the next step. Copying them
# separately from the rest of the source code speeds up the build.
COPY package*.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much, and print the dependency
# tree plus the Node.js and NPM versions for debugging.
# "npm list" is followed by "|| true" so that a non-zero exit status from it
# does not abort the rest of the chained command.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
# NOTE(review): "myuser" is not created in this file; presumably it is defined
# by the base image - confirm.
COPY --chown=myuser:myuser . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

main.js

This file is 139 lines long. Only the first 50 are shown. Show all

const chromeLauncher = require('chrome-launcher');
const CDP = require('chrome-remote-interface');
const _ = require('underscore');
const Apify = require('apify');
const typeCheck = require('type-check').typeCheck;


// Definition of the input, written as a type string for the `type-check` library.
// It is passed to typeCheck() together with the actor's INPUT value, and it is
// printed to the console as the "Expected input" when that check fails.
//   urls       - array of strings (the only field not marked `Maybe`)
//   waitSecs   - number, marked `Maybe`
//   verboseLog - boolean, marked `Maybe`; forwarded to launchChrome()
//   headers    - object, marked `Maybe`; passed to Network.setExtraHTTPHeaders(), and
//                its 'User-Agent' entry (if set) is also used to override the user agent
// NOTE(review): `Maybe T` presumably also accepts undefined/null - confirm against the type-check docs.
const INPUT_TYPE = `{
    urls: [String],
    waitSecs: Maybe Number,
    verboseLog: Maybe Boolean,
    headers: Maybe Object     
}`;


Apify.main(async () => {
    // Fetch and check the input
    const input = await Apify.getValue('INPUT');
    if (!typeCheck(INPUT_TYPE, input)) {
        console.log('Expected input:');
        console.log(INPUT_TYPE);
        console.log('Received input:');
        console.dir(input);
        throw new Error('Received invalid input');
    }

    // Launch Chrome
    const chrome = await launchChrome({
        headless: !!process.env.APIFY_HEADLESS,
        verboseLog: input.verboseLog
    });
    const client = await CDP({ port: chrome.port });

    let currentResult = null;

    // Extract domains
    const { Network, Page } = client;

    // Add HTTP headers
    if (input.headers) {
        await Network.setExtraHTTPHeaders({ headers: input.headers });
        if (input.headers['User-Agent']) await Network.setUserAgentOverride({ userAgent: input.headers['User-Agent'] });
    }

    // Setup event handlers
    await Network.requestWillBeSent((params) => {
        //console.log("### Network.requestWillBeSent");
        //console.dir(params);

package.json

{
    "name": "apify-project",
    "version": "0.0.1",
    "description": "",
    "author": "It's not you it's me",
    "license": "ISC",
    "dependencies": {
        "chrome-launcher": "latest",
        "chrome-remote-interface": "latest",
        "underscore": "latest",
        "apify": "^2.2.2",
        "type-check": "latest"
    },
    "scripts": {
        "start": "node main.js"
    }
}