Actor picture

Probe Page Resources

jancurn/probe-page-resources

Sequentially loads a list of URLs in headless Chrome and analyzes HTTP resources requested by each page. Source code at https://github.com/jancurn/act-probe-page-resources

Author's avatar Jan Čurn
  • Modified
  • Users: 19
  • Runs: 508
Actor picture

Probe Page Resources

Based on the apify/actor-node-chrome:v0.21.10 Docker image (see docs).

This file is 139 lines long. Only the first 50 are shown. Show all

const chromeLauncher = require('chrome-launcher');
const CDP = require('chrome-remote-interface');
const _ = require('underscore');
const Apify = require('apify');
const typeCheck = require('type-check').typeCheck;


// Definition of the input, written as a type string for the `type-check` library.
// The string is validated against the actor's INPUT record with typeCheck() and is
// also printed verbatim when validation fails, so its exact text is runtime data.
// Fields (`Maybe T` is the library's notation for an optional value):
//   urls       - array of strings (required)
//   waitSecs   - optional number; presumably a per-page wait in seconds, its use
//                is not visible in this part of the file - TODO confirm
//   verboseLog - optional boolean, forwarded to launchChrome()
//   headers    - optional object, passed to Network.setExtraHTTPHeaders(); if it has a
//                'User-Agent' key, that value is also applied via Network.setUserAgentOverride()
const INPUT_TYPE = `{
    urls: [String],
    waitSecs: Maybe Number,
    verboseLog: Maybe Boolean,
    headers: Maybe Object     
}`;


Apify.main(async () => {
    // Fetch and check the input
    const input = await Apify.getValue('INPUT');
    if (!typeCheck(INPUT_TYPE, input)) {
        console.log('Expected input:');
        console.log(INPUT_TYPE);
        console.log('Received input:');
        console.dir(input);
        throw new Error('Received invalid input');
    }

    // Launch Chrome
    const chrome = await launchChrome({
        headless: !!process.env.APIFY_HEADLESS,
        verboseLog: input.verboseLog
    });
    const client = await CDP({ port: chrome.port });

    let currentResult = null;

    // Extract domains
    const { Network, Page } = client;

    // Add HTTP headers
    if (input.headers) {
        await Network.setExtraHTTPHeaders({ headers: input.headers });
        if (input.headers['User-Agent']) await Network.setUserAgentOverride({ userAgent: input.headers['User-Agent'] });
    }

    // Setup event handlers
    await Network.requestWillBeSent((params) => {
        //console.log("### Network.requestWillBeSent");
        //console.dir(params);