Probe Page Resources
Pricing
Pay per usage
Go to Store
Probe Page Resources
Sequentially loads a list of URLs in headless Chrome and analyzes HTTP resources requested by each page. Source code at https://github.com/jancurn/act-probe-page-resources
0.0 (0)
Pricing
Pay per usage
3
Total users
34
Monthly users
1
Runs succeeded
97%
Last modified
2 years ago
Dockerfile
# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-puppeteer-chrome

# Copy just package.json and package-lock.json first, since they should be
# the only files that affect "npm install" in the next step; this speeds up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree, Node.js version and NPM version for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
main.js
1const chromeLauncher = require('chrome-launcher');2const CDP = require('chrome-remote-interface');3const _ = require('underscore');4const Apify = require('apify');5const typeCheck = require('type-check').typeCheck;6
7
8// Definition of the input9const INPUT_TYPE = `{10 urls: [String],11 waitSecs: Maybe Number,12 verboseLog: Maybe Boolean,13 headers: Maybe Object 14}`;15
16
/**
 * Actor entry point.
 *
 * Reads and validates INPUT, launches Chrome, then navigates to every URL in
 * `input.urls` one after another while recording the network requests seen
 * for each page. The collected records are stored under the 'OUTPUT' key as
 * an array of `{ url, requests }` objects, where `requests` is keyed by the
 * DevTools `requestId`.
 *
 * Throws an Error('Received invalid input') when INPUT does not match INPUT_TYPE.
 */
Apify.main(async () => {
    // Fetch and check the input
    const input = await Apify.getValue('INPUT');
    if (!typeCheck(INPUT_TYPE, input)) {
        console.log('Expected input:');
        console.log(INPUT_TYPE);
        console.log('Received input:');
        console.dir(input);
        throw new Error('Received invalid input');
    }

    // Launch Chrome
    const chrome = await launchChrome({
        headless: !!process.env.APIFY_HEADLESS,
        verboseLog: input.verboseLog
    });

    try {
        const client = await CDP({ port: chrome.port });

        // Record of the page currently being probed. It stays null until the
        // first navigation starts, so every event handler must tolerate that.
        let currentResult = null;

        // Extract domains
        const { Network, Page } = client;

        // Add HTTP headers
        if (input.headers) {
            await Network.setExtraHTTPHeaders({ headers: input.headers });
            if (input.headers['User-Agent']) {
                await Network.setUserAgentOverride({ userAgent: input.headers['User-Agent'] });
            }
        }

        // Setup event handlers
        await Network.requestWillBeSent((params) => {
            // Events may arrive before the first page record exists
            if (!currentResult) return;

            let req = currentResult.requests[params.requestId];
            if (!req) {
                req = currentResult.requests[params.requestId] = {};
                req.url = params.request.url;
                req.method = params.request.method;
                req.requestedAt = new Date(params.wallTime * 1000);
            } else {
                // On redirects, the Network.requestWillBeSent() is fired multiple times
                // with the same requestId and the subsequent requests contain the 'redirectResponse' field
                req.redirects = req.redirects || [];
                const redirect = _.pick(params.redirectResponse, 'url', 'status');
                redirect.location = params.redirectResponse && params.redirectResponse.headers
                    ? params.redirectResponse.headers['location']
                    : null;
                req.redirects.push(redirect);
            }
        });

        await Network.responseReceived((params) => {
            // Note that responses might come from the previous page (or arrive
            // before the first page record exists), in which case there is no
            // matching request record to update.
            const req = currentResult ? currentResult.requests[params.requestId] : null;
            if (req) {
                req.status = params.response.status;
                req.mimeType = params.response.mimeType;
                req.type = params.type;
            }
        });

        await Network.loadingFailed((params) => {
            // Note that request failures might come from the previous page
            const req = currentResult ? currentResult.requests[params.requestId] : null;
            if (req) {
                req.type = params.type;
                req.errorText = params.errorText;
                req.canceled = params.canceled;
            }
        });

        // Enable events
        await Promise.all([Network.enable(), Page.enable()]);

        // Disable cache
        await Network.setCacheDisabled({ cacheDisabled: true });

        // Iterate and probe all URLs
        const results = [];
        for (const url of input.urls) {
            console.log(`Navigating to URL: ${url}`);
            currentResult = {
                url,
                requests: {}
            };
            results.push(currentResult);

            await Page.navigate({ url });
            await Page.loadEventFired();

            // Wait input.waitSecs seconds (no extra wait when it is missing or zero)
            await new Promise((resolve) => setTimeout(resolve, input.waitSecs * 1000 || 0));
            await Page.stopLoading();
        }

        // Save results
        await Apify.setValue('OUTPUT', results);
    } finally {
        // Only useful for local development. Done in `finally` so that Chrome
        // is not left running when probing or saving fails.
        await chrome.kill();
    }

    console.log('Done');
});
123
// Code inspired by https://developers.google.com/web/updates/2017/04/headless-chrome
/**
 * Launches Chrome through chrome-launcher and logs its pid, debugging port
 * and user agent.
 *
 * @param {Object} [options]
 * @param {boolean} [options.headless] - When truthy, adds the '--disable-gpu' and '--headless' flags.
 * @param {boolean} [options.verboseLog] - When truthy, the launcher log level is 'verbose', otherwise 'error'.
 * @returns {Promise<Object>} The object returned by chromeLauncher.launch().
 */
const launchChrome = async (options = {}) => {
    console.log('Launching Chrome...');

    // Build the flag list without placeholders, so that no empty-string
    // arguments are handed to Chrome when running with a visible window.
    const chromeFlags = [];
    if (options.headless) {
        chromeFlags.push('--disable-gpu', '--headless');
    }
    chromeFlags.push('--no-sandbox');

    const chrome = await chromeLauncher.launch({
        chromeFlags,
        logLevel: options.verboseLog ? 'verbose' : 'error',
    });

    const version = await CDP.Version({ port: chrome.port });
    console.log(`Chrome launched (pid: ${chrome.pid}, port: ${chrome.port}, userAgent: ${version['User-Agent']})`);

    return chrome;
};
package.json
{ "name": "apify-project", "version": "0.0.1", "description": "", "author": "It's not you it's me", "license": "ISC", "dependencies": { "chrome-launcher": "latest", "chrome-remote-interface": "latest", "underscore": "latest", "apify": "^2.2.2", "type-check": "latest" }, "scripts": { "start": "node main.js" }}