
Measure Downloaded Bytes
Deprecated
Pricing
Pay per usage
Go to Store

Measure Downloaded Bytes
Deprecated
Example of how to measure downloaded data from network requests made by a webpage.
0.0 (0)
Pricing
Pay per usage
4
Total users
10
Monthly users
1
Last modified
2 years ago
.actor/Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-puppeteer-chrome:16
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./
# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent
.actor/actor.json
{ "actorSpecification": 1, "name": "measure-downloaded-bytes", "title": "Measure downloaded bytes", "description": "Example of how to measure downloaded data from network requests made by a webpage.", "version": "0.0", "input": "./input-schema.json", "dockerfile": "./Dockerfile", "storages": { "dataset": { "actorSpecification": 1, "views": { "all": { "title": "Requested sources", "transformation": { "fields": [ "url", "type", "size" ] }, "display": { "component": "table", "properties": { "url": { "label": "URL", "format": "text" }, "type": { "label": "Type", "format": "text" }, "size": { "label": "Size in bytes", "format": "number" } } } } } } }}
.actor/input-schema.json
{ "title": "Measure downloaded bytes", "type": "object", "schemaVersion": 1, "properties": { "url": { "title": "Url", "type": "string", "description": "URL to measure.", "prefill": "https://apify.com", "editor": "textfield" }, "abortRequests": { "title": "Abort requests", "type": "boolean", "description": "Setting this to true will abort requests to some ad related sources", "editor": "checkbox" } }, "required": [ "url" ]}
.dockerignore
# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.eslintrc
{ "extends": "@apify", "root": true}
.gitignore
# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
node_modules
storage
main.js
1import { launchPuppeteer } from '@crawlee/puppeteer';2import { Actor } from 'apify';3import prettyBytes from 'pretty-bytes';4
/**
 * Takes a full-page screenshot of the given page and stores it through
 * `Actor.setValue` as a PNG record.
 *
 * @param {object} page - Page object exposing an async `screenshot()` method.
 * @param {string} [key='debug-screen.png'] - Key under which the image is stored.
 * @returns {Promise<void>} Resolves once the screenshot has been stored.
 */
async function saveScreen(page, key = 'debug-screen.png') {
    const png = await page.screenshot({ fullPage: true });
    const recordOptions = { contentType: 'image/png' };
    await Actor.setValue(key, png, recordOptions);
}

/** URL fragments of ad related sources that are aborted when `abortRequests` is enabled. */
const BLOCKED_URL_PARTS = [
    'livefyre',
    'moatad',
    'analytics',
    'controltag',
    'chartbeat',
];

/** Upper bound on how long to wait for response bodies still being read after navigation. */
const PENDING_RESPONSE_GRACE_MS = 30 * 1000;

/**
 * Enables request interception on the page and aborts every request whose URL
 * contains one of BLOCKED_URL_PARTS; every other request is continued.
 *
 * @param {object} page - Page to install the interception on.
 * @returns {Promise<void>}
 */
async function blockAdRequests(page) {
    await page.setRequestInterception(true);
    page.on('request', (request) => {
        const url = request.url();
        const shouldAbort = BLOCKED_URL_PARTS.some((urlPart) => url.includes(urlPart));
        const handled = shouldAbort ? request.abort() : request.continue();
        // Attach a rejection handler so a failed abort/continue is reported
        // instead of becoming an unhandled promise rejection.
        Promise.resolve(handled).catch((error) => {
            console.warn(`Could not ${shouldAbort ? 'abort' : 'continue'} request ${url}: ${error.message}`);
        });
    });
}

/**
 * Builds the `{ url, type, size }` record for one response.
 *
 * @param {object} response - Response object passed to the page's `response` event.
 * @returns {Promise<{url: string, type: string, size: number}>} Size is 0 when the body cannot be read.
 */
async function describeResponse(response) {
    const url = response.url();
    let size = 0;
    try {
        const buffer = await response.buffer();
        size = buffer.byteLength;
    } catch (e) {
        // Ignore error here, response can be empty
    }
    return {
        url,
        type: response.request().resourceType(),
        size,
    };
}

/**
 * Waits until all given promises have settled, but never longer than `timeoutMs`.
 *
 * @param {Promise<unknown>[]} promises - Promises to wait for.
 * @param {number} timeoutMs - Maximum time to wait in milliseconds.
 * @returns {Promise<void>}
 */
async function waitForPending(promises, timeoutMs) {
    let timer;
    const timeout = new Promise((resolve) => {
        timer = setTimeout(resolve, timeoutMs);
    });
    try {
        await Promise.race([Promise.allSettled(promises), timeout]);
    } finally {
        // Do not keep the event loop alive once the reads have finished.
        clearTimeout(timer);
    }
}

/**
 * Aggregates response records into a grand total and per-resource-type totals.
 *
 * @param {{type: string, size: number}[]} responses - Collected response records.
 * @returns {{totalSize: number, byResourceType: Object<string, {count: number, size: number}>}}
 */
function summarizeResponses(responses) {
    let totalSize = 0;
    const byResourceType = {};
    for (const { type, size } of responses) {
        if (!byResourceType[type]) byResourceType[type] = { count: 0, size: 0 };
        byResourceType[type].count++;
        byResourceType[type].size += size;
        totalSize += size;
    }
    return { totalSize, byResourceType };
}

/**
 * Opens the input URL in a browser, records the size of every response the
 * page triggers, pushes the records with `Actor.pushData` and logs a summary
 * grouped by resource type.
 *
 * Reads `url` (required) and `abortRequests` (optional) from the actor input.
 * The browser is closed whether or not the measurement succeeds. The function
 * itself never rejects: on any error the message is logged and the run is
 * reported as failed through `Actor.fail`.
 *
 * @returns {Promise<void>}
 */
async function main() {
    try {
        const input = await Actor.getInput();
        if (!input?.url) throw new Error('Input must contain the "url" to measure.');

        const browser = await launchPuppeteer();
        try {
            const page = await browser.newPage();
            if (input.abortRequests) await blockAdRequests(page);

            const responses = [];
            const pendingReads = [];
            page.on('response', (response) => {
                // Keep a handle on every in-flight body read so it can be
                // awaited before the results are aggregated and stored.
                pendingReads.push(
                    describeResponse(response).then(
                        (record) => {
                            responses.push(record);
                        },
                        (error) => {
                            console.warn(`Could not record response: ${error.message}`);
                        },
                    ),
                );
            });

            const startedAt = Date.now();
            console.log('Opening page', input.url);
            await page.goto(input.url, { timeout: 5 * 60 * 1000, waitUntil: 'networkidle2' });
            await saveScreen(page);

            // Body reads may still be running after navigation; wait for them
            // (bounded, so a never-ending stream cannot hang the run).
            await waitForPending(pendingReads, PENDING_RESPONSE_GRACE_MS);
            // Snapshot so late arrivals cannot change what is stored vs. logged.
            const measured = [...responses];
            const { totalSize, byResourceType } = summarizeResponses(measured);

            await Actor.pushData(measured);

            console.log('Page finished loading after', Date.now() - startedAt, 'ms');
            console.log(`Responses: ${measured.length} (${prettyBytes(totalSize)})`);
            console.log('----------------');
            console.log('By resource type');
            for (const [type, data] of Object.entries(byResourceType)) {
                console.log(`${type}: ${data.count} (${prettyBytes(data.size)})`);
            }

            await page.close();
        } finally {
            // Runs on success and on failure so the browser is never left running.
            await browser.close();
        }
    } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        console.error(message);
        // Report the failure instead of letting the run finish as successful.
        await Actor.fail(`Measuring downloaded bytes failed: ${message}`);
    }
}

// Initialize the Apify SDK before any other Actor.* call is made.
await Actor.init();

// Run the measurement; main() catches and logs its own errors.
await main();

// Exit successfully
await Actor.exit();
package.json
{ "name": "measure-downloaded-bytes-actor", "version": "0.0.2", "description": "", "author": "Jaroslav Hejlek", "license": "ISC", "type": "module", "engines": { "node": ">=16.0.0" }, "dependencies": { "apify": "^3.1.0", "crawlee": "^3.1.1", "pretty-bytes": "^6.0.0", "puppeteer": "^19.2.2" }, "devDependencies": { "@apify/eslint-config": "^0.3.1", "eslint": "^8.20.0" }, "scripts": { "start": "node main.js", "lint": "./node_modules/.bin/eslint ./src --ext .js,.jsx", "lint:fix": "./node_modules/.bin/eslint ./src --ext .js,.jsx --fix", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1" }}