Measure Downloaded Bytes
View all Actors
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
Measure Downloaded Bytes
jaroslavhejlek/measure-downloaded-bytes
Example of how to measure downloaded data from network requests made by a webpage.
.actor/Dockerfile
1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node-puppeteer-chrome:16
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY package*.json ./
9
10# Install NPM packages, skip optional and development dependencies to
11# keep the image small. Avoid logging too much and print the dependency
12# tree for debugging
13RUN npm --quiet set progress=false \
14 && npm install --omit=dev --omit=optional \
15 && echo "Installed NPM packages:" \
16 && (npm list --omit=dev --all || true) \
17 && echo "Node.js version:" \
18 && node --version \
19 && echo "NPM version:" \
20 && npm --version \
21 && rm -r ~/.npm
22
23# Next, copy the remaining files and directories with the source code.
24# Since we do this after NPM install, quick build will be really fast
25# for most source file changes.
26COPY . ./
27
28
29# Run the image. If you know you won't need headful browsers,
30# you can remove the XVFB start script for a micro perf gain.
31CMD ./start_xvfb_and_run_cmd.sh && npm start --silent
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "measure-downloaded-bytes",
4 "title": "Measure downloaded bytes",
5 "description": "Example of how to measure downloaded data from network requests made by a webpage.",
6 "version": "0.0",
7 "input": "./input-schema.json",
8 "dockerfile": "./Dockerfile",
9 "storages": {
10 "dataset": {
11 "actorSpecification": 1,
12 "views": {
13 "all": {
14 "title": "Requested sources",
15 "transformation": {
16 "fields": [
17 "url",
18 "type",
19 "size"
20 ]
21 },
22 "display": {
23 "component": "table",
24 "properties": {
25 "url": {
26 "label": "URL",
27 "format": "text"
28 },
29 "type": {
30 "label": "Type",
31 "format": "text"
32 },
33 "size": {
34 "label": "Size in bytes",
35 "format": "number"
36 }
37 }
38 }
39 }
40 }
41 }
42 }
43}
.actor/input-schema.json
1{
2 "title": "Measure downloaded bytes",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "url": {
7 "title": "Url",
8 "type": "string",
9 "description": "URL to measure.",
10 "prefill": "https://apify.com",
11 "editor": "textfield"
12 },
13 "abortRequests": {
14 "title": "Abort requests",
15 "type": "boolean",
16 "description": "Setting this to true will abort requests to some ad- and analytics-related sources.",
17 "editor": "checkbox"
18 }
19 },
20 "required": [
21 "url"
22 ]
23}
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
.editorconfig
1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf
.eslintrc
1{
2 "extends": "@apify",
3 "root": true
4}
.gitignore
1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5node_modules
6storage
main.js
1import { launchPuppeteer } from '@crawlee/puppeteer';
2import { Actor } from 'apify';
3import prettyBytes from 'pretty-bytes';
4
/**
 * Takes a full-page screenshot of `page` and stores it through `Actor.setValue`.
 *
 * @param page Object exposing an async `screenshot()` method (presumably a Puppeteer page).
 * @param {string} [key='debug-screen.png'] Key under which the PNG is stored.
 * @returns {Promise<void>} Resolves once the screenshot has been saved.
 */
async function saveScreen(page, key = 'debug-screen.png') {
    const storeOptions = { contentType: 'image/png' };
    const pngImage = await page.screenshot({ fullPage: true });
    await Actor.setValue(key, pngImage, storeOptions);
}
9
/** URL fragments of the requests that are aborted when the `abortRequests` input is set. */
const ABORTED_URL_PARTS = ['livefyre', 'moatad', 'analytics', 'controltag', 'chartbeat'];

/** Upper bound (ms) on waiting for response bodies that are still being read after the page loaded. */
const RESPONSE_DRAIN_TIMEOUT_MS = 10 * 1000;

/**
 * Reads the body of one response and describes it as a dataset record.
 *
 * @param response Response object passed to the page's `response` event.
 * @returns {Promise<{url: string, type: string, size: number}>} `size` is the byte length of the
 *     body, or 0 when the body could not be read.
 */
async function measureResponse(response) {
    const url = response.url();
    let size = 0;
    try {
        const buffer = await response.buffer();
        size = buffer.byteLength;
    } catch (e) {
        // Ignore error here, response can be empty
    }
    return { url, type: response.request().resourceType(), size };
}

/**
 * Waits until every promise has settled, but never longer than `timeoutMs`.
 *
 * @param {Promise<unknown>[]} promises Promises to wait for.
 * @param {number} timeoutMs Maximum time to wait, in milliseconds.
 * @returns {Promise<void>}
 */
async function settleWithin(promises, timeoutMs) {
    let timer;
    const timeout = new Promise((resolve) => {
        timer = setTimeout(resolve, timeoutMs);
    });
    try {
        await Promise.race([Promise.allSettled(promises), timeout]);
    } finally {
        // Do not leave the timer behind once the wait is over.
        clearTimeout(timer);
    }
}

/**
 * Aggregates measured responses into a grand total and per-resource-type totals.
 *
 * @param {{type: string, size: number}[]} responses Measured responses.
 * @returns {{totalSize: number, byResourceType: Object<string, {count: number, size: number}>}}
 */
function summarizeResponses(responses) {
    const byResourceType = {};
    let totalSize = 0;
    for (const { type, size } of responses) {
        if (!byResourceType[type]) byResourceType[type] = { count: 0, size: 0 };
        byResourceType[type].count++;
        byResourceType[type].size += size;
        totalSize += size;
    }
    return { totalSize, byResourceType };
}

/**
 * Opens the input URL in a browser, records the size of every response the page receives,
 * pushes the records to the dataset and logs a summary grouped by resource type.
 *
 * The browser is always closed, even when the measurement fails. A failure is logged and
 * then reported through `Actor.fail()` so that the run is not recorded as successful.
 *
 * @returns {Promise<void>}
 */
async function main() {
    let browser;
    let failure = null;
    try {
        const input = await Actor.getInput();
        if (!input || typeof input.url !== 'string' || input.url === '') {
            throw new Error('Input must contain a non-empty "url" string.');
        }

        browser = await launchPuppeteer();
        const page = await browser.newPage();
        if (input.abortRequests) {
            await page.setRequestInterception(true);
            page.on('request', (request) => {
                const url = request.url();
                const shouldAbort = ABORTED_URL_PARTS.some((urlPart) => url.includes(urlPart));
                const handled = shouldAbort ? request.abort() : request.continue();
                // The returned promise can reject (e.g. the request was already handled);
                // without a handler that would be an unhandled rejection.
                handled.catch((error) => console.warn('Could not handle request', url, error.message));
            });
        }

        const responses = [];
        const pendingMeasurements = [];
        page.on('response', (response) => {
            // Keep the promise so bodies still being read when the page finishes loading
            // can be awaited before the totals are computed.
            const measurement = measureResponse(response)
                .then((entry) => {
                    responses.push(entry);
                })
                .catch((error) => console.warn('Could not measure response', error.message));
            pendingMeasurements.push(measurement);
        });

        const startedAt = Date.now();
        console.log('Opening page', input.url);
        await page.goto(input.url, { timeout: 5 * 60 * 1000, waitUntil: 'networkidle2' });
        await saveScreen(page);

        // 'networkidle2' tolerates connections that are still open, so bound the wait.
        await settleWithin(pendingMeasurements, RESPONSE_DRAIN_TIMEOUT_MS);

        // Work on a snapshot so late responses cannot change the data between steps.
        const measured = [...responses];
        const { totalSize, byResourceType } = summarizeResponses(measured);

        await Actor.pushData(measured);

        console.log('Page finished loading after', Date.now() - startedAt, 'ms');
        console.log(`Responses: ${measured.length} (${prettyBytes(totalSize)})`);
        console.log('----------------');
        console.log('By resource type');
        for (const [type, data] of Object.entries(byResourceType)) {
            console.log(`${type}: ${data.count} (${prettyBytes(data.size)})`);
        }
    } catch (error) {
        console.error(error.message);
        failure = error;
    } finally {
        if (browser) {
            try {
                // Closing the browser also disposes of the page opened above.
                await browser.close();
            } catch (closeError) {
                console.error('Failed to close the browser:', closeError.message);
            }
        }
    }

    // Report the failure only after cleanup, because Actor.fail() ends the run.
    if (failure) {
        await Actor.fail(`Measurement failed: ${failure.message}`);
    }
}
78
// Entry point: initialise the Actor, run the measurement, then shut the Actor down.
await Actor.init();
await main();

// Exit successfully
await Actor.exit();
package.json
1{
2 "name": "measure-downloaded-bytes-actor",
3 "version": "0.0.2",
4 "description": "",
5 "author": "Jaroslav Hejlek",
6 "license": "ISC",
7 "type": "module",
8 "engines": {
9 "node": ">=16.0.0"
10 },
11 "dependencies": {
12 "apify": "^3.1.0",
13 "crawlee": "^3.1.1",
14 "pretty-bytes": "^6.0.0",
15 "puppeteer": "^19.2.2"
16 },
17 "devDependencies": {
18 "@apify/eslint-config": "^0.3.1",
19 "eslint": "^8.20.0"
20 },
21 "scripts": {
22 "start": "node main.js",
23 "lint": "./node_modules/.bin/eslint . --ext .js,.jsx",
24 "lint:fix": "./node_modules/.bin/eslint . --ext .js,.jsx --fix",
25 "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
26 }
27}
Developer
Maintained by Community
Categories