Measure Downloaded Bytes avatar
Measure Downloaded Bytes
Deprecated

Pricing

Pay per usage

Go to Store
Measure Downloaded Bytes

Measure Downloaded Bytes

Deprecated
jaroslavhejlek/measure-downloaded-bytes

Developed by

Jaroslav Hejlek

Maintained by Community

Example of how to measure downloaded data from network requests made by a webpage.

0.0 (0)

Pricing

Pay per usage

4

Monthly users

1

Last modified

2 years ago

.actor/Dockerfile

1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node-puppeteer-chrome:16
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY --chown=myuser package*.json ./
9
10# Install NPM packages, skip optional and development dependencies to
11# keep the image small. Avoid logging too much and print the dependency
12# tree for debugging
13RUN npm --quiet set progress=false \
14    && npm install --omit=dev --omit=optional \
15    && echo "Installed NPM packages:" \
16    && (npm list --omit=dev --all || true) \
17    && echo "Node.js version:" \
18    && node --version \
19    && echo "NPM version:" \
20    && npm --version \
21    && rm -r ~/.npm
22
23# Next, copy the remaining files and directories with the source code.
24# Since we do this after NPM install, quick build will be really fast
25# for most source file changes.
26COPY --chown=myuser . ./
27
28
29# Run the image. If you know you won't need headful browsers,
30# you can remove the XVFB start script for a micro perf gain.
31CMD ./start_xvfb_and_run_cmd.sh && npm start --silent

.actor/actor.json

1{
2    "actorSpecification": 1,
3    "name": "measure-downloaded-bytes",
4    "title": "Measure downloaded bytes",
5    "description": "Example of how to measure downloaded data from network requests made by a webpage.",
6    "version": "0.0",
7    "input": "./input-schema.json",
8    "dockerfile": "./Dockerfile",
9    "storages": {
10        "dataset": {
11            "actorSpecification": 1,
12            "views": {
13                "all": {
14                    "title": "Requested sources",
15                    "transformation": {
16                        "fields": [
17                            "url",
18                            "type",
19                            "size"
20                        ]
21                    },
22                    "display": {
23                        "component": "table",
24                        "properties": {
25                            "url": {
26                                "label": "URL",
27                                "format": "text"
28                            },
29                            "type": {
30                                "label": "Type",
31                                "format": "text"
32                            },
33                            "size": {
34                                "label": "Size in bytes",
35                                "format": "number"
36                            }
37                        }
38                    }
39                }
40            }
41        }
42    }
43}

.actor/input-schema.json

1{
2  "title": "Measure downloaded bytes",
3  "type": "object",
4  "schemaVersion": 1,
5  "properties": {
6    "url": {
7      "title": "Url",
8      "type": "string",
9      "description": "URL to measure.",
10      "prefill": "https://apify.com",
11      "editor": "textfield"
12    },
13    "abortRequests": {
14      "title": "Abort requests",
15      "type": "boolean",
16      "description": "Setting this to true will abort requests to some ad-related sources.",
17      "editor": "checkbox"
18    }
19  },
20  "required": [
21    "url"
22  ]
23}

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "extends": "@apify",
3    "root": true
4}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5node_modules
6storage

main.js

1import { launchPuppeteer } from '@crawlee/puppeteer';
2import { Actor } from 'apify';
3import prettyBytes from 'pretty-bytes';
4
/**
 * Takes a full-page screenshot of the given page and stores it with
 * `Actor.setValue` as a PNG record.
 *
 * @param {object} page - Page object exposing `screenshot()` (main() passes the
 *     page it gets from `browser.newPage()`).
 * @param {string} [key='debug-screen.png'] - Record key the image is saved under.
 * @returns {Promise<void>} Resolves once the record has been written.
 */
async function saveScreen(page, key = 'debug-screen.png') {
    const recordOptions = { contentType: 'image/png' };
    const pngBuffer = await page.screenshot({ fullPage: true });
    await Actor.setValue(key, pngBuffer, recordOptions);
}
9
/** URL fragments of ad/analytics requests that are aborted when `abortRequests` is enabled. */
const BLOCKED_URL_PARTS = ['livefyre', 'moatad', 'analytics', 'controltag', 'chartbeat'];

/** Longest time to wait for response bodies that are still being read after navigation. */
const BODY_READ_GRACE_MS = 30 * 1000;

/**
 * Enables request interception on the page and aborts every request whose URL
 * contains one of BLOCKED_URL_PARTS; all other requests are continued.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<void>}
 */
async function blockAdRequests(page) {
    await page.setRequestInterception(true);
    page.on('request', (request) => {
        const url = request.url();
        const shouldAbort = BLOCKED_URL_PARTS.some((urlPart) => url.includes(urlPart));
        const resolution = shouldAbort ? request.abort() : request.continue();
        // abort()/continue() return promises; a rejection must not surface as an
        // unhandled rejection from inside this event handler.
        Promise.resolve(resolution).catch((error) => {
            console.warn(`Could not ${shouldAbort ? 'abort' : 'continue'} request ${url}: ${error.message}`);
        });
    });
}

/**
 * Builds the dataset record for one response. The body size is 0 when the body
 * cannot be read.
 *
 * @param {object} response - Puppeteer HTTP response.
 * @returns {Promise<{url: string, type: string, size: number}>}
 */
async function measureResponse(response) {
    const url = response.url();
    let size = 0;
    try {
        const buffer = await response.buffer();
        size = buffer.byteLength;
    } catch (e) {
        // Deliberately ignored: the response can be empty, it then counts as 0 bytes.
    }
    return { url, type: response.request().resourceType(), size };
}

/**
 * Waits until all given promises have settled, but never longer than `timeoutMs`.
 *
 * @param {Promise<unknown>[]} promises
 * @param {number} timeoutMs
 * @returns {Promise<void>}
 */
async function settleWithin(promises, timeoutMs) {
    let timer;
    const timedOut = new Promise((resolve) => {
        timer = setTimeout(resolve, timeoutMs);
    });
    try {
        await Promise.race([Promise.allSettled(promises), timedOut]);
    } finally {
        // Always clear the timer so it cannot keep the process alive.
        clearTimeout(timer);
    }
}

/**
 * Aggregates response records into a grand total and per-resource-type totals.
 *
 * @param {{type: string, size: number}[]} responses
 * @returns {{totalSize: number, byResourceType: Map<string, {count: number, size: number}>}}
 */
function summarizeResponses(responses) {
    const byResourceType = new Map();
    let totalSize = 0;
    for (const { type, size } of responses) {
        const bucket = byResourceType.get(type) ?? { count: 0, size: 0 };
        bucket.count += 1;
        bucket.size += size;
        byResourceType.set(type, bucket);
        totalSize += size;
    }
    return { totalSize, byResourceType };
}

/**
 * Opens the input URL in a browser, records the body size of every response,
 * pushes the records to the dataset and logs a summary per resource type.
 *
 * Reads `url` (required) and `abortRequests` (optional) from the Actor input.
 * The browser is always closed, also on failure. Errors are logged and reported
 * through `Actor.fail`, so a failed measurement is not reported as a success;
 * the function itself does not reject for those errors.
 *
 * @returns {Promise<void>}
 */
async function main() {
    let browser;
    let failure = null;
    try {
        const input = await Actor.getInput();
        if (!input || typeof input.url !== 'string' || input.url === '') {
            throw new Error('Input must contain a non-empty "url" string.');
        }

        browser = await launchPuppeteer();
        const page = await browser.newPage();
        if (input.abortRequests) await blockAdRequests(page);

        const responses = [];
        const pendingReads = [];
        page.on('response', (response) => {
            // Keep the promise so that bodies still being read when navigation
            // finishes are not silently missing from the results.
            const read = measureResponse(response)
                .then((record) => {
                    responses.push(record);
                })
                .catch((error) => {
                    console.warn(`Could not record response: ${error.message}`);
                });
            pendingReads.push(read);
        });

        const startedAt = Date.now();
        console.log('Opening page', input.url);
        await page.goto(input.url, { timeout: 5 * 60 * 1000, waitUntil: 'networkidle2' });
        await saveScreen(page);

        // Bounded wait: a body that never finishes loading must not hang the run.
        await settleWithin([...pendingReads], BODY_READ_GRACE_MS);

        // Snapshot so late responses cannot change the data between summary and push.
        const measured = [...responses];
        const { totalSize, byResourceType } = summarizeResponses(measured);

        await Actor.pushData(measured);

        console.log('Page finished loading after', Date.now() - startedAt, 'ms');
        console.log(`Responses: ${measured.length} (${prettyBytes(totalSize)})`);
        console.log('----------------');
        console.log('By resource type');
        for (const [type, { count, size }] of byResourceType) {
            console.log(`${type}: ${count} (${prettyBytes(size)})`);
        }
    } catch (error) {
        failure = error;
        console.error(error?.message ?? String(error));
    } finally {
        if (browser) {
            try {
                await browser.close();
            } catch (closeError) {
                console.warn(`Could not close the browser: ${closeError.message}`);
            }
        }
    }

    // Report the failure only after cleanup, because Actor.fail ends the run.
    if (failure) {
        await Actor.fail(`Measuring downloaded bytes failed: ${failure?.message ?? String(failure)}`);
    }
}
78
// Actor lifecycle: initialise the SDK first, then run the measurement.
await Actor.init();
await main();

// Finish the run once main() has resolved.
await Actor.exit();

package.json

1{
2    "name": "measure-downloaded-bytes-actor",
3    "version": "0.0.2",
4    "description": "",
5    "author": "Jaroslav Hejlek",
6    "license": "ISC",
7    "type": "module",
8    "engines": {
9        "node": ">=16.0.0"
10    },
11    "dependencies": {
12        "apify": "^3.1.0",
13        "crawlee": "^3.1.1",
14        "pretty-bytes": "^6.0.0",
15        "puppeteer": "^19.2.2"
16    },
17    "devDependencies": {
18        "@apify/eslint-config": "^0.3.1",
19        "eslint": "^8.20.0"
20    },
21    "scripts": {
22        "start": "node main.js",
23        "lint": "./node_modules/.bin/eslint ./src --ext .js,.jsx",
24        "lint:fix": "./node_modules/.bin/eslint ./src --ext .js,.jsx --fix",
25        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
26    }
27}

Pricing

Pricing model

Pay per usage

This Actor is paid per platform usage. The Actor is free to use, and you only pay for the Apify platform usage.