Actor Costs avatar
Actor Costs
Try for free

No credit card required

View all Actors
Actor Costs

Actor Costs

lukaskrivka/actor-costs
Try for free

No credit card required

Get cost and usage stats for the runs of an Actor, aggregated by day. The Actor also provides summary stats for the whole period.

.actor/Dockerfile

1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node:20 AS builder
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY package*.json ./
9
10# Install all dependencies. Don't audit to speed up the installation.
11RUN npm install --include=dev --audit=false
12
13# Next, copy the source files using the user set
14# in the base image.
15COPY . ./
16
17# Build the project (compile the TypeScript sources into dist).
18# Dependencies were already installed in the step above.
19RUN npm run build
20
21# Create final image
22FROM apify/actor-node:20
23
24# Copy just package.json and package-lock.json
25# to speed up the build using Docker layer cache.
26COPY package*.json ./
27
28# Install NPM packages, skip optional and development dependencies to
29# keep the image small. Avoid logging too much and print the dependency
30# tree for debugging
31RUN npm --quiet set progress=false \
32    && npm install --omit=dev --omit=optional \
33    && echo "Installed NPM packages:" \
34    && (npm list --omit=dev --all || true) \
35    && echo "Node.js version:" \
36    && node --version \
37    && echo "NPM version:" \
38    && npm --version \
39    && rm -r ~/.npm
40
41# Copy built JS files from builder image
42COPY --from=builder /usr/src/app/dist ./dist
43
44# Next, copy the remaining files and directories with the source code.
45# Since we do this after NPM install, quick build will be really fast
46# for most source file changes.
47COPY . ./
48
49
50# Run the image.
51CMD npm run start:prod --silent

.actor/actor.json

1{
2	"actorSpecification": 1,
3	"name": "actor-costs",
4	"title": "Actor Costs",
5	"description": "Get cost and usage stats for the runs of an Actor, aggregated by day, plus summary stats for the whole period.",
6	"version": "0.0",
7	"meta": {
8		"templateId": "ts-crawlee-cheerio"
9	},
10	"input": "./input_schema.json",
11	"dockerfile": "./Dockerfile"
12}

.actor/input_schema.json

1{
2    "title": "Actor Costs input",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "actorIdOrName": {
7            "title": "Actor ID or full name",
8            "type": "string",
9            "description": "Actor ID or full name",
10            "editor": "textfield",
11            "prefill": "apify/web-scraper"
12        },
13        "onlyRunsNewerThan": {
14            "title": "Only runs newer than date",
15            "type": "string",
16            "description": "Measured by when the run was started. Use JSON input to specify date with a time in ISO format, e.g. \"2024-01-01T12:00:00\"",
17            "editor": "datepicker"
18        },
19        "onlyRunsOlderThan": {
20            "title": "Only runs older than date",
21            "type": "string",
22            "description": "Measured by when the run was started. Use JSON input to specify date with a time in ISO format, e.g. \"2024-01-01T12:00:00\"",
23            "editor": "datepicker"
24        },
25        "getCostBreakdown": {
26            "title": "Get cost breakdown by usage type (1000x slower!)",
27            "type": "boolean",
28            "description": "Very slow since we need to request each run separately",
29            "default": false
30        }
31    },
32    "required": ["actorIdOrName"]
33}

src/main.ts

1import { Actor, log } from 'apify';
2import { useState } from 'crawlee';
3import { processRuns } from './process-runs.js';
4
/** Actor input, mirroring the fields declared in the input schema. */
interface Input {
    /** ID or full name of the Actor whose runs are analysed. */
    actorIdOrName: string;
    /** Optional lower bound on the run start date (string parsed with `new Date`). */
    onlyRunsNewerThan?: string;
    /** Optional upper bound on the run start date (string parsed with `new Date`). */
    onlyRunsOlderThan?: string;
    /** When true, each run is requested separately to get the per-usage-type costs. */
    getCostBreakdown?: boolean;
}

/** Statistics of all processed runs that started on one calendar day. */
interface DateAggregation {
    /** Day in `YYYY-MM-DD` form (date part of the run's ISO start time). */
    date: string;
    /** Number of runs counted for the day. */
    runCount: number;
    /** Sum of `usageTotalUsd` over the day's runs. */
    cost: number;
    /** Sum of `usageUsd` per usage type; stays empty when no breakdown is available. */
    costDetail: Record<string, number>;
    /** ISO start time of the earliest run counted for the day. */
    firstRunDate: string;
    /** ISO start time of the latest run counted for the day. */
    lastRunDate: string;
    /** Run count per build number. */
    buildNumbers: Record<string, number>;
    /** Run count per run status. */
    statuses: Record<string, number>;
    /** Run count per run origin. */
    origins: Record<string, number>;
}

/** Daily aggregations keyed by their date: `{ date: stats }`. */
type DateAggregations = { [date: string]: DateAggregation };

/** Progress of the Actor, kept so that processing can continue after a restart. */
export interface State {
    /** Everything aggregated so far. */
    dateAggregations: DateAggregations;
    /** ID of the run at which processing was paused because of a migration, otherwise `null`. */
    lastProcessedRunId: string | null;
    /** Offset of the last page of runs that was handed to `processRuns`. */
    lastProcessedOffset: number;
}
32
// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init()
await Actor.init();

const { actorIdOrName, onlyRunsNewerThan, onlyRunsOlderThan, getCostBreakdown = false } = (await Actor.getInput<Input>())!;

/**
 * Converts an optional date input into a `Date`.
 *
 * @param rawDate Value of the input field; empty or missing means "no bound".
 * @param inputName Name of the input field, used in the failure message.
 * @returns The parsed date, or `undefined` when no value was provided.
 * @throws Error when the value is not parseable and `Actor.fail` did not end the process.
 */
const parseDateInput = async (rawDate: string | undefined, inputName: string): Promise<Date | undefined> => {
    if (!rawDate) {
        return undefined;
    }

    const parsedDate = new Date(rawDate);
    if (Number.isNaN(parsedDate.getTime())) {
        const failureMessage = `Invalid date format for ${inputName}, use YYYY-MM-DD or with time YYYY-MM-DDTHH:mm:ss`;
        // Await the failure so it can finish reporting the message; throwing the
        // un-awaited promise would abort the module while the failure is still in flight.
        await Actor.fail(failureMessage);
        // Safety net in case fail() returns instead of ending the process.
        throw new Error(failureMessage);
    }

    return parsedDate;
};

const onlyRunsNewerThanDate = await parseDateInput(onlyRunsNewerThan, 'onlyRunsNewerThan');
const onlyRunsOlderThanDate = await parseDateInput(onlyRunsOlderThan, 'onlyRunsOlderThan');
55
const runsClient = Actor.apifyClient.actor(actorIdOrName).runs();

// Progress state stored under the 'STATE' key; the second argument is the
// initial value used when nothing has been stored yet.
const state = await useState<State>(
    'STATE',
    { lastProcessedOffset: 0, lastProcessedRunId: null, dateAggregations: {} },
);

// Page size of one list request. The same constant drives the request, the
// "last page" check and the offset increment so that they cannot drift apart.
const LIMIT = 1000;
let offset = state.lastProcessedOffset;
for (;;) {
    // Newest runs first, one page at a time
    const { items: runs } = await runsClient.list({ desc: true, limit: LIMIT, offset });

    log.info(`Loaded ${runs.length} runs (offset from now: ${offset}), newest: ${runs[0]?.startedAt}, `
        + `oldest: ${runs[runs.length - 1]?.startedAt} processing them now`);

    const { stopLoop } = await processRuns({
        runs,
        state,
        onlyRunsOlderThanDate,
        onlyRunsNewerThanDate,
        getCostBreakdown,
    });

    // Remember which page was handled last so a restarted run can continue from it
    state.lastProcessedOffset = offset;

    if (stopLoop) {
        log.warning(`Reached onlyRunsNewerThanDate ${onlyRunsNewerThanDate}, stopping loading runs`);
        break;
    }

    // A page shorter than the page size is the last one
    if (runs.length < LIMIT) {
        log.warning('No more runs to process, stopping loading runs');
        break;
    }

    offset += LIMIT;
}
93
/** Rounds a USD amount to 4 decimal places. */
const roundUsd = (amount: number): number => Number(amount.toFixed(4));

/** Adds every value of `source` to the entry with the same key in `target`. */
const addInto = (target: Record<string, number>, source: Record<string, number>): void => {
    for (const [key, value] of Object.entries(source)) {
        target[key] = (target[key] ?? 0) + value;
    }
};

// Summary over the whole processed period, built from the daily aggregations
const totalStats: Omit<DateAggregation, 'date'> = {
    runCount: 0,
    cost: 0,
    costDetail: {},
    firstRunDate: '',
    lastRunDate: '',
    buildNumbers: {},
    statuses: {},
    origins: {},
};

const dailyAggregations: DateAggregation[] = Object.values(state.dateAggregations);

for (const aggregation of dailyAggregations) {
    totalStats.runCount += aggregation.runCount;
    totalStats.cost += aggregation.cost;

    // Both dates are ISO timestamps, so comparing them as strings compares them
    // chronologically; this keeps the result independent of the key order.
    if (!totalStats.lastRunDate || aggregation.lastRunDate > totalStats.lastRunDate) {
        totalStats.lastRunDate = aggregation.lastRunDate;
    }
    if (!totalStats.firstRunDate || aggregation.firstRunDate < totalStats.firstRunDate) {
        totalStats.firstRunDate = aggregation.firstRunDate;
    }

    addInto(totalStats.buildNumbers, aggregation.buildNumbers);
    addInto(totalStats.statuses, aggregation.statuses);
    addInto(totalStats.origins, aggregation.origins);
    // Sum the unrounded amounts; rounding happens once, after the loop
    addInto(totalStats.costDetail, aggregation.costDetail);
}

// Round the totals the same way as the daily values so that the summary does
// not carry floating point noise.
totalStats.cost = roundUsd(totalStats.cost);
for (const [usageType, usageUsd] of Object.entries(totalStats.costDetail)) {
    totalStats.costDetail[usageType] = roundUsd(usageUsd);
}

// One dataset item per day, with costs rounded for output
await Actor.pushData(dailyAggregations.map((aggregation) => {
    const cleanedCostDetail: Record<string, number> = {};
    for (const [usageType, usageUsd] of Object.entries(aggregation.costDetail)) {
        cleanedCostDetail[usageType] = roundUsd(usageUsd);
    }

    return { ...aggregation, cost: roundUsd(aggregation.cost), costDetail: cleanedCostDetail };
}));

await Actor.setValue('STATE', state);
await Actor.setValue('TOTAL_STATS', totalStats);

// Point the user to the summary record in the exit message
const store = await Actor.openKeyValueStore();
const url = store.getPublicUrl('TOTAL_STATS');
await Actor.exit(`Total stats for whole period are available at ${url}`);

src/process-runs.ts

1import { Actor, log } from 'apify';
2
3import type { ActorRunListItem, ActorRun } from 'apify-client';
4import { sleep } from 'crawlee';
5import type { State } from './main.js';
6
/** Arguments of `processRuns`. */
interface ProcessRunsInputs {
    /** One page of runs, ordered from newest to oldest. */
    runs: ActorRunListItem[];
    /** Shared progress object; it is updated in place. */
    state: State;
    /** Runs started after this date are left out. */
    onlyRunsOlderThanDate?: Date;
    /** Processing stops at the first run started before this date. */
    onlyRunsNewerThanDate?: Date;
    /** Whether to request each run separately to obtain the per-usage-type costs. */
    getCostBreakdown: boolean;
}
14
/** Set by the 'migrating' event; checked before every run. */
let isMigrating = false;
Actor.on('migrating', () => {
    isMigrating = true;
});

/** Whether the run stored in `state.lastProcessedRunId` was already reached in this process. */
let foundLastProcessedRun = false;

/**
 * Adds one page of runs to the daily aggregations in `state`.
 *
 * Runs are expected newest first. When `state.lastProcessedRunId` is set (the
 * previous process was paused at that run), runs are skipped until that run is
 * reached. Runs started after `onlyRunsOlderThanDate` are left out, and the
 * first run started before `onlyRunsNewerThanDate` ends the processing.
 *
 * @param inputs See `ProcessRunsInputs`; `state` is modified in place.
 * @returns `stopLoop: true` when a run older than `onlyRunsNewerThanDate` was reached,
 *   meaning no further pages need to be loaded; otherwise `stopLoop: false`.
 */
export const processRuns = async ({ runs, state, onlyRunsOlderThanDate, onlyRunsNewerThanDate, getCostBreakdown }: ProcessRunsInputs): Promise<{ stopLoop: boolean }> => {
    // Runs are in desc mode (newest first)
    for (const listedRun of runs) {
        if (isMigrating) {
            log.warning('Actor is migrating, pausing all processing and storing last state to continue where we left of');
            // While still catching up to a previously stored marker, keep that marker:
            // the current run was already processed and must not become the resume point.
            const isCatchingUp = Boolean(state.lastProcessedRunId) && !foundLastProcessedRun;
            if (!isCatchingUp) {
                state.lastProcessedRunId = listedRun.id;
            }
            // Store the marker explicitly; nothing else in this function writes the
            // state and the process is expected to be stopped during the sleep below.
            // NOTE(review): 'STATE' is the key main.ts uses for this state - keep in sync.
            await Actor.setValue('STATE', state);
            await sleep(999999);
        }

        // If we load after migration, we need to find the run we stopped at
        if (state.lastProcessedRunId && !foundLastProcessedRun) {
            if (state.lastProcessedRunId !== listedRun.id) {
                log.warning(`Skipping run we already processed before migration ${listedRun.id}`);
                continue;
            }
            foundLastProcessedRun = true;
            state.lastProcessedRunId = null;
        }

        if (onlyRunsOlderThanDate && listedRun.startedAt > onlyRunsOlderThanDate) {
            continue;
        }
        if (onlyRunsNewerThanDate && listedRun.startedAt < onlyRunsNewerThanDate) {
            // We are going from present to past so at this point we can exit
            return { stopLoop: true };
        }

        // Request the run detail only for runs that are actually counted. If the
        // detail is not returned, fall back to the data from the list.
        const detailedRun = getCostBreakdown
            ? await Actor.apifyClient.run(listedRun.id).get()
            : undefined;
        const run: ActorRunListItem = detailedRun ?? listedRun;

        const startedAtIso = run.startedAt.toISOString();
        const runDate = startedAtIso.split('T')[0];
        const aggregation = (state.dateAggregations[runDate] ??= {
            date: runDate,
            runCount: 0,
            cost: 0,
            costDetail: {},
            firstRunDate: startedAtIso,
            lastRunDate: startedAtIso,
            buildNumbers: {},
            statuses: {},
            origins: {},
        });

        aggregation.runCount++;
        aggregation.cost += run.usageTotalUsd ?? 0;

        // Per-usage-type costs are only present on the run detail
        const usageUsdByType = (run as ActorRun).usageUsd as Record<string, number> | undefined;
        if (usageUsdByType) {
            for (const [usageType, usageUsd] of Object.entries(usageUsdByType)) {
                aggregation.costDetail[usageType] = (aggregation.costDetail[usageType] ?? 0) + usageUsd;
            }
        }

        // lastRunDate is always the first we encounter because we go desc so we don't have to update it
        aggregation.firstRunDate = startedAtIso;

        aggregation.buildNumbers[run.buildNumber] = (aggregation.buildNumbers[run.buildNumber] ?? 0) + 1;
        aggregation.statuses[run.status] = (aggregation.statuses[run.status] ?? 0) + 1;
        aggregation.origins[run.meta.origin] = (aggregation.origins[run.meta.origin] ?? 0) + 1;
    }

    return { stopLoop: false };
};

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "root": true,
3    "env": {
4        "browser": true,
5        "es2020": true,
6        "node": true
7    },
8    "extends": [
9        "@apify/eslint-config-ts"
10    ],
11    "parserOptions": {
12        "project": "./tsconfig.json",
13        "ecmaVersion": 2020
14    },
15    "ignorePatterns": [
16        "node_modules",
17        "dist",
18        "**/*.d.ts"
19    ]
20}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv

package.json

1{
2	"name": "actor-costs",
3	"version": "0.0.1",
4	"type": "module",
5	"description": "Apify Actor that aggregates the costs and usage stats of another Actor's runs by day.",
6	"engines": {
7		"node": ">=18.0.0"
8	},
9	"dependencies": {
10		"apify": "^3.1.10",
11		"crawlee": "^3.5.4"
12	},
13	"devDependencies": {
14		"@apify/eslint-config-ts": "^0.3.0",
15		"@apify/tsconfig": "^0.1.0",
16		"@typescript-eslint/eslint-plugin": "^6.7.2",
17		"@typescript-eslint/parser": "^6.7.2",
18		"eslint": "^8.50.0",
19		"tsx": "^4.6.2",
20		"typescript": "^5.5"
21	},
22	"scripts": {
23		"start": "npm run start:dev",
24		"start:prod": "node dist/main.js",
25		"start:dev": "tsx src/main.ts",
26		"build": "tsc",
27		"lint": "eslint ./src --ext .ts",
28		"lint:fix": "eslint ./src --ext .ts --fix",
29		"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
30	},
31	"author": "It's not you it's me",
32	"license": "ISC"
33}

tsconfig.json

1{
2    "extends": "@apify/tsconfig",
3    "compilerOptions": {
4        "module": "NodeNext",
5        "moduleResolution": "NodeNext",
6        "target": "ES2022",
7        "outDir": "dist",
8        "noUnusedLocals": false,
9        "skipLibCheck": true,
10        "lib": ["DOM"]
11    },
12    "include": [
13        "./src/**/*"
14    ]
15}
Developer
Maintained by Community
Actor metrics
  • 2 monthly users
  • 3 stars
  • 84.6% runs succeeded
  • Created in May 2024
  • Modified 29 days ago