Actor Costs
Try for free
No credit card required
Go to Store
Actor Costs
lukaskrivka/actor-costs
Try for free
No credit card required
Get cost and usage stats for your Actor runs, aggregated daily. The Actor also provides summary stats for the whole period.
.actor/Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20 AS builder

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install all dependencies, including the dev dependencies needed
# for the build. Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false

# Next, copy the source files.
COPY . ./

# Build the project (runs the "build" script from package.json).
RUN npm run build

# Create final image
FROM apify/actor-node:20

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Copy built JS files from the builder stage. The --from=builder flag is
# required: without it COPY resolves the source path inside the build
# context instead of the filesystem of the first stage.
COPY --from=builder /usr/src/app/dist ./dist

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./


# Run the image.
CMD npm run start:prod --silent
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "actor-costs",
4 "title": "Actor Costs",
5 "description": "Get cost and usage stats for your Actor runs aggregated daily, plus summary stats for the whole period.",
6 "version": "0.0",
7 "meta": {
8 "templateId": "ts-crawlee-cheerio"
9 },
10 "input": "./input_schema.json",
11 "dockerfile": "./Dockerfile"
12}
.actor/input_schema.json
1{
2 "title": "Actor Costs input",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "actorIdOrName": {
7 "title": "Actor ID or full name",
8 "type": "string",
9 "description": "Actor ID or full name",
10 "editor": "textfield",
11 "prefill": "apify/web-scraper"
12 },
13 "onlyRunsNewerThan": {
14 "title": "Only runs newer than date",
15 "type": "string",
16 "description": "Measured by when the run was started. Use JSON input to specify date with a time in ISO format, e.g. \"2024-01-01T12:00:00\"",
17 "editor": "datepicker"
18 },
19 "onlyRunsOlderThan": {
20 "title": "Only runs older than date",
21 "type": "string",
22 "description": "Measured by when the run was started. Use JSON input to specify date with a time in ISO format, e.g. \"2024-01-01T12:00:00\"",
23 "editor": "datepicker"
24 },
25 "getCostBreakdown": {
26 "title": "Get cost breakdown by usage type (1000x slower!)",
27 "type": "boolean",
28 "description": "Very slow since we need to request each run separately",
29 "default": false
30 },
31 "getDatasetItemCount": {
32 "title": "Get dataset item count (1000x slower!)",
33 "type": "boolean",
34 "description": "Very slow since we need to request each run separately",
35 "default": false
36 }
37 },
38 "required": ["actorIdOrName"]
39}
src/main.ts
1import { Actor, log } from 'apify';
2import { useState } from 'crawlee';
3import { processRuns } from './process-runs.js';
4
/** Actor input; the field names match the properties of the input schema. */
interface Input {
    actorIdOrName: string;
    onlyRunsNewerThan?: string;
    onlyRunsOlderThan?: string;
    getCostBreakdown?: boolean;
    getDatasetItemCount?: boolean;
}

/** Stats collected for the runs that started on a single day. */
interface DateAggregation {
    date: string;
    runCount: number;
    cost: number;
    /** Only when requested in input */
    datasetItems?: number;
    costDetail: Record<string, number>;
    firstRunDate: string;
    lastRunDate: string;
    buildNumbers: Record<string, number>;
    statuses: Record<string, number>;
    origins: Record<string, number>;
}

/** { date: stats } */
type DateAggregations = { [date: string]: DateAggregation };

/** Progress of the Actor that is kept across restarts. */
export interface State {
    dateAggregations: DateAggregations;
    lastProcessedRunId: string | null;
    lastProcessedOffset: number;
}
35
// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init()
await Actor.init();

// The input schema lists actorIdOrName as required, so the input is expected to be present here.
const {
    actorIdOrName,
    onlyRunsNewerThan,
    onlyRunsOlderThan,
    getCostBreakdown = false,
    getDatasetItemCount = false,
} = (await Actor.getInput<Input>())!;

/**
 * Converts an optional date string from the input into a Date.
 *
 * @param value Raw input value; an empty or missing value means "no limit".
 * @param inputName Name of the input field, used in the failure message.
 * @returns The parsed date, or undefined when no value was provided.
 * @throws Error when the value cannot be parsed, after the Actor has been marked as failed.
 */
const parseDateInput = async (value: string | undefined, inputName: string): Promise<Date | undefined> => {
    if (!value) {
        return undefined;
    }

    const parsedDate = new Date(value);
    if (Number.isNaN(parsedDate.getTime())) {
        const message = `Invalid date format for ${inputName}, use YYYY-MM-DD or with time YYYY-MM-DDTHH:mm:ss`;
        // Actor.fail() is asynchronous, so it has to be awaited; throwing its pending promise
        // would abort the module before the failure message is reported.
        await Actor.fail(message);
        // Safeguard: never continue with an invalid date if the process is still alive.
        throw new Error(message);
    }

    return parsedDate;
};

const onlyRunsNewerThanDate = await parseDateInput(onlyRunsNewerThan, 'onlyRunsNewerThan');

const onlyRunsOlderThanDate = await parseDateInput(onlyRunsOlderThan, 'onlyRunsOlderThan');
64
const runsClient = Actor.apifyClient.actor(actorIdOrName).runs();

// Progress is kept under the 'STATE' key so a restarted Actor can pick up from the stored offset.
const state = await useState<State>('STATE', {
    lastProcessedOffset: 0,
    lastProcessedRunId: null,
    dateAggregations: {},
});

// Page size used for listing runs.
const LIMIT = 1000;
let offset = state.lastProcessedOffset;
let keepLoading = true;
while (keepLoading) {
    // Runs are listed newest first, one page at a time.
    const { items: runs } = await runsClient.list({ desc: true, limit: LIMIT, offset });

    const newestStartedAt = runs[0]?.startedAt;
    const oldestStartedAt = runs[runs.length - 1]?.startedAt;
    log.info(`Loaded ${runs.length} runs (offset from now: ${offset}), newest: ${newestStartedAt}, oldest: ${oldestStartedAt} processing them now`);

    const { stopLoop } = await processRuns({
        runs,
        state,
        onlyRunsOlderThanDate,
        onlyRunsNewerThanDate,
        getCostBreakdown,
        getDatasetItemCount,
    });

    // Remember the page that has just been processed.
    state.lastProcessedOffset = offset;

    if (stopLoop) {
        log.warning(`Reached onlyRunsNewerThanDate ${onlyRunsNewerThanDate}, stopping loading runs`);
        keepLoading = false;
    } else if (runs.length < LIMIT) {
        // A page shorter than the limit is the last one.
        log.warning('No more runs to process, stopping loading runs');
        keepLoading = false;
    } else {
        offset += LIMIT;
    }
}
103
/** Rounds a USD amount to the 4 decimal places used in the output. */
const roundUsd = (usd: number): number => Number(usd.toFixed(4));

/** Adds every counter from `source` to the counter with the same key in `target`. */
const addCounts = (target: Record<string, number>, source: Record<string, number>): void => {
    for (const [key, count] of Object.entries(source)) {
        target[key] = (target[key] ?? 0) + count;
    }
};

// Summary over all daily aggregations, stored separately from the dataset.
const totalStats: Omit<DateAggregation, 'date'> = {
    runCount: 0,
    cost: 0,
    costDetail: {},
    firstRunDate: '',
    lastRunDate: '',
    buildNumbers: {},
    statuses: {},
    origins: {},
};

// One dataset item per day, with costs rounded for output.
const dailyItems: DateAggregation[] = [];

for (const aggregation of Object.values(state.dateAggregations)) {
    totalStats.runCount += aggregation.runCount;
    totalStats.cost += aggregation.cost;
    if (aggregation.datasetItems) {
        totalStats.datasetItems = (totalStats.datasetItems ?? 0) + aggregation.datasetItems;
    }

    // ISO timestamps sort lexicographically, so the overall range can be computed
    // without relying on the iteration order of the aggregations.
    if (!totalStats.lastRunDate || aggregation.lastRunDate > totalStats.lastRunDate) {
        totalStats.lastRunDate = aggregation.lastRunDate;
    }
    if (!totalStats.firstRunDate || aggregation.firstRunDate < totalStats.firstRunDate) {
        totalStats.firstRunDate = aggregation.firstRunDate;
    }

    addCounts(totalStats.buildNumbers, aggregation.buildNumbers);
    addCounts(totalStats.statuses, aggregation.statuses);
    addCounts(totalStats.origins, aggregation.origins);

    const cleanedCostDetail: Record<string, number> = {};
    for (const [usageType, usageUsd] of Object.entries(aggregation.costDetail)) {
        cleanedCostDetail[usageType] = roundUsd(usageUsd);
        // Sum the unrounded amounts; rounding each day first would drop small costs from the total.
        totalStats.costDetail[usageType] = (totalStats.costDetail[usageType] ?? 0) + usageUsd;
    }

    dailyItems.push({ ...aggregation, cost: roundUsd(aggregation.cost), costDetail: cleanedCostDetail });
}

// Round the per-usage-type totals once, after everything has been summed.
for (const [usageType, usageUsd] of Object.entries(totalStats.costDetail)) {
    totalStats.costDetail[usageType] = roundUsd(usageUsd);
}

await Actor.pushData(dailyItems);

await Actor.setValue('STATE', state);
await Actor.setValue('TOTAL_STATS', totalStats);

const store = await Actor.openKeyValueStore();
const url = store.getPublicUrl('TOTAL_STATS');
await Actor.exit(`Total stats for whole period are available at ${url}`);
src/process-runs.ts
1import { Actor, log } from 'apify';
2
3import type { ActorRunListItem, ActorRun } from 'apify-client';
4import { sleep } from 'crawlee';
5import type { State } from './main.js';
6
/** Arguments of processRuns. */
interface ProcessRunsInputs {
    /** Runs to process, in the order they should be walked. */
    runs: ActorRunListItem[];
    /** Mutable state the aggregations are written into. */
    state: State;
    /** When set, runs started after this date are skipped. */
    onlyRunsOlderThanDate?: Date;
    /** When set, processing stops at the first run started before this date. */
    onlyRunsNewerThanDate?: Date;
    /** Whether each run is fetched individually to get its usage per type. */
    getCostBreakdown: boolean;
    /** Whether the default dataset of each run is fetched to get its item count. */
    getDatasetItemCount: boolean;
}
15
// Becomes true once the 'migrating' event is received; processRuns checks it for every run.
let isMigrating = false;
const markMigrating = (): void => {
    isMigrating = true;
};
Actor.on('migrating', markMigrating);

// Whether the run stored in state.lastProcessedRunId has already been reached in this process.
let foundLastProcessedRun = false;
22
/**
 * Adds the given runs to the per-day aggregations in `state.dateAggregations`.
 *
 * Runs are expected newest first. Runs started after `onlyRunsOlderThanDate` are skipped and the
 * first run started before `onlyRunsNewerThanDate` ends the processing.
 *
 * @returns `stopLoop: true` when a run older than `onlyRunsNewerThanDate` was reached, so no further
 * runs need to be loaded; `stopLoop: false` otherwise.
 */
export const processRuns = async ({
    runs,
    state,
    onlyRunsOlderThanDate,
    onlyRunsNewerThanDate,
    getCostBreakdown,
    getDatasetItemCount,
}: ProcessRunsInputs): Promise<{ stopLoop: boolean }> => {
    // Runs are in desc mode
    for (const listedRun of runs) {
        if (isMigrating) {
            log.warning('Actor is migrating, pausing all processing and storing last state to continue where we left of');
            // This run has not been aggregated yet, so processing resumes with it after the restart.
            state.lastProcessedRunId = listedRun.id;
            // Store the state right away instead of relying on a later automatic save.
            await Actor.setValue('STATE', state);
            await sleep(999999);
        }

        // If we load after migration, we need to find run we already processed
        if (state.lastProcessedRunId && !foundLastProcessedRun) {
            if (state.lastProcessedRunId !== listedRun.id) {
                log.warning(`Skipping run we already processed before migration ${listedRun.id}`);
                continue;
            }
            foundLastProcessedRun = true;
            state.lastProcessedRunId = null;
        }

        if (onlyRunsOlderThanDate && listedRun.startedAt > onlyRunsOlderThanDate) {
            continue;
        }
        if (onlyRunsNewerThanDate && listedRun.startedAt < onlyRunsNewerThanDate) {
            // We are going from present to past so at this point we can exit
            return { stopLoop: true };
        }

        // The per-run requests are slow, so they are made only for runs that are actually counted.
        let run: ActorRunListItem | ActorRun = listedRun;
        if (getCostBreakdown) {
            // Fall back to the list item when the run detail is not available.
            run = (await Actor.apifyClient.run(listedRun.id).get()) ?? listedRun;
        }

        let cleanItemCount: number | null = null;
        if (getDatasetItemCount) {
            // The dataset may no longer exist; in that case the run adds no items.
            const dataset = await Actor.apifyClient.dataset(run.defaultDatasetId).get();
            cleanItemCount = dataset?.cleanItemCount ?? null;
        }

        const startedAtIso = run.startedAt.toISOString();
        const runDate = startedAtIso.split('T')[0];
        const aggregation = (state.dateAggregations[runDate] ??= {
            date: runDate,
            runCount: 0,
            cost: 0,
            costDetail: {},
            firstRunDate: startedAtIso,
            lastRunDate: startedAtIso,
            buildNumbers: {},
            statuses: {},
            origins: {},
        });

        aggregation.runCount++;
        aggregation.cost += run.usageTotalUsd ?? 0;

        // Usage per type is read only when the run object carries usageUsd.
        const usageByType = (run as ActorRun).usageUsd as Record<string, number> | undefined;
        if (usageByType) {
            for (const [usageType, usageUsd] of Object.entries(usageByType)) {
                aggregation.costDetail[usageType] = (aggregation.costDetail[usageType] ?? 0) + usageUsd;
            }
        }

        // lastRunDate is always the first we encounter because we go desc so we don't have to update it
        aggregation.firstRunDate = startedAtIso;

        aggregation.buildNumbers[run.buildNumber] = (aggregation.buildNumbers[run.buildNumber] ?? 0) + 1;
        aggregation.statuses[run.status] = (aggregation.statuses[run.status] ?? 0) + 1;
        aggregation.origins[run.meta.origin] = (aggregation.origins[run.meta.origin] ?? 0) + 1;

        if (getDatasetItemCount && cleanItemCount !== null) {
            aggregation.datasetItems = (aggregation.datasetItems ?? 0) + cleanItemCount;
        }
    }

    return { stopLoop: false };
};
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
.editorconfig
1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf
.eslintrc
1{
2 "root": true,
3 "env": {
4 "browser": true,
5 "es2020": true,
6 "node": true
7 },
8 "extends": [
9 "@apify/eslint-config-ts"
10 ],
11 "parserOptions": {
12 "project": "./tsconfig.json",
13 "ecmaVersion": 2020
14 },
15 "ignorePatterns": [
16 "node_modules",
17 "dist",
18 "**/*.d.ts"
19 ]
20}
.gitignore
1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv
package.json
1{
2 "name": "actor-costs",
3 "version": "0.0.1",
4 "type": "module",
5 "description": "Apify Actor that aggregates daily costs and usage stats of another Actor's runs.",
6 "engines": {
7 "node": ">=18.0.0"
8 },
9 "dependencies": {
10 "apify": "^3.1.10",
11 "crawlee": "^3.5.4"
12 },
13 "devDependencies": {
14 "@apify/eslint-config-ts": "^0.3.0",
15 "@apify/tsconfig": "^0.1.0",
16 "@typescript-eslint/eslint-plugin": "^6.7.2",
17 "@typescript-eslint/parser": "^6.7.2",
18 "eslint": "^8.50.0",
19 "tsx": "^4.6.2",
20 "typescript": "^5.5"
21 },
22 "scripts": {
23 "start": "npm run start:dev",
24 "start:prod": "node dist/main.js",
25 "start:dev": "tsx src/main.ts",
26 "build": "tsc",
27 "lint": "eslint ./src --ext .ts",
28 "lint:fix": "eslint ./src --ext .ts --fix",
29 "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
30 },
31 "author": "It's not you it's me",
32 "license": "ISC"
33}
tsconfig.json
1{
2 "extends": "@apify/tsconfig",
3 "compilerOptions": {
4 "module": "NodeNext",
5 "moduleResolution": "NodeNext",
6 "target": "ES2022",
7 "outDir": "dist",
8 "noUnusedLocals": false,
9 "skipLibCheck": true,
10 "lib": ["DOM"]
11 },
12 "include": [
13 "./src/**/*"
14 ]
15}
Developer
Maintained by Community
Actor Metrics
1 monthly user
-
3 stars
>99% runs succeeded
Created in May 2024
Modified 3 months ago
Categories