Copy to bucket avatar
Copy to bucket

Deprecated

Pricing

Pay per usage

Go to Store
Copy to bucket

Copy to bucket

Deprecated

Developed by

David Cumings

David Cumings

Maintained by Community

0.0 (0)

Pricing

Pay per usage

2

Total users

3

Monthly users

1

Last modified

3 years ago

Dockerfile

# Template Dockerfile used to build and run acts in the Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user,
# because every COPY below targets the current working directory ("./").
FROM apify/actor-node-basic:v0.21.10
# First, copy only the files matching package*.json (package.json and package-lock.json),
# since they should be the only files that affect "npm install" in the next step.
# Keeping them in their own step speeds up the build.
COPY package*.json ./
# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Keep the log output short and print the dependency
# tree plus the Node.js and NPM versions for debugging.
# "npm list" is wrapped in "(... || true)" so that a non-zero exit code from it
# does not abort the rest of the && chain and fail the build.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Copy source code to container.
# Do this in the last step, to have a fast build if only the source code changed.
COPY . ./
# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

{
"name": "apify-project",
"version": "0.0.1",
"description": "",
"author": "It's not you it's me",
"license": "ISC",
"dependencies": {
"apify": "0.21.10",
"aws-sdk": "latest",
"left-pad": "latest",
"type-check": "latest"
},
"scripts": {
"start": "node main.js"
}
}

main.js

const Apify = require('apify');
const AWS = require('aws-sdk');
const leftPad = require('left-pad');
const typeCheck = require('type-check').typeCheck;

// Definition of the input, in type-check syntax. Every field is an optional string:
//   _id   - used as the executionId when fetching results and as the file name prefix
//   actId - accepted by the type check but not read anywhere in this script
//   data  - JSON-encoded settings object; the keys read below are
//           itemsPerFile, executionResultsParams and awsS3Params
const INPUT_TYPE = `{
 _id: Maybe String,
 actId: Maybe String,
 data: Maybe String,
}`;

// Progress record persisted under the 'STATE' key after every uploaded file:
//   offset     - number of result items already exported
//   storeCount - number of files already written to S3
// Frozen because it is a shared template; each run works on its own copy.
const DEFAULT_STATE = Object.freeze({
    offset: 0,
    storeCount: 0,
});

// S3 key prefix under which all exported files are stored.
const S3_KEY_PREFIX = 'streaming/files/';

/**
 * Act entry point: pages through the results of one crawler execution and
 * uploads each page as a separate, sequentially numbered file to S3.
 *
 * Progress is saved to the 'STATE' key after every upload and read back at
 * start-up, so the export continues from the stored offset when one exists.
 *
 * Throws an Error when the input does not match INPUT_TYPE; errors from
 * JSON.parse, the Apify client and the S3 client propagate unchanged.
 */
Apify.main(async () => {
    // Get input of your act
    const input = await Apify.getValue('INPUT');

    // Check input params
    if (!typeCheck(INPUT_TYPE, input)) {
        console.log('Expected input:');
        console.log(INPUT_TYPE);
        console.log('Received input:');
        console.dir(input);
        throw new Error('Received invalid input');
    }

    // `data` is optional in INPUT_TYPE, so fall back to an empty settings object
    // instead of letting JSON.parse(undefined) fail with an unrelated SyntaxError.
    // The trailing `|| {}` also covers a payload that parses to null.
    const finishWebhookData = (input.data ? JSON.parse(input.data) : null) || {};

    // Work on a private copy so the shared DEFAULT_STATE is never mutated.
    // Object.assign skips a null/undefined source, i.e. the "no stored state" case.
    const state = Object.assign({}, DEFAULT_STATE, await Apify.getValue('STATE'));

    // Set default values
    const itemsPerFile = finishWebhookData.itemsPerFile || 1000;
    const executionResultsParams = finishWebhookData.executionResultsParams || {};
    const awsS3Params = finishWebhookData.awsS3Params || {};
    const format = executionResultsParams.format || 'json';

    // The bucket is only needed for the log line; read it defensively so a missing
    // `params` object cannot crash the act after a successful upload.
    const bucket = (awsS3Params.params && awsS3Params.params.Bucket) || '(unknown bucket)';

    // Download data and save them to S3
    const s3 = new AWS.S3(awsS3Params);
    while (true) {
        // Build a fresh params object per request rather than mutating the
        // caller-supplied one; the paging fields still take precedence.
        const requestParams = Object.assign({}, executionResultsParams, {
            executionId: input._id,
            limit: itemsPerFile,
            offset: state.offset,
        });
        const executionResultsList = await Apify.client.crawlers.getExecutionResults(requestParams);
        const lastCount = parseInt(executionResultsList.count, 10);

        // Stop on an empty page. A missing or non-numeric count (NaN) also stops the
        // loop: otherwise the offset would become NaN and the loop would never end.
        if (!(lastCount > 0)) break;

        // Non-JSON formats are uploaded as returned; JSON items are serialized first.
        const rawResults = format !== 'json'
            ? executionResultsList.items
            : JSON.stringify(executionResultsList.items);
        const file = Buffer.from(rawResults);
        const fileName = `${input._id}_${leftPad(state.storeCount + 1, 9, '0')}.${format}`;
        await s3.putObject({
            Key: `${S3_KEY_PREFIX}${fileName}`,
            Body: file,
        }).promise();

        // Update Act state
        state.offset += lastCount;
        state.storeCount++;
        await Apify.setValue('STATE', state);
        console.log(`Saved ${lastCount} web pages to file ${bucket}/${fileName}`);
    }

    console.log('Act finished');
});