Copy to bucket avatar

Copy to bucket

Deprecated
Go to Store
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
Copy to bucket

Copy to bucket

bbee/copytobucket

Dockerfile

# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10

# Copy just package.json and package-lock.json first, since they should be
# the only files that affect "npm install" in the next step. Keeping them in
# their own layer speeds up the build when only the source code changes.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree plus the Node.js and NPM versions for debugging.
# The "|| true" after "npm list" keeps the build going even if listing the
# dependency tree exits with a non-zero status.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container.
# Do this in the last step, to have fast build if only the source code changed.
COPY  . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

1{
2    "name": "apify-project",
3    "version": "0.0.1",
4    "description": "",
5    "author": "It's not you it's me",
6    "license": "ISC",
7    "dependencies": {
8        "apify": "0.21.10",
9        "aws-sdk": "latest",
10        "left-pad": "latest",
11        "type-check": "latest"
12    },
13    "scripts": {
14        "start": "node main.js"
15    }
16}

main.js

1const Apify = require('apify');
2const AWS = require('aws-sdk');
3const leftPad = require('left-pad');
4const typeCheck = require('type-check').typeCheck;
5
6
// Shape of the actor input, written in the `type-check` type syntax.
// Every field is optional (`Maybe`) as far as validation is concerned.
const INPUT_TYPE = [
    '{',
    '    _id: Maybe String,',
    '    actId: Maybe String,',
    '    data: Maybe String,',
    '}',
].join('\n');

// Progress record used when no 'STATE' value has been stored yet:
// nothing fetched so far and no files written.
const DEFAULT_STATE = { offset: 0, storeCount: 0 };
18
/**
 * Actor entry point: copies the results of a crawler execution to AWS S3,
 * one file per page of results.
 *
 * Input (validated against INPUT_TYPE):
 *   - _id:  ID of the crawler execution whose results are copied.
 *   - data: optional JSON string with the settings `itemsPerFile`
 *           (default 1000), `executionResultsParams` (extra parameters for
 *           the results request, e.g. `format`) and `awsS3Params`
 *           (options passed to the `AWS.S3` constructor).
 *
 * Progress (`offset`, `storeCount`) is persisted under the 'STATE' key after
 * every uploaded file, so a restarted run continues where it stopped.
 *
 * Throws an Error when the input does not match INPUT_TYPE; errors from
 * JSON parsing, the Apify client and the S3 client propagate unchanged.
 */
Apify.main(async () => {
    // Get input of your act
    const input = await Apify.getValue('INPUT');

    // Check input params
    if (!typeCheck(INPUT_TYPE, input)) {
        console.log('Expected input:');
        console.log(INPUT_TYPE);
        console.log('Received input:');
        console.dir(input);
        throw new Error('Received invalid input');
    }

    // `data` is optional in INPUT_TYPE and every setting in it has a default,
    // so a missing/empty value (or a JSON `null`) means "use the defaults"
    // instead of failing inside JSON.parse.
    const finishWebhookData = (input.data ? JSON.parse(input.data) : null) || {};

    // Resume from the stored state if there is one. Otherwise start from a
    // copy of the defaults so the shared DEFAULT_STATE object is never mutated.
    const storedState = await Apify.getValue('STATE');
    const state = storedState || Object.assign({}, DEFAULT_STATE);

    // Set default values
    const itemsPerFile = finishWebhookData.itemsPerFile || 1000;
    const executionResultsParams = finishWebhookData.executionResultsParams || {};
    const awsS3Params = finishWebhookData.awsS3Params || {};

    const fileExtension = executionResultsParams.format || 'json';
    const s3KeyPrefix = 'streaming/files/';
    // Only used for logging; guard against `awsS3Params.params` being absent.
    const bucketName = (awsS3Params.params && awsS3Params.params.Bucket) || '<unknown bucket>';

    // Download data and save them to S3
    const s3 = new AWS.S3(awsS3Params);
    while (true) {
        // Build a fresh request object so the user-supplied params stay untouched.
        const requestParams = Object.assign({}, executionResultsParams, {
            executionId: input._id,
            limit: itemsPerFile,
            offset: state.offset,
        });
        const executionResultsList = await Apify.client.crawlers.getExecutionResults(requestParams);
        const pageCount = parseInt(executionResultsList.count, 10);

        // Stop on an empty page. Also stop when the count is missing or not a
        // number: a NaN count would otherwise never equal 0 and the loop would
        // run forever with a NaN offset.
        if (!(pageCount > 0)) break;

        // Non-JSON formats are uploaded as returned; JSON items are serialized first.
        const rawResults = fileExtension !== 'json'
            ? executionResultsList.items
            : JSON.stringify(executionResultsList.items);
        const fileName = `${input._id}_${leftPad(state.storeCount + 1, 9, '0')}.${fileExtension}`;
        const key = `${s3KeyPrefix}${fileName}`;
        await s3.putObject({
            Key: key,
            Body: Buffer.from(rawResults),
        }).promise();

        // Update Act state
        state.offset += pageCount;
        state.storeCount++;
        await Apify.setValue('STATE', state);
        console.log(`Saved ${pageCount} web pages to file ${bucketName}/${key}`);
    }

    console.log('Act finished');
});
Developer
Maintained by Community