Copy to bucket
Deprecated
Pricing
Pay per usage
Go to Store
Copy to bucket
Deprecated
0.0 (0)
Pricing
Pay per usage
2
Total users
3
Monthly users
1
Last modified
3 years ago
Dockerfile
# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10

# Second, copy just package.json and package-lock.json since they should be
# the only files that affect "npm install" in the next step, to speed up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{ "name": "apify-project", "version": "0.0.1", "description": "", "author": "It's not you it's me", "license": "ISC", "dependencies": { "apify": "0.21.10", "aws-sdk": "latest", "left-pad": "latest", "type-check": "latest" }, "scripts": { "start": "node main.js" }}
main.js
const Apify = require('apify');
const AWS = require('aws-sdk');
const leftPad = require('left-pad');
const typeCheck = require('type-check').typeCheck;

// Definition of the input (type-check syntax). Every field is optional.
const INPUT_TYPE = `{
    _id: Maybe String,
    actId: Maybe String,
    data: Maybe String,
}`;

// Progress record used when no 'STATE' value has been stored yet.
const DEFAULT_STATE = {
    offset: 0,
    storeCount: 0,
};

/**
 * Copies the results of a crawler execution to S3, one file per page of results.
 *
 * Reads the act INPUT, validates it against INPUT_TYPE, and parses `input.data`
 * as JSON for the optional settings `itemsPerFile`, `executionResultsParams`
 * and `awsS3Params`. It then pages through the execution results of
 * `input._id` and uploads each page under the `streaming/files/` key prefix.
 * Progress is stored under the 'STATE' key after every upload, and a run that
 * finds a stored 'STATE' starts from its saved offset.
 *
 * NOTE(review): the variable naming suggests the input is a crawler
 * finish-webhook payload - confirm against the caller.
 *
 * @throws {Error} When the input does not match INPUT_TYPE.
 */
Apify.main(async () => {
    // Get input of your act
    const input = await Apify.getValue('INPUT');

    // Check input params
    if (!typeCheck(INPUT_TYPE, input)) {
        console.log('Expected input:');
        console.log(INPUT_TYPE);
        console.log('Received input:');
        console.dir(input);
        throw new Error("Received invalid input");
    }

    // `data` is declared optional, so fall back to an empty settings object
    // instead of letting JSON.parse(undefined) fail with an opaque SyntaxError.
    const finishWebhookData = (input.data ? JSON.parse(input.data) : null) || {};

    // Work on a copy so the module-level DEFAULT_STATE is never mutated.
    const savedState = await Apify.getValue('STATE');
    const state = Object.assign({}, DEFAULT_STATE, savedState);

    // Set default values
    const itemsPerFile = finishWebhookData.itemsPerFile || 1000;
    const executionResultsParams = finishWebhookData.executionResultsParams || {};
    const awsS3Params = finishWebhookData.awsS3Params || {};
    const format = executionResultsParams.format || 'json';
    // The bucket name is only used for logging; guard against a missing `params`.
    const bucketName = awsS3Params.params ? awsS3Params.params.Bucket : undefined;

    // Download data and save them to S3
    const s3 = new AWS.S3(awsS3Params);
    while (true) {
        // Build a fresh request object for each page rather than mutating
        // the user-supplied params.
        const requestParams = Object.assign({}, executionResultsParams, {
            executionId: input._id,
            limit: itemsPerFile,
            offset: state.offset,
        });
        const executionResultsList = await Apify.client.crawlers.getExecutionResults(requestParams);
        const lastCount = parseInt(executionResultsList.count, 10);

        // An empty page means every result has been copied.
        if (lastCount === 0) break;

        // Non-JSON formats are uploaded as returned; JSON items are serialized first.
        const rawResults = format !== 'json'
            ? executionResultsList.items
            : JSON.stringify(executionResultsList.items);
        const file = Buffer.from(rawResults);
        const fileName = `${input._id}_${leftPad(state.storeCount + 1, 9, '0')}.${format}`;
        await s3.putObject({
            Key: 'streaming/files/' + fileName,
            Body: file,
        }).promise();

        // Update Act state
        state.offset += lastCount;
        state.storeCount++;
        await Apify.setValue('STATE', state);
        console.log(`Saved ${lastCount} web pages to file ${bucketName}/${fileName}`);
    }

    console.log('Act finished');
});