Big Query
- juansgaitan/big-query
- Users: 14
- Runs: 485
- Created by Juan Gaitán Villamizar
Appends a CSV file to a Google BigQuery table. Create service account credentials at https://console.cloud.google.com/, copy and paste the JSON key into the value of the 'CREDENTIALS' variable, and set it as a secret.
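For reference, the credentials value is typically consumed along these lines. This is a minimal sketch, not taken from the actor's source; it assumes the secret is exposed to the act as a CREDENTIALS environment variable:

const BigQuery = require('@google-cloud/bigquery');

// Parse the service account JSON stored in the secret CREDENTIALS variable
// and use it to construct an authenticated BigQuery client.
const credentials = JSON.parse(process.env.CREDENTIALS);
const bigquery = new BigQuery({
    projectId: credentials.project_id,
    credentials, // contains client_email and private_key
});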
Dockerfile
# This is a template for a Dockerfile used to run acts in the Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10
# Second, copy just package.json and package-lock.json since they should be
# the only files that affect "npm install" in the next step, to speed up the build
COPY package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./
# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{
"name": "apify-project",
"version": "0.0.1",
"description": "",
"author": "It's not you it's me",
"license": "ISC",
"dependencies": {
"apify": "0.21.10",
"json2csv": "latest",
"@google-cloud/bigquery": "latest"
},
"scripts": {
"start": "node main.js"
}
}
main.js
This file is 224 lines long; only the first 50 lines are shown.
// Source file for Hosted source in 'Source type'
const fs = require('fs');
const Apify = require('apify');
const json2csv = require('json2csv'); // eslint-disable-line
const BigQuery = require('@google-cloud/bigquery'); // eslint-disable-line
const { log, error } = console;
let isStoreIdSet = false;
// Merge new items into the JSON array stored under `key` in the configured key-value store.
async function storeOrGetResults(key, items = [], filterKey) {
    if (!isStoreIdSet || !key) {
        throw new Error(`Error while storing or getting results. Missing ${key ? 'storeId in store' : 'key value'}.`);
    }
    const { keyValueStores } = Apify.client;
    const record = await keyValueStores.getRecord({ key });
    const storeRecord = record && record.body ? record.body : [];
    let previous = typeof storeRecord === 'string' ? JSON.parse(storeRecord) : storeRecord;
    if (items.length === 0) {
        return { previous };
    }
    const current = items.slice();
    // Drop previously stored items that reappear in the current batch (matched
    // by filterKey) so the concatenation below does not produce duplicates.
    if (current.length && previous.length && filterKey) {
        const cache = current.reduce((object, item) => (
            Object.assign(object, { [item[filterKey]]: true })
        ), {});
        previous = previous.filter(item => !cache[item[filterKey]]);
    }
    const next = previous.concat(current);
    if (previous.length !== current.length) {
        await keyValueStores.putRecord({
            key,
            body: JSON.stringify(next)
        });
    } else {
        log('No state modifications required.');
    }
    log('Previous results:', previous.length);
    log('Current results:', current.length);
    log('Next results:', next.length);
    return { previous, current, next };
}
async function createDataset(datasetName, bigquery) {
    const [datasets] = await bigquery.getDatasets();