BQ

BigQuery

Appends a CSV file to a Google BigQuery table. Create service account credentials in the Google Cloud Console (https://console.cloud.google.com/), copy & paste the contents of the JSON key file into the value of the 'CREDENTIALS' input variable and set it as a secret.
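For illustration, here is a minimal sketch of the credential flow, assuming the secret arrives in an input field named 'CREDENTIALS' (the field name comes from the description above; the dataset and table names below are placeholders, and the BigQuery constructor call varies between library versions):

const Apify = require('apify');
const BigQuery = require('@google-cloud/bigquery');

Apify.main(async () => {
  // The secret input field may arrive as a JSON string or as an object,
  // depending on how the input is defined, so handle both cases.
  const input = await Apify.getValue('INPUT');
  const credentials = typeof input.CREDENTIALS === 'string'
    ? JSON.parse(input.CREDENTIALS)
    : input.CREDENTIALS;

  // Authenticate with the parsed service account key.
  const bigquery = new BigQuery({
    projectId: credentials.project_id,
    credentials,
  });

  // Append rows to an existing table (placeholder names).
  await bigquery
    .dataset('my_dataset')
    .table('my_table')
    .insert([{ name: 'example', value: 1 }]);
});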

Dockerfile

# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-basic:v0.21.10

# Second, copy just package.json and package-lock.json since they should be
# the only files that affect "npm install" in the next step, to speed up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

{
    "name": "apify-project",
    "version": "0.0.1",
    "description": "",
    "author": "It's not you it's me",
    "license": "ISC",
    "dependencies": {
        "apify": "0.21.10",
        "json2csv": "latest",
        "@google-cloud/bigquery": "latest"
    },
    "scripts": {
        "start": "node main.js"
    }
}

main.js

This file is 224 lines long; only the first 50 lines are shown.

// Source file for the 'Hosted source' option in 'Source type'
const fs = require('fs');
const Apify = require('apify');
const json2csv = require('json2csv'); // eslint-disable-line
const BigQuery = require('@google-cloud/bigquery'); // eslint-disable-line

const { log, error } = console;

let isStoreIdSet = false;
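// Merges `items` into a JSON array kept in the key-value store under `key`.
// When `filterKey` is given, previously stored items sharing a `filterKey`
// value with an incoming item are replaced by the incoming version.
// Requires the store ID to have been set (isStoreIdSet) before calling.
// Returns { previous } when no items are given, otherwise { previous, current, next }.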
async function storeOrGetResults(key, items = [], filterKey) {
  if (!isStoreIdSet || !key) {
    throw new Error(`Error while storing or getting results. Missing ${key ?
      'storeId in store' : 'key value'}.`);
  }

  const { keyValueStores } = Apify.client;
  const record = await keyValueStores.getRecord({ key });
  const storeRecord = record && record.body ? record.body : [];
  let previous = typeof storeRecord === 'string' ? JSON.parse(storeRecord) : storeRecord;

  if (items.length === 0) {
    return { previous };
  }

  const current = items.slice();
  if (current.length && previous.length && filterKey) {
    const cache = current.reduce((object, item) => (
      Object.assign(object, { [item[filterKey]]: true })
    ), {});
    previous = previous.filter(item => !cache[item[filterKey]]);
  }

  const next = previous.concat(current);
  // Persist only when the filtered-previous and current counts differ; a cheap
  // heuristic for "state changed" that can skip the write when counts happen to match.
  if (previous.length !== current.length) {
    await keyValueStores.putRecord({
      key,
      body: JSON.stringify(next)
    });
  } else {
    log('No state modifications required.');
  }

  log('Previous results:', previous.length);
  log('Current results:', current.length);
  log('Next results:', next.length);
  return { previous, current, next };
}
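// Hypothetical usage sketch (not part of the original file): merge newly
// scraped rows into the stored state, deduplicated on a unique column, then
// push only the fresh rows to BigQuery, e.g.:
//   const { current } = await storeOrGetResults('bq-rows', newItems, 'id');
//   await bigquery.dataset('my_dataset').table('my_table').insert(current);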

async function createDataset(datasetName, bigquery) {
  const [datasets] = await bigquery.getDatasets();