Actor picture

Firestore Import

drobnikj/firestore-import

Imports dataset items to Firestore DB.

Author's avatarJakub Drobník
  • Modified
  • Users10
  • Runs105
Actor picture

Firestore Import

.eslintrc

{
  "extends": "@apify"
}

.gitignore

apify_storage
node_modules

.npmignore

# This file tells npm which files shouldn't be included in the published package

.idea
node_modules

Dockerfile

# Dockerfile contains instructions on how to build a Docker image that will contain
# all the code and configuration needed to run your actor. For a full
# Dockerfile reference, see https://docs.docker.com/engine/reference/builder/

# First, specify the base Docker image. Apify provides the following base images
# for your convenience:
#  apify/actor-node-basic (Node.js 10 on Alpine Linux, small and fast image)
#  apify/actor-node-chrome (Node.js 10 + Chrome on Debian)
#  apify/actor-node-chrome-xvfb (Node.js 10 + Chrome + Xvfb on Debian)
# For more information, see https://apify.com/docs/actor#base-images
# Note that you can use any other image from Docker Hub.
FROM apify/actor-node-basic

# Second, copy just package.json and package-lock.json since they are the only files
# that affect NPM install in the next step
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to keep the
# image small. Avoid logging too much and print the dependency tree for debugging.
# The commands are chained with `&&`, so the step stops at the first one that fails.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && npm list \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for simple source file changes.
COPY . ./

# Specify how to run the source code: `npm start` runs the "start" script
# declared in package.json.
CMD npm start

INPUT_SCHEMA.json

{
  "title": "Firestore Import input",
  "description": "Imports dataset to Firestore DB",
  "type": "object",
  "schemaVersion": 1,
  "properties": {
    "datasetId": {
      "title": "Dataset",
      "type": "string",
      "description": "Dataset ID of dataset you want to import to Firestore",
      "editor": "textfield"
    },
    "apiKey": {
      "title": "Api key",
      "type": "string",
      "description": "Firestore API key",
      "editor": "textfield"
    },
    "authDomain": {
      "title": "Auth domain",
      "type": "string",
      "description": "Firestore authentication domain",
      "editor": "textfield"
    },
    "projectId": {
      "title": "Project ID",
      "type": "string",
      "description": "Firestore project ID",
      "editor": "textfield"
    },
    "collectionName": {
      "title": "Collection name",
      "type": "string",
      "description": "Firestore collection name",
      "editor": "textfield"
    }
  },
  "required": ["datasetId", "apiKey", "authDomain", "projectId", "collectionName"]
}

README.md

# Firestore import

It imports data from an Apify dataset into a Firestore DB.

## Usage
It can be used from any Apify actor or task webhook, and even from the Finish webhook URL of a Legacy PhantomJS Crawler task.

Webhook URL: `https://api.apify.com/v2/acts/drobnikj~firestore-import/runs?token=<yourApifyApiToken>`

### Actor/Task webhook
Set up a webhook with the following payload template:
```
{
    "datasetId": {{resource.defaultDatasetId}},
    "apiKey": "<firestoreApiKey>",
    "authDomain": "<firestoreAuthDomain>",
    "projectId": "<firestoreProjectId>",
    "collectionName": "<firestoreCollectionName>"
}
```

### Actor
You can call `Apify.call()` with the following options:
```
await Apify.call('drobnikj/firestore-import', {
    "datasetId": "<datasetId>",
    "apiKey": "<firestoreApiKey>",
    "authDomain": "<firestoreAuthDomain>",
    "projectId": "<firestoreProjectId>",
    "collectionName": "<firestoreCollectionName>" 
})
```

### Legacy PhantomJS Crawler task with Finish webhook URL
Set up Finish webhook data with the following JSON object:
```
{
    "apiKey": "<firestoreApiKey>",
    "authDomain": "<firestoreAuthDomain>",
    "projectId": "<firestoreProjectId>",
    "collectionName": "<firestoreCollectionName>"
}
```

apify.json

{
	"name": "firestore-import",
	"version": "0.0",
	"buildTag": "latest",
	"env": null,
	"template": "hello_world"
}

main.js

const Apify = require('apify');
const firebase = require('firebase');

/**
 * Actor entry point: copies every item of an Apify dataset into a Firestore collection.
 *
 * Input fields read: `datasetId`, `apiKey`, `authDomain`, `projectId`, `collectionName`.
 * When the input carries a non-empty string `data` field (the shape sent by a legacy
 * PhantomJS crawler finish webhook), that string is parsed as JSON and its fields
 * override the top-level ones.
 *
 * Items are imported one by one on a best-effort basis: an item that Firestore rejects
 * is logged and skipped, and the run continues with the next item.
 *
 * @throws {Error} When the input is missing, the legacy `data` payload is not valid JSON,
 *                 or `datasetId`, `projectId` or `collectionName` is not provided.
 */
Apify.main(async () => {
    const rawInput = await Apify.getInput();
    if (!rawInput || typeof rawInput !== 'object') {
        throw new Error('Actor input is required and must be a JSON object.');
    }

    // A non-empty string in `data` means the run was started from a legacy
    // PhantomJS crawler task finish webhook.
    const isLegacyPhantomJsTask = typeof rawInput.data === 'string' && rawInput.data !== '';
    let legacyInput = {};
    if (isLegacyPhantomJsTask) {
        try {
            legacyInput = JSON.parse(rawInput.data);
        } catch (err) {
            throw new Error(`Cannot parse legacy webhook "data" as JSON: ${err.message}`);
        }
    }

    // Merge into a fresh object so the object returned by getInput() is not mutated.
    const input = { ...rawInput, ...legacyInput };
    const { datasetId, apiKey, authDomain, projectId, collectionName } = input;

    if (!datasetId) {
        throw new Error('DatasetId is required on input.');
    }
    if (!projectId) {
        throw new Error('ProjectId is required on input.');
    }
    // Without this check every add() below would fail inside the per-item try/catch,
    // and the run would finish with nothing imported.
    if (!collectionName) {
        throw new Error('CollectionName is required on input.');
    }

    firebase.initializeApp({
        apiKey,
        authDomain,
        projectId,
    });

    // Initialize Cloud Firestore through Firebase
    const db = firebase.firestore();
    // Resolve the target collection once; an invalid collection name fails here
    // instead of being reported again for each item.
    const collection = db.collection(collectionName);

    console.log(`Start importing dataset ${datasetId} to firestore.`);
    const dataset = await Apify.openDataset(datasetId, { forceCloud: true });
    const datasetInfo = await dataset.getInfo();

    // Page through the dataset so that only one page of items is held in memory at a time.
    const limit = 1000;
    let importedCount = 0;
    let failedCount = 0;
    for (let offset = 0; offset < datasetInfo.itemCount; offset += limit) {
        const pagination = await dataset.getData({
            // Legacy crawler results are requested in the simplified format,
            // actor/task results in the clean format.
            simplified: isLegacyPhantomJsTask,
            clean: !isLegacyPhantomJsTask,
            limit,
            offset,
        });
        console.log(`Get dataset items offset: ${pagination.offset}`);
        for (const item of pagination.items) {
            try {
                // Items are written sequentially; each add() creates a new document.
                await collection.add(item);
                importedCount++;
            } catch (err) {
                failedCount++;
                console.log(`Cannot import item ${JSON.stringify(item)}: ${err.message}`);
            }
        }
    }

    console.log(`Imported ${importedCount} from dataset ${datasetId}.`);
    if (failedCount > 0) {
        console.log(`Failed to import ${failedCount} items from dataset ${datasetId}.`);
    }

    console.log('Done!');
});

package-lock.json

This file is 4793 lines long. Only the first 50 are shown. Show all

{
	"name": "firestore-import",
	"version": "0.0.1",
	"lockfileVersion": 1,
	"requires": true,
	"dependencies": {
		"@apify/eslint-config": {
			"version": "0.0.3",
			"resolved": "https://registry.npmjs.org/@apify/eslint-config/-/eslint-config-0.0.3.tgz",
			"integrity": "sha512-WbjC0Xv1bEWN9DcOayv5y4Zygv4N8zPq/XZQygHfq+As+P6sxK5sSrAQzHIHWd+9jNX4TI9iJDcPqFzxsAxTgw==",
			"dev": true,
			"requires": {
				"eslint-config-airbnb": "^17.1.1",
				"eslint-config-airbnb-base": "^13.2.0",
				"eslint-plugin-import": "^2.18.2",
				"eslint-plugin-jsx-a11y": "^6.2.3",
				"eslint-plugin-promise": "^4.2.1",
				"eslint-plugin-react": "^7.14.3"
			}
		},
		"@apify/http-request": {
			"version": "1.1.2",
			"resolved": "https://registry.npmjs.org/@apify/http-request/-/http-request-1.1.2.tgz",
			"integrity": "sha512-u3MyDXQ4H2CkITJNr2HNwIQZBTsdkewBd6jJaXSBSzaJ5TdBlY+OKfBpVY3kTb87yl5Ses443cAxen5B8Cy3ZQ==",
			"requires": {
				"got": "^9.6.0",
				"proxy-agent": "^3.1.0",
				"underscore": "^1.9.1"
			}
		},
		"@apify/ps-tree": {
			"version": "1.1.3",
			"resolved": "https://registry.npmjs.org/@apify/ps-tree/-/ps-tree-1.1.3.tgz",
			"integrity": "sha512-+hIr8EaTRd9fsOiNNzf1Fi8Tm9qs8cdPBZjuq5fXDV6SOCdi2ZyQlcQSzc8lY0hb+UhBib1WPixtCdKLL169WA==",
			"requires": {
				"event-stream": "3.3.4"
			}
		},
		"@babel/code-frame": {
			"version": "7.5.5",
			"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.5.5.tgz",
			"integrity": "sha512-27d4lZoomVyo51VegxI20xZPuSHusqbQag/ztrBC7wegWoQ1nLREPVSKSW8byhTlzTKyNE4ifaTA6lCp7JjpFw==",
			"dev": true,
			"requires": {
				"@babel/highlight": "^7.0.0"
			}
		},
		"@babel/highlight": {
			"version": "7.5.0",
			"resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.5.0.tgz",

package.json

{
	"name": "firestore-import",
	"version": "0.0.1",
	"description": "This is a boilerplate of an Apify actor.",
	"dependencies": {
		"apify": "^0.16.0",
		"firebase": "^7.2.0"
	},
	"devDependencies": {
		"@apify/eslint-config": "0.0.3",
		"eslint": "^6.5.1"
	},
	"scripts": {
		"start": "node main.js",
		"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
	},
	"author": "It's not you it's me",
	"license": "ISC"
}