Example Process Crawl Results


Iterates through all results from a crawler run and counts them. It needs to be called from the crawler's finish webhook, which you set up by adding a URL to the finish webhook of your crawler. Use this actor as a starting point to develop custom post-processing of data from the crawler.

Based on the apify/actor-node-basic:v0.21.10 Docker image (see docs).

const Apify = require('apify');
const _ = require('underscore');

Apify.main(async () => {
    // Get act input and validate it
    const input = await Apify.getValue('INPUT');
    if (!input || !input._id) {
        throw new Error('Input is missing the "_id" attribute. Did you start it from crawler finish webhook?');
    const executionId = input._id;
    // Print info about crawler run
    const crawlerRunDetails = await Apify.client.crawlers.getExecutionDetails({ executionId });
    if (!crawlerRunDetails) {
        throw new Error(`There is no crawler run with ID: "${executionId}"`);
    console.log(`Details of the crawler run (ID: ${executionId}):`);
    // Iterate through all crawler results and count them
    // Here is the place where you can add something more adventurous :)
    console.log(`Counting results from crawler run...`);
    const limit = 100;
    let offset = 0;
    let totalItems = 0;
    let results;
    do {
        results = await Apify.client.crawlers.getExecutionResults({ 
        offset += results.count;
        totalItems += results.items.length;
    } while (results.count > 0);
    // Save results
    console.log(`Found ${totalItems} records`);
    await Apify.setValue('OUTPUT', {