Actor picture

HAR files for URL list

apify/har-files-for-url-list

Generates an HTTP Archive (HAR) file for each web page specified by a list of URLs in a JSON or CSV file. Optionally, the pages can be loaded using proxies from a specific country. The resulting HAR files are stored in the key-value store or in the dataset.

No credit card required

Author: Apify Technologies
  • Modified
  • Users: 17
  • Runs: 136
Actor picture

HAR files for URL list

Based on the apify/actor-node-chrome:v0.21.10 Docker image (see docs).

This file is 130 lines long. Only the first 50 are shown. Show all

const Apify = require('apify');
const PuppeteerHar = require('puppeteer-har');
const crypto = require('crypto');
const typeis = require('type-is');

// Timeout, in seconds, sized generously (2 minutes) because some pages
// take a long time to open.
const NAVIGATION_TIMEOUT_SECS = 2 * 60;

// Lookup table from a country code to an opaque proxy group identifier.
// NOTE(review): the keys are the codes exactly as listed here (e.g. 'UK',
// not the ISO 3166-1 code 'GB') — confirm against whatever supplies the code.
const COUNTRY_CODE_TO_PROXY_GROUP = {
    UK: 'LaxcWp84azNk7xa7P',
    US: 'TGByFdWkWE5cAnmDg',
    CZ: 'di5BsrawgufWetctm',
    DK: 'r8jdq6jyPjpa9c3Lw',
    BR: 'so5Zy6Xc62Faszhc4',
    VN: 'nYx7ojRzbyaTm9HZ7',
};

/**
 * Builds the Apify API URL of a record in the key-value store whose ID is
 * read from the APIFY_DEFAULT_KEY_VALUE_STORE_ID environment variable.
 *
 * @param {string} recordKey - Key of the record; inserted into the URL as-is.
 * @returns {string} URL of the record.
 */
const getKeyValueStoreUrl = (recordKey) => {
    const storeId = process.env.APIFY_DEFAULT_KEY_VALUE_STORE_ID;
    const storeUrl = `https://api.apify.com/v2/key-value-stores/${storeId}`;
    return `${storeUrl}/records/${recordKey}`;
};

/**
 * Stops the HAR recording attached to a request, saves the resulting HAR file
 * to the key-value store, and adds a row to the dataset with a link to it and
 * info about the page.
 *
 * A dataset row is pushed even when no HAR could be obtained; in that case
 * nothing is written to the key-value store and `harFileUrl` is `null`.
 *
 * @param {Object} request - Request carrying the custom `myHar` recorder and
 *   `myResponses` properties; both are deleted from the request here, and
 *   `myDone` is set to `true` at the end.
 * @param {boolean} reportErrors - When truthy, the request's `errorMessages`
 *   are included in the dataset row.
 * @returns {Promise<void>}
 */
const saveResultingHar = async (request, reportErrors) => {
    let resultingHar = null;
    try {
        resultingHar = await request.myHar.stop();
        // assignContentToRequests is defined elsewhere in this file.
        await assignContentToRequests(request, request.myResponses, resultingHar);
    } catch (e) {
        // request.myHar.stop() sometimes fails; record the error and continue,
        // so that a dataset row is still produced for this page.
        request.pushErrorMessage(e);
    }

    // Drop the recorder and the collected responses from the request object.
    delete request.myHar;
    delete request.myResponses;

    // Record key: first 17 characters of the base64 SHA-256 digest of the
    // request's uniqueKey, with '+' and '/' replaced by 'x'.
    const keyDigest = crypto.createHash('sha256').update(request.uniqueKey).digest('base64');
    const fileName = `${keyDigest.replace(/[+/]/g, 'x').slice(0, 17)}.har`;

    const isHarAvailable = Boolean(resultingHar);
    if (isHarAvailable) {
        await Apify.setValue(fileName, resultingHar);
    }
    await Apify.pushData({
        pageUrl: request.url,
        harFileUrl: isHarAvailable ? getKeyValueStoreUrl(fileName) : null,
        errorMessages: reportErrors ? (request.errorMessages || undefined) : undefined,
    });

    // Only claim success when a HAR file was actually stored.
    if (isHarAvailable) {
        console.log(`HAR of ${request.url} saved successfully.`);
    } else {
        console.log(`HAR of ${request.url} could not be generated.`);
    }

    // Make sure the request won't be repeated
    request.myDone = true;
};