# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20

# Check preinstalled packages: print the installed versions of the
# scraping libraries to the build log.
RUN npm ls crawlee apify puppeteer playwright

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Progress output is disabled to avoid logging too
# much; the dependency tree and the Node.js/NPM versions are printed for
# debugging. `npm list` is allowed to fail (`|| true`) so that a problem
# in the printed tree does not abort the build.
# The NPM cache is removed in the same layer so it never ends up in the
# image; `rm -rf` keeps this cleanup from failing the build when the
# cache directory does not exist.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -rf ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image. The exec (JSON array) form starts npm directly instead of
# wrapping it in `/bin/sh -c`, so stop signals are delivered to npm itself.
CMD ["npm", "start", "--silent"]

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "my-actor",
    "title": "Project Cheerio Crawler Javascript",
    "description": "Crawlee and Cheerio project in JavaScript.",
    "version": "0.0",
    "meta": {
        "templateId": "js-crawlee-cheerio"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
    "title": "CheerioCrawler Template",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "URLs to start with.",
            "editor": "requestListSources",
            "prefill": [
                {
                    "url": "https://2gis.ru/moscow/firm/70000001023063516"
                }
            ]
        },
        "maxRequestsPerCrawl": {
            "title": "Max Requests per Crawl",
            "type": "integer",
            "description": "Maximum number of requests that can be made by this crawler.",
            "default": 100
        }
    }
}

src/main.js

// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/)
import { Actor } from 'apify';
// Crawlee - web scraping and browser automation library (Read more at https://crawlee.dev)
import { CheerioCrawler, Dataset } from 'crawlee';
// This is an ESM project, and as such, it requires you to specify extensions in your relative imports.
// Read more about this here: https://nodejs.org/docs/latest-v18.x/api/esm.html#mandatory-file-extensions
// import { router } from './routes.js';

// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init().
await Actor.init();

// Structure of input is defined in input_schema.json.
// Both values fall back to defaults when the input is missing or omits them.
const {
    startUrls = ['https://crawlee.dev'],
    maxRequestsPerCrawl = 100,
} = await Actor.getInput() ?? {};

const proxyConfiguration = await Actor.createProxyConfiguration();

const crawler = new CheerioCrawler({
    proxyConfiguration,
    maxRequestsPerCrawl,
    /**
     * Handles one downloaded page: logs its title and stores the text of
     * every matching outbound link together with that title.
     */
    async requestHandler({ request, $, log }) {
        // Extract title from the page.
        const title = $('title').text();
        log.info(`${title}`, { url: request.loadedUrl });

        // Collect the text of every anchor whose href contains "link.2gis".
        // Only texts that contain a dot and do not contain "hh" are kept
        // (presumably website addresses, excluding hh.* links - confirm with the author).
        const items = $('a[href*="link.2gis"]')
            .toArray()
            .map((el) => $(el).text())
            .filter((text) => text.includes('.') && !text.includes('hh'))
            .map((site) => ({ site, title }));

        // Push all results in a single awaited call so the handler does not
        // finish before the data is stored and storage errors are not lost
        // as unhandled promise rejections.
        if (items.length > 0) {
            await Actor.pushData(items);
        }
    },
});

await crawler.run(startUrls);

// Gracefully exit the Actor process. It's recommended to quit all Actors with an exit().
await Actor.exit();

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
    "extends": "@apify",
    "root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control

.DS_Store
.idea
dist
node_modules
apify_storage
storage

package.json

{
    "name": "crawlee-cheerio-javascript",
    "version": "0.0.1",
    "type": "module",
    "description": "This is a boilerplate of an Apify actor.",
    "engines": {
        "node": ">=18.0.0"
    },
    "dependencies": {
        "apify": "^3.2.6",
        "crawlee": "^3.11.5",
        "lodash": "^4.17.21"
    },
    "devDependencies": {
        "@apify/eslint-config": "^0.4.0",
        "eslint": "^8.50.0"
    },
    "scripts": {
        "start": "node src/main.js",
        "lint": "eslint ./src --ext .js,.jsx",
        "lint:fix": "eslint ./src --ext .js,.jsx --fix",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}

⚡️ Linkedin Jobs Scraper

future_creator/linkedin-jobs-scraper

This LinkedIn Jobs scraper efficiently gathers detailed information about job vacancies on LinkedIn based on your specified criteria. Easily configure filters to match LinkedIn's search capabilities, ensuring precise and comprehensive selection of relevant job listings.

Future Creator

285

Wellfound API Job Scraper [NEW 07/2025]

clearpath/wellfound-api-job-scraper

Wellfound API scraper (AngelList) extracts comprehensive job listings, company data, salaries & market insights. Ultra-fast, memory-efficient, cost-effective.

ClearPath

Google Maps Scraper

compass/crawler-google-places

Extract data from thousands of Google Maps locations and businesses, including reviews, reviewer details, images, contact info, opening hours, location, prices & more. Export scraped data, run the scraper via API, schedule and monitor runs, or integrate with other tools.

Compass

144K

4.3

ImmoScout24 API Lite

clearpath/immoscout24-api-lite

ImmobilienScout24.de API scraper for German real estate monitoring. Lightning-fast. Track new rental listings with real-time Telegram alerts. 90% cheaper than browser scrapers. Perfect ImmoScout24.de automation tool for apartment hunting and property data extraction.

ClearPath

Businessesforsale Scraper

memo23/businessesforsale-scraper

Scrapes BusinessesForSale.com to extract business listings with JSON-LD data, search metadata (criteria, tags, age filters, listings count), and search context. Built with TypeScript/Cheerio for reliable data extraction and market research.

Muhamed Didovic

Check24 Rental Car Details Scraper

ecomscrape/check24-rental-car-details-scraper

Extract detailed rental car data from Check24.de with our powerful scraper. Access pricing, vehicle specifications, supplier information, and comprehensive booking details from Germany's leading comparison platform for informed business decisions.

ecomscrape

Fortress.com.hk Product Search Scraper

ecomscrape/fortress-product-search-scraper

Extract comprehensive product data from Fortress.com.hk with our advanced scraper. Get pricing, specifications, availability, and detailed product information from Hong Kong's leading electronics retailer for market analysis and competitive intelligence.

ecomscrape

LinkedIn Company Employees Scraper ✅ No Cookies 📧 $4/12 per 1k

harvestapi/linkedin-company-employees

Extract all LinkedIn Company employees with filters and detailed profile information, including complete work experience, and more. No cookies or account required. This actor can try to find contact emails.

HarvestAPI

209

Angi (Angie's List) Scraper

babak/angi-angie-s-list-scraper

Angi (Angie’s List) Scraper allows you to extract Angie's List companies' data. Using our search fields, you'll get general details, contact info, and reviews for market and advertising research, competitive and customer feedback analysis, product development, and business partnership.

V Hzrok

Zach's "Webpage Content To Markdown" Scraper

dyf/webpage-to-markdown

Scrape a webpage and parse to markdown. Packed with features to ensure high success rate and low cost. Includes 2 modes of operation so that you can optimize for either cost (as cheap as possible) or yield (as many successful results as possible).

Double Your Freelancing

LinkedIn Job Postings Scraper

eunit/linkedin-job-postings-scraper

Designed for both personal and professional use: simply enter your job search keywords and location to extract tailored job titles, company names, locations, job URLs and more. This scraper handles infinite scrolling and exports data to any format, such as CSV, JSON, XML and more. Try it today 🚀