# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20

# Print which of these packages already ship with the base image
# (diagnostic output for the build log).
RUN npm ls crawlee apify puppeteer playwright

# Copy just package.json and package-lock.json first so the dependency
# layer below is reused from the Docker cache until one of them changes.
COPY package*.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Because of --omit=dev, every package imported at
# runtime must be listed under "dependencies" in package.json.
# The dependency tree and tool versions are printed for debugging; the
# listing is allowed to fail (|| true) so it never breaks the build.
# The npm cache is removed in the same layer so it does not bloat the image;
# -f keeps the step from failing when the cache directory does not exist.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -rf ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since this happens after the NPM install, rebuilds are fast for most
# source file changes (the dependency layer stays cached).
COPY . ./


# Run the image. Exec (JSON-array) form starts npm directly instead of
# wrapping it in `/bin/sh -c`.
CMD ["npm", "start", "--silent"]

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "my-actor",
    "title": "Project Cheerio Crawler Javascript",
    "description": "Crawlee and Cheerio project in javascript.",
    "version": "0.0",
    "meta": {
        "templateId": "js-crawlee-cheerio"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
    "title": "CheerioCrawler Template",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "URLs to start with.",
            "editor": "requestListSources",
            "prefill": [
                {
                    "url": "https://crawlee.dev"
                }
            ]
        },
        "maxRequestsPerCrawl": {
            "title": "Max Requests per Crawl",
            "type": "integer",
            "description": "Maximum number of requests that can be made by this crawler.",
            "default": 100
        }
    }
}

src/main.js

// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/)
import { Actor } from 'apify';
import _ from 'lodash';
// Crawlee - web scraping and browser automation library (Read more at https://crawlee.dev)
import { CheerioCrawler } from 'crawlee';
// this is ESM project, and as such, it requires you to specify extensions in your relative imports
// read more about this here: https://nodejs.org/docs/latest-v18.x/api/esm.html#mandatory-file-extensions
// import { router } from './routes.js';

// Contact-link sources, processed in this order (Telegram, VK, WhatsApp).
// `selector` is a cheap substring pre-filter on the href attribute; `host` is
// then tested against the parsed hostname, because a bare substring match also
// hits unrelated URLs (e.g. "vk" inside a path, or "t.me" inside "chat.meetup.com").
const CONTACT_SOURCES = [
    { selector: 'a[href*="t.me"]', host: /(^|\.)t\.me$/i },
    { selector: 'a[href*="vk"]', host: /(^|\.)vk\.[a-z]{2,}$/i },
    { selector: 'a[href*="wa.me"], a[href*="api.whatsapp"]', host: /(^|\.)(wa\.me|api\.whatsapp\.com)$/i },
];

/**
 * Tells whether a link points at a host matching the given pattern.
 *
 * @param {string} href Raw value of the anchor's `href` attribute (may be relative).
 * @param {string|undefined} baseUrl URL of the page the link was found on, used to resolve relative hrefs.
 * @param {RegExp} hostPattern Pattern tested against the resolved hostname.
 * @returns {boolean} `true` when the resolved hostname matches; `false` otherwise,
 *     including when the href cannot be parsed as a URL.
 */
function isContactHref(href, baseUrl, hostPattern) {
    try {
        return hostPattern.test(new URL(href, baseUrl).hostname);
    } catch {
        // Unparseable href - not a usable contact link.
        return false;
    }
}

// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init()
await Actor.init();

// Structure of input is defined in input_schema.json
const {
    startUrls = ['https://crawlee.dev'],
    maxRequestsPerCrawl = 100,
} = await Actor.getInput() ?? {};

const proxyConfiguration = await Actor.createProxyConfiguration();

const crawler = new CheerioCrawler({
    proxyConfiguration,
    maxRequestsPerCrawl,
    async requestHandler({ enqueueLinks, request, $, log }) {
        // log.info('enqueueing new URLs');
        // await enqueueLinks();

        // Extract title from the page.
        const title = $('title').text();
        log.info(`${title}`, { url: request.loadedUrl });

        // Collect the raw href of every anchor that really points at one of the contact hosts.
        const data = [];
        for (const { selector, host } of CONTACT_SOURCES) {
            $(selector).each((idx, link) => {
                const href = $(link).attr('href');
                if (isContactHref(href, request.loadedUrl, host)) {
                    data.push({ contact: href });
                }
            });
        }

        // The same link often appears several times on a page; keep the first occurrence only.
        const uniqContacts = _.uniqBy(data, 'contact');

        // Await the write so a storage failure surfaces as a failed request
        // instead of an unhandled promise rejection.
        await Actor.pushData({ url: request.loadedUrl, contacts: uniqContacts });
    },
});

await crawler.run(startUrls);

// Gracefully exit the Actor process. It's recommended to quit all Actors with an exit()
await Actor.exit();

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
    "extends": "@apify",
    "root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control

.DS_Store
.idea
dist
node_modules
apify_storage
storage

package.json

{
    "name": "crawlee-cheerio-javascript",
    "version": "0.0.1",
    "type": "module",
    "description": "This is a boilerplate of an Apify actor.",
    "engines": {
        "node": ">=18.0.0"
    },
    "dependencies": {
        "apify": "^3.2.6",
        "crawlee": "^3.11.5",
        "lodash": "^4.17.21"
    },
    "devDependencies": {
        "@apify/eslint-config": "^0.4.0",
        "eslint": "^8.50.0"
    },
    "scripts": {
        "start": "node src/main.js",
        "lint": "eslint ./src --ext .js,.jsx",
        "lint:fix": "eslint ./src --ext .js,.jsx --fix",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}

Whatsapp Scraper Profile By Number

antonio_cesar/whatsapp-scraper-profile

WhatsApp Scraper Profile By Number allows you to extract public profile information directly from a WhatsApp number. $0.01 per request.

Antônio César

5.0

Whatsapp Profiles Scraper

inutil_labs/wscrp-free

Scrape the Whatsapp network and fetch profile info of any user, including: Profile picture, About status, or check if it's a Whatsapp for Business or personal account.

inUtil Labs

3.5K

5.0

VK Extractor

jupri/vkontakte

💫 Scrape VK.com

cat

Whatsapp Scraper

inutil_labs/whatsapp-scraper

Fetch Whatsapp profiles data, including: display picture, name and status.

inUtil Labs

1.0

Whatsapp scraper

curious_coder/whatsapp-scraper

Scrape whatsapp numbers in bulk and extract complete details including name, email, website, address, description, profile picture, etc

Curious Coder

779

4.1

WhatsApp group links Scraper

danny.hub/whatsapp-url

Extract WhatsApp group URLs from across social media: our search engine finds WhatsApp group links on Facebook, LinkedIn, Instagram, TikTok, YouTube, Twitter, Reddit, and Pinterest. Join your target WhatsApp groups and get all members' WhatsApp or phone numbers. Endless leads!

Dannyswift.hub

685

2.9

WhatsApp Messages Scraper

extremescrapes/whatsapp-messages-scraper

Scrape and monitor WhatsApp group or private chat messages in real time. Scan the QR code to connect and start collecting structured WhatsApp message data.

Extreme Scrapes

144

Whatsapp Bulk Message Sender

bhansalisoft/whatsapp-bulk-message-sender

WhatsApp Bulk Message Sender: send WhatsApp messages to WhatsApp numbers in bulk from your personal WhatsApp account, without the Business API.

bhansalisoft

4.0

VK People Scraper

easyapi/vk-people-scraper

Scrape VK.com user profiles based on search keywords. Extract detailed user information including usernames, profile URLs, locations, and avatar images. Perfect for lead generation, market research, and social media analysis.