Arab chat all

Developed by Arab chat
Maintained by Community
Free for everyone

Rating: 0.0 (0 reviews)

Pricing: Pay per usage
Total users: 1
Monthly users: 1
Runs succeeded: >99%
Last modified: 9 months ago

.actor/Dockerfile

# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./
# Run the image.
CMD npm start --silent

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "my-actor",
    "title": "Project Cheerio Crawler Javascript",
    "description": "Crawlee and Cheerio project in javascript.",
    "version": "0.0",
    "meta": {
        "templateId": "js-crawlee-cheerio"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
    "title": "CheerioCrawler Template",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "URLs to start with.",
            "editor": "requestListSources",
            "prefill": [
                {
                    "url": "https://crawlee.dev"
                }
            ]
        },
        "maxRequestsPerCrawl": {
            "title": "Max Requests per Crawl",
            "type": "integer",
            "description": "Maximum number of requests that can be made by this crawler.",
            "default": 100
        }
    }
}
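
For reference, a run input conforming to this schema might look like the following sketch; the URL and the lower request cap are only illustrative values, not part of the template:

{
    "startUrls": [
        { "url": "https://crawlee.dev" }
    ],
    "maxRequestsPerCrawl": 50
}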

.dockerignore

# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
    "extends": "@apify",
    "root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage

package.json

{
"name": "crawlee-cheerio-javascript",
"version": "0.0.1",
"type": "module",
"description": "This is a boilerplate of an Apify actor.",
"engines": {
"node": ">=18.0.0"
},
"dependencies": {
"apify": "^3.1.10",
"crawlee": "^3.5.4"
},
"devDependencies": {
"@apify/eslint-config": "^0.4.0",
"eslint": "^8.50.0"
},
"scripts": {
"start": "node src/main.js",
"lint": "eslint ./src --ext .js,.jsx",
"lint:fix": "eslint ./src --ext .js,.jsx --fix",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
},
"author": "It's not you it's me",
"license": "ISC"
}

src/main.js

// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/)
import { Actor } from 'apify';
// Crawlee - web scraping and browser automation library (Read more at https://crawlee.dev)
import { CheerioCrawler, Dataset } from 'crawlee';
// This is an ESM project, and as such, it requires you to specify extensions in your relative imports.
// Read more about this here: https://nodejs.org/docs/latest-v18.x/api/esm.html#mandatory-file-extensions
// import { router } from './routes.js';

// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init().
await Actor.init();

// The structure of the input is defined in input_schema.json.
const {
    startUrls = ['https://crawlee.dev'],
    maxRequestsPerCrawl = 100,
} = await Actor.getInput() ?? {};

const proxyConfiguration = await Actor.createProxyConfiguration();

const crawler = new CheerioCrawler({
    proxyConfiguration,
    maxRequestsPerCrawl,
    async requestHandler({ enqueueLinks, request, $, log }) {
        log.info('enqueueing new URLs');
        await enqueueLinks();

        // Extract the title from the page.
        const title = $('title').text();
        log.info(`${title}`, { url: request.loadedUrl });

        // Save the URL and title to the Dataset - a table-like storage.
        await Dataset.pushData({ url: request.loadedUrl, title });
    },
});

await crawler.run(startUrls);

// Gracefully exit the Actor process. It's recommended to quit all Actors with an exit().
await Actor.exit();