# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:16

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY --chown=myuser package*.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Progress output is disabled to keep the build log
# short, and the dependency tree plus the Node.js/NPM versions are printed
# for debugging.
#  - `npm list ... || true`: a non-zero exit status from `npm list` must not
#    abort the build; the listing is informational only.
#  - `rm -rf ~/.npm`: the NPM cache is removed in the same layer that created
#    it so it never ends up in the image. `-f` keeps the build from failing
#    when the cache directory does not exist.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -rf ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY --chown=myuser . ./


# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "my-actor",
    "title": "Project Playwright Crawler JavaScript",
    "description": "Crawlee and Playwright project in JavaScript.",
    "version": "0.0",
    "meta": {
        "templateId": "js-crawlee-playwright-chrome"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile",
    "storages": {
        "dataset": {
            "actorSpecification": 1,
            "views": {
                "overview": {
                    "title": "Result",
                    "transformation": {
                        "fields": [
                            "containing_thread",
                            "reply_threads"
                        ]
                    },
                    "display": {
                        "component": "table",
                        "properties": {
                            "containing_thread": {
                                "label": "containing_thread",
                                "format": "array"
                            },
                            "reply_threads": {
                                "label": "reply_threads",
                                "format": "array"
                            }
                        }
                    }
                }
            }
        }
    }
}

.actor/input_schema.json

{
    "title": "PlaywrightCrawler Template",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "target": {
            "title": "Target Threads URL",
            "type": "string",
            "description": "Threads URL to crawl",
            "editor": "textfield",
            "default": "https://www.threads.net/t/CuZsgfWLyiI"
        }
    }
}

src/main.js

/**
 * Entry point of the actor.
 *
 * Reads the `target` URL from the actor input, opens it with a
 * `PlaywrightCrawler`, and listens for responses coming from the Threads
 * GraphQL endpoint. When such a response arrives, its `data.data` payload is
 * pushed to the default dataset and the actor exits.
 */

// For more information, see https://docs.apify.com/sdk/js
import { Actor } from 'apify';
// For more information, see https://crawlee.dev
import { PlaywrightCrawler, RequestQueue } from 'crawlee';

// Only responses from exactly this URL are treated as result payloads.
const GRAPHQL_URL = 'https://www.threads.net/api/graphql';

// How long the handler keeps the page open waiting for a GraphQL response (ms).
const RESPONSE_WAIT_MS = 6000;

// Initialize the Apify SDK
await Actor.init();
const input = await Actor.getInput();
// Debug marker: dump the raw input.
console.log(111, JSON.stringify(input))

// Fail early with a clear message instead of a TypeError on a missing input
// object or an attempt to enqueue an empty URL.
const target = input?.target;
if (typeof target !== 'string' || target === '') {
    throw new Error('Input field "target" must be a non-empty URL string.');
}

const requestQueue = await RequestQueue.open();
await requestQueue.addRequest({ url: target });
// Debug marker: the start request has been enqueued.
console.log(222)

const proxyConfiguration = await Actor.createProxyConfiguration();
const crawler = new PlaywrightCrawler({
    proxyConfiguration,
    requestQueue,
    async requestHandler({ request, page, log }) {
        log.info(`Processing ${request.url}...`);
        const title = await page.title();
        log.info(`${title}`, { url: request.loadedUrl });

        // NOTE(review): this listener is attached inside the request handler,
        // so it presumably only sees responses that arrive from this point
        // on. If responses from the initial page load are needed, confirm
        // whether it should be registered in a pre-navigation hook instead.
        page.on('response', async (response) => {
            console.log(response.url())
            if (response.url() !== GRAPHQL_URL) return;

            // The listener is async and nothing awaits it, so any error must
            // be handled here or it surfaces as an unhandled rejection.
            try {
                const data = await response.json();
                log.info(JSON.stringify(data));
                await Actor.pushData(data.data.data);
                console.log('success, waiting to exit')
                // Stop the whole actor as soon as the first payload is stored.
                await Actor.exit();
            } catch (err) {
                log.warning(`Failed to process GraphQL response: ${err.message}`);
            }
        });

        // Keep the page open long enough for the GraphQL request to happen.
        await page.waitForTimeout(RESPONSE_WAIT_MS)
    },
});

await crawler.run();

// Exit successfully
await Actor.exit();

src/routes.js

import { Dataset, createPlaywrightRouter } from 'crawlee';

// Router instance that the handlers below are registered on.
export const router = createPlaywrightRouter();

/**
 * Registered as the router's default handler: enqueues the links matching
 * the glob below and labels them 'detail'.
 */
const enqueueDetailLinks = async (context) => {
    const { enqueueLinks, log } = context;
    log.info(`enqueueing new URLs`);

    const options = {
        globs: ['https://apify.com/*'],
        label: 'detail',
    };
    await enqueueLinks(options);
};

/**
 * Registered for the 'detail' label: logs the page title and stores it in
 * the dataset together with the loaded URL.
 */
const storePageTitle = async (context) => {
    const { request, page, log } = context;
    const title = await page.title();
    const url = request.loadedUrl;

    log.info(`${title}`, { url });
    await Dataset.pushData({ url, title });
};

router.addDefaultHandler(enqueueDetailLinks);
router.addHandler('detail', storePageTitle);

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
    "extends": "@apify",
    "root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control

.DS_Store
.idea
dist
node_modules
apify_storage
storage

package.json

{
    "name": "crawlee-playwright-javascript",
    "version": "0.0.1",
    "type": "module",
    "description": "This is an example of an Apify actor.",
    "dependencies": {
        "apify": "^3.0.0",
        "crawlee": "^3.0.0",
        "playwright": "*"
    },
    "devDependencies": {
        "@apify/eslint-config": "^0.3.1",
        "eslint": "^8.36.0"
    },
    "scripts": {
        "start": "node src/main.js",
        "lint": "eslint ./src --ext .js,.jsx",
        "lint:fix": "eslint ./src --ext .js,.jsx --fix",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}

Meta threads scraper

curious_coder/threads-scraper

Scrape threads or posts from Meta or Instagram's new social media website, "threads.net".

Curious Coder

823

5.0

(1)

Meta Threads Profile Scraper

apify/threads-profile-api-scraper

Scrape Threads profile info. Add one or more Threads usernames and extract number of followers, bio, profile details, URLs, picture URL, full name, username and more. Download structured data in JSON, CSV, XML, Excel, and HTML to use in applications and reports.

Apify

735

4.6

(5)

Meta Threads Profile Scraper

powerful_bachelor/meta-threads-profile-scraper

Gather profile data from Threads by entering one or more usernames. Extract details such as follower count, bio, profile info, links, profile picture URL, full name, username, and more. Download the structured data in JSON, CSV, XML, Excel, or HTML for seamless use in apps and reports.

Powerful Bachelor

Meta Threads Scraper - Post & Profile

thenetaji/threads-scraper

Extract Threads profiles, posts & engagement metrics easily. Lightning fast at a very low cost. Try now!!

The Netaji

131

5.0

(1)

Meta Threads Scraper - Posts, Search & Profiles

futurizerush/meta-threads-scraper

Extract Threads posts & profiles without login. Get bio, followers, mentions. Search trending/recent content. Perfect for social monitoring & influencer discovery. CSV/JSON export.

Futurize Rush

Meta Threads Profile Scraper

futurizerush/meta-threads-profile-scraper

Meta Threads Profile Scraper

Futurize Rush

Threads 爬蟲 (貼文、關鍵字搜尋、個人檔案)

futurizerush/meta-threads-scraper-zh-tw

輕鬆擷取 Threads 資料，免登入。抓取貼文、關鍵字搜尋、個人檔案。輸出 CSV / JSON，可接 Make.com、n8n，直接送至 Google Sheets、CRM、Slack、Email。適用社群監測、KOL 搜尋、競品分析、自動化行銷。

Futurize Rush

Ynet.co.il Scraper

lexis-solutions/ynet

Scrape news content from ynet.co.il to gather headlines, summaries, and metadata. Ideal for news aggregation, market analysis, and tracking real-time trends. Fast, structured, and customizable extraction from an Israel-based source.

Lexis Solutions

5.0

(3)

Threads Scraper

red.cars/threads-scraper

Threads Scraper is a powerful Apify actor that extracts public profile and post data from Meta's Threads platform. Get comprehensive user profiles, posts, and engagement metrics without authentication or API keys.

AutomateLab

105

Threads User Posts Scraper

apibox/threads-user-posts-scraper

Extract User Posts from Threads, including captions, media, tags, mentions, and metrics such as likes, comments, and quotes. Easily collect and analyze user-generated content, and download results in formats like JSON, CSV, or Excel.

ApiBox

410

1.0

(1)

Threads User Profile Scraper

apibox/threads-user-profile-scraper

Extract User Profile from Threads, including id, username, bio, avatar, follower count and verified status. Easily collect and analyze user-generated content, and download results in formats like JSON, CSV, or Excel.

ApiBox

111

Meta Threads Scraper

Meta Threads Scraper

.actor/Dockerfile

.actor/actor.json

.actor/input_schema.json

src/main.js

src/routes.js

.dockerignore

.editorconfig

.eslintrc

.gitignore

package.json

You might also like

Meta threads scraper

Meta Threads Profile Scraper

Meta Threads Profile Scraper

Meta Threads Scraper - Post & Profile

Meta Threads Scraper - Posts, Search & Profiles

Meta Threads Profile Scraper

Threads 爬蟲 (貼文、關鍵字搜尋、個人檔案)

Ynet.co.il Scraper

Threads Scraper

Threads User Posts Scraper

Threads User Profile Scraper

.actor/Dockerfile

.actor/actor.json

.actor/input_schema.json

src/main.js

src/routes.js

.dockerignore

.editorconfig

.eslintrc

.gitignore

package.json