Arab  chat avatar
Arab chat
Try for free

No credit card required

View all Actors
Arab  chat

Arab chat

encouraged_overtone-owner/arab-chat
Try for free

No credit card required

Messenger app for private video calls and chat

.actor/Dockerfile

1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node-puppeteer-chrome:20 AS builder
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY --chown=myuser package*.json ./
9
10# Install all dependencies. Don't audit to speed up the installation.
11RUN npm install --include=dev --audit=false
12
13# Next, copy the source files using the user set
14# in the base image.
15COPY --chown=myuser . ./
16
17# Build the project (runs the "build" script from package.json).
18# Dependencies were already installed in the step above.
19RUN npm run build
20
21# Create final image
22FROM apify/actor-node-puppeteer-chrome:20
23
24# Copy just package.json and package-lock.json
25# to speed up the build using Docker layer cache.
26COPY --chown=myuser package*.json ./
27
28# Install NPM packages, skip optional and development dependencies to
29# keep the image small. Avoid logging too much and print the dependency
30# tree for debugging
31RUN npm --quiet set progress=false \
32    && npm install --omit=dev --omit=optional \
33    && echo "Installed NPM packages:" \
34    && (npm list --omit=dev --all || true) \
35    && echo "Node.js version:" \
36    && node --version \
37    && echo "NPM version:" \
38    && npm --version \
39    && rm -r ~/.npm
40
41# Copy built JS files from builder image
42COPY --from=builder --chown=myuser /home/myuser/dist ./dist
43
44# Next, copy the remaining files and directories with the source code.
45# Since we do this after NPM install, quick build will be really fast
46# for most source file changes.
47COPY --chown=myuser . ./
48
49
50# Run the image. If you know you won't need headful browsers,
51# you can remove the XVFB start script for a micro perf gain.
52CMD ./start_xvfb_and_run_cmd.sh && npm run start:prod --silent

.actor/actor.json

1{
2    "actorSpecification": 1,
3    "name": "my-actor-1",
4    "title": "Project Puppeteer Crawler Typescript",
5    "description": "Crawlee and Puppeteer project in typescript.",
6    "version": "0.0",
7    "meta": {
8        "templateId": "ts-crawlee-puppeteer-chrome"
9    },
10    "input": "./input_schema.json",
11    "dockerfile": "./Dockerfile"
12}

.actor/input_schema.json

1{
2    "title": "PuppeteerCrawler Template",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "startUrls": {
7            "title": "Start URLs",
8            "type": "array",
9            "description": "URLs to start with.",
10            "editor": "requestListSources",
11            "prefill": [
12                {
13                    "url": "https://apify.com"
14                }
15            ]
16        }
17    }
18}

src/main.ts

1// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/).
2import { Actor } from 'apify';
3// Web scraping and browser automation library (Read more at https://crawlee.dev)
4import { PuppeteerCrawler, Request } from 'crawlee';
5import { router } from './routes.js';
6
7// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init().
8await Actor.init();
9
10interface Input {
11    startUrls: Request[]; // Start requests read from the Actor input.
12}
13// Define the URLs to start the crawler with - get them from the input of the Actor or use a default list.
14const {
15    startUrls = ['https://crawlee.dev'], // NOTE(review): fallback is a plain string, not a Request, and the routes glob only enqueues https://apify.com/* links - confirm this default is intended.
16} = await Actor.getInput<Input>() ?? {};
17
18// Create a proxy configuration that will rotate proxies from Apify Proxy.
19const proxyConfiguration = await Actor.createProxyConfiguration();
20
21// Create a PuppeteerCrawler that will use the proxy configuration and handle requests with the router from the routes.js file.
22const crawler = new PuppeteerCrawler({
23    proxyConfiguration,
24    requestHandler: router,
25});
26
27// Run the crawler with the start URLs and wait for it to finish.
28await crawler.run(startUrls);
29
30// Gracefully exit the Actor process. It's recommended to quit all Actors with an exit().
31await Actor.exit();

src/routes.ts

1import { Dataset, createPuppeteerRouter } from 'crawlee';
2
3export const router = createPuppeteerRouter(); // Router on which the handlers below are registered; main.ts passes it as the crawler's requestHandler.
4
5router.addDefaultHandler(async ({ enqueueLinks, log }) => { // Default handler: logs, then enqueues links labelled for the 'detail' handler.
6    log.info(`enqueueing new URLs`);
7    await enqueueLinks({
8        globs: ['https://apify.com/*'], // Glob pattern passed to enqueueLinks to restrict which URLs are enqueued.
9        label: 'detail', // Same label the 'detail' handler below is registered under.
10    });
11});
12
13router.addHandler('detail', async ({ request, page, log }) => { // 'detail' handler: logs the page title and stores it with the loaded URL.
14    const title = await page.title();
15    log.info(`${title}`, { url: request.loadedUrl });
16
17    await Dataset.pushData({ // Pushes a { url, title } record to the Dataset.
18        url: request.loadedUrl,
19        title,
20    });
21});

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "root": true,
3    "env": {
4        "browser": true,
5        "es2020": true,
6        "node": true
7    },
8    "extends": [
9        "@apify/eslint-config-ts"
10    ],
11    "parserOptions": {
12        "project": "./tsconfig.json",
13        "ecmaVersion": 2020
14    },
15    "ignorePatterns": [
16        "node_modules",
17        "dist",
18        "**/*.d.ts"
19    ]
20}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage

package.json

1{
2    "name": "crawlee-puppeteer-typescript",
3    "version": "0.0.1",
4    "type": "module",
5    "description": "This is an example of an Apify actor.",
6    "engines": {
7        "node": ">=18.0.0"
8    },
9    "dependencies": {
10        "apify": "^3.1.10",
11        "crawlee": "^3.5.4",
12        "puppeteer": "*"
13    },
14    "devDependencies": {
15        "@apify/eslint-config-ts": "^0.3.0",
16        "@apify/tsconfig": "^0.1.0",
17        "@typescript-eslint/eslint-plugin": "^6.7.2",
18        "@typescript-eslint/parser": "^6.7.2",
19        "eslint": "^8.50.0",
20        "tsx": "^4.6.2",
21        "typescript": "^5.3.3"
22    },
23    "scripts": {
24        "start": "npm run start:dev",
25        "start:prod": "node dist/main.js",
26        "start:dev": "tsx src/main.ts",
27        "build": "tsc",
28        "lint": "eslint ./src --ext .ts",
29        "lint:fix": "eslint ./src --ext .ts --fix",
30        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
31    },
32    "author": "It's not you it's me",
33    "license": "ISC"
34}

tsconfig.json

1{
2    "extends": "@apify/tsconfig",
3    "compilerOptions": {
4        "module": "NodeNext",
5        "moduleResolution": "NodeNext",
6        "target": "ES2022",
7        "outDir": "dist",
8        "noUnusedLocals": false,
9        "skipLibCheck": true,
10        "lib": ["DOM"]
11    },
12    "include": [
13        "./src/**/*"
14    ]
15}
Developer
Maintained by Community
Actor metrics
  • 1 monthly user
  • 1 star
  • 100.0% runs succeeded
  • Created in Jul 2024
  • Modified about 2 months ago
Categories