Arab chat 1 avatar

Arab chat 1

Try for free

No credit card required

Go to Store
Arab chat 1

Arab chat 1

alhemaqani111/arab-chat-1
Try for free

No credit card required

Arab chat Free for all

.actor/Dockerfile

1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node-playwright-chrome:20 AS builder
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY --chown=myuser package*.json ./
9
10# Install all dependencies. Don't audit to speed up the installation.
11RUN npm install --include=dev --audit=false
12
13# Next, copy the source files using the user set
14# in the base image.
15COPY --chown=myuser . ./
16
17# Install all dependencies and build the project.
18# Don't audit to speed up the installation.
19RUN npm run build
20
21# Create final image
22FROM apify/actor-node-playwright-chrome:20
23
24# Copy just package.json and package-lock.json
25# to speed up the build using Docker layer cache.
26COPY --chown=myuser package*.json ./
27
28# Install NPM packages, skip optional and development dependencies to
29# keep the image small. Avoid logging too much and print the dependency
30# tree for debugging
31RUN npm --quiet set progress=false \
32    && npm install --omit=dev --omit=optional \
33    && echo "Installed NPM packages:" \
34    && (npm list --omit=dev --all || true) \
35    && echo "Node.js version:" \
36    && node --version \
37    && echo "NPM version:" \
38    && npm --version \
39    && rm -r ~/.npm
40
41# Copy built JS files from builder image
42COPY --from=builder --chown=myuser /home/myuser/dist ./dist
43
44# Next, copy the remaining files and directories with the source code.
45# Since we do this after NPM install, quick build will be really fast
46# for most source file changes.
47COPY --chown=myuser . ./
48
49
50# Run the image. If you know you won't need headful browsers,
51# you can remove the XVFB start script for a micro perf gain.
52CMD ./start_xvfb_and_run_cmd.sh && npm run start:prod --silent

.actor/actor.json

1{
2    "actorSpecification": 1,
3    "name": "my-actor",
4    "title": "Project Playwright Crawler Typescript",
5    "description": "Crawlee and Playwright project in typescript.",
6    "version": "0.0",
7    "meta": {
8        "templateId": "ts-crawlee-playwright-chrome"
9    },
10    "input": "./input_schema.json",
11    "dockerfile": "./Dockerfile"
12}

.actor/input_schema.json

1{
2    "title": "PlaywrightCrawler Template",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "startUrls": {
7            "title": "Start URLs",
8            "type": "array",
9            "description": "URLs to start with.",
10            "editor": "requestListSources",
11            "prefill": [
12                {
13                    "url": "https://crawlee.dev"
14                }
15            ]
16        },
17        "maxRequestsPerCrawl": {
18            "title": "Max Requests per Crawl",
19            "type": "integer",
20            "description": "Maximum number of requests that can be made by this crawler.",
21            "default": 100
22        }
23    }
24}

src/main.ts

1/**
2 * This template is a production ready boilerplate for developing with `PlaywrightCrawler`.
3 * Use this to bootstrap your projects using the most up-to-date code.
4 * If you're looking for examples or want to learn more, see README.
5 */
6
7// For more information, see https://docs.apify.com/sdk/js
8import { Actor } from 'apify';
9// For more information, see https://crawlee.dev
10import { PlaywrightCrawler } from 'crawlee';
11// this is ESM project, and as such, it requires you to specify extensions in your relative imports
12// read more about this here: https://nodejs.org/docs/latest-v18.x/api/esm.html#mandatory-file-extensions
13// note that we need to use `.js` even when inside TS files
14import { router } from './routes.js';
15
16interface Input {
17    startUrls: string[];
18    maxRequestsPerCrawl: number;
19}
20
21// Initialize the Apify SDK
22await Actor.init();
23
24// Structure of input is defined in input_schema.json
25const {
26    startUrls = ['https://crawlee.dev'],
27    maxRequestsPerCrawl = 100,
28} = await Actor.getInput<Input>() ?? {} as Input;
29
30const proxyConfiguration = await Actor.createProxyConfiguration();
31
32const crawler = new PlaywrightCrawler({
33    proxyConfiguration,
34    maxRequestsPerCrawl,
35    requestHandler: router,
36});
37
38await crawler.run(startUrls);
39
40// Exit successfully
41await Actor.exit();

src/routes.ts

1import { Dataset, createPlaywrightRouter } from 'crawlee';
2
3export const router = createPlaywrightRouter();
4
5router.addDefaultHandler(async ({ enqueueLinks, log }) => {
6    log.info(`enqueueing new URLs`);
7    await enqueueLinks({
8        globs: ['https://apify.com/*'],
9        label: 'detail',
10    });
11});
12
13router.addHandler('detail', async ({ request, page, log }) => {
14    const title = await page.title();
15    log.info(`${title}`, { url: request.loadedUrl });
16
17    await Dataset.pushData({
18        url: request.loadedUrl,
19        title,
20    });
21});

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "root": true,
3    "env": {
4        "browser": true,
5        "es2020": true,
6        "node": true
7    },
8    "extends": [
9        "@apify/eslint-config-ts"
10    ],
11    "parserOptions": {
12        "project": "./tsconfig.json",
13        "ecmaVersion": 2020
14    },
15    "ignorePatterns": [
16        "node_modules",
17        "dist",
18        "**/*.d.ts"
19    ]
20}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage

package.json

1{
2    "name": "crawlee-playwright-typescript",
3    "version": "0.0.1",
4    "type": "module",
5    "description": "This is an example of an Apify actor.",
6    "engines": {
7        "node": ">=18.0.0"
8    },
9    "dependencies": {
10        "apify": "^3.1.10",
11        "crawlee": "^3.5.4",
12        "playwright": "*"
13    },
14    "devDependencies": {
15        "@apify/eslint-config-ts": "^0.3.0",
16        "@apify/tsconfig": "^0.1.0",
17        "@typescript-eslint/eslint-plugin": "^6.7.2",
18        "@typescript-eslint/parser": "^6.7.2",
19        "eslint": "^8.50.0",
20        "tsx": "^4.6.2",
21        "typescript": "^5.3.3"
22    },
23    "scripts": {
24        "start": "npm run start:dev",
25        "start:prod": "node dist/main.js",
26        "start:dev": "tsx src/main.ts",
27        "build": "tsc",
28        "lint": "eslint ./src --ext .ts",
29        "lint:fix": "eslint ./src --ext .ts --fix",
30        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1",
31        "postinstall": "npx crawlee install-playwright-browsers"
32    },
33    "author": "It's not you it's me",
34    "license": "ISC"
35}

tsconfig.json

1{
2    "extends": "@apify/tsconfig",
3    "compilerOptions": {
4        "module": "NodeNext",
5        "moduleResolution": "NodeNext",
6        "target": "ES2022",
7        "outDir": "dist",
8        "noUnusedLocals": false,
9        "skipLibCheck": true,
10        "lib": ["DOM"]
11    },
12    "include": [
13        "./src/**/*"
14    ]
15}
Developer
Maintained by Community

Actor Metrics

  • 1 monthly user

  • 0 No stars yet

  • >99% runs succeeded

  • Created in Jul 2024

  • Modified 4 months ago

Categories