Sanaa picture catch. avatar
Sanaa picture catch.

Pricing

Pay per usage

Go to Store
Sanaa picture catch.

Sanaa picture catch.

Developed by

Arab chat

Maintained by Community

0.0 (0)

Pricing

Pay per usage

0

Monthly users

1

Runs succeeded

>99%

Last modified

8 months ago

.actor/Dockerfile

# Two-stage build: the "builder" stage compiles the TypeScript sources,
# the final stage ships only production dependencies plus the compiled output.

# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20 AS builder

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install all dependencies, including devDependencies (the TypeScript
# compiler is needed for the build). Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false

# Next, copy the source files using the user set
# in the base image.
COPY . ./

# Build the project (runs the "build" script from package.json).
# Dependencies were already installed above.
RUN npm run build

# Create final image
FROM apify/actor-node:20

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Copy built JS files from builder image
COPY --from=builder /usr/src/app/dist ./dist

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image. Exec (JSON array) form is used so the command is started
# directly instead of being wrapped in `/bin/sh -c`.
CMD ["npm", "run", "start:prod", "--silent"]

.actor/actor.json

1{
2    "actorSpecification": 1,
3    "name": "my-actor-1",
4    "title": "Scrape single page in TypeScript",
5    "description": "Scrape data from single page with provided URL.",
6    "version": "0.0",
7    "meta": {
8        "templateId": "ts-start"
9    },
10    "input": "./input_schema.json",
11    "dockerfile": "./Dockerfile"
12}

.actor/input_schema.json

1{
2    "title": "Scrape data from a web page",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "url": {
7        "title": "URL of the page",
8        "type": "string",
9        "description": "The URL of website you want to get the data from.",
10        "editor": "textfield",
11        "prefill": "https://www.apify.com"
12        }
13    },
14    "required": ["url"]
15}

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
14
15# dist folder
16dist

.gitignore

# Installed dependencies and build output
node_modules
dist
tsconfig.tsbuildinfo

# Apify and Crawlee local storage folders
apify_storage
crawlee_storage

# Ignore everything under ./storage except the default key-value store's
# INPUT.json. Do NOT add a bare `storage` entry here: git cannot re-include
# a file whose parent directory is excluded, so the negations below would
# silently stop working.
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json

package.json

1{
2    "name": "ts-start",
3    "version": "0.0.1",
4    "type": "module",
5    "description": "This is an example of an Apify actor.",
6    "engines": {
7        "node": ">=18.0.0"
8    },
9    "dependencies": {
10        "apify": "^3.1.10",
11        "axios": "^1.5.0",
12        "cheerio": "^1.0.0-rc.12"
13    },
14    "devDependencies": {
15        "@apify/tsconfig": "^0.1.0",
16        "tsx": "^4.6.2",
17        "typescript": "^5.3.3"
18    },
19    "scripts": {
20        "start": "npm run start:dev",
21        "start:prod": "node dist/main.js",
22        "start:dev": "tsx src/main.ts",
23        "build": "tsc",
24        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
25    },
26    "author": "It's not you it's me",
27    "license": "ISC"
28}

src/main.ts

// Entry point of the Actor: downloads a single page, extracts all of its
// headings (h1-h6) and stores them in the default dataset.

// Axios - Promise based HTTP client for the browser and node.js (Read more at https://axios-http.com/docs/intro).
import axios from 'axios';
// Cheerio - The fast, flexible & elegant library for parsing and manipulating HTML and XML (Read more at https://cheerio.js.org/).
import * as cheerio from 'cheerio';
// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/).
import { Actor } from 'apify';

// this is ESM project, and as such, it requires you to specify extensions in your relative imports
// read more about this here: https://nodejs.org/docs/latest-v18.x/api/esm.html#mandatory-file-extensions
// note that we need to use `.js` even when inside TS files
// import { router } from './routes.js';

// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init().
await Actor.init();

/** Shape of the Actor input; the structure is defined in input_schema.json. */
interface Input {
    url: string;
}

/** One extracted heading: its tag name in lower case and its text content. */
interface Heading {
    level: string;
    text: string;
}

const input = await Actor.getInput<Input>();
if (!input) throw new Error('Input is missing!');
const { url } = input;

// The type parameter of getInput() is not checked at runtime, so verify that
// `url` is really a usable string before handing it to the HTTP client.
if (typeof url !== 'string' || url.trim() === '') {
    throw new Error('Input field "url" is missing or empty!');
}

// Fetch the HTML content of the page. The body is requested as plain text so
// that it is always passed to Cheerio as a string.
const response = await axios.get<string>(url, { responseType: 'text' });

// Parse the downloaded HTML with Cheerio to enable data extraction.
const $ = cheerio.load(response.data);

// Extract all headings from the page (tag name and text).
const headings: Heading[] = [];
$('h1, h2, h3, h4, h5, h6').each((_i, element) => {
    const heading$ = $(element);
    const headingObject: Heading = {
        level: heading$.prop('tagName').toLowerCase(),
        text: heading$.text(),
    };
    console.log('Extracted heading', headingObject);
    headings.push(headingObject);
});

// Save headings to Dataset - a table-like storage.
await Actor.pushData(headings);

// Gracefully exit the Actor process. It's recommended to quit all Actors with an exit().
await Actor.exit();

tsconfig.json

1{
2    "extends": "@apify/tsconfig",
3    "compilerOptions": {
4        "module": "NodeNext",
5        "moduleResolution": "NodeNext",
6        "target": "ES2022",
7        "outDir": "dist",
8        "noUnusedLocals": false,
9        "skipLibCheck": true,
10        "lib": ["DOM"]
11    },
12    "include": [
13        "./src/**/*"
14    ]
15}

Pricing

Pricing model

Pay per usage

This Actor is paid per platform usage. The Actor is free to use, and you only pay for the Apify platform usage.