Xplate avatar
Xplate

Pricing

Pay per usage

Go to Store
Xplate

Xplate

Developed by

DevFlexi

DevFlexi

Maintained by Community

0.0 (0)

Pricing

Pay per usage

0

Total users

2

Monthly users

2

Runs succeeded

>99%

Last modified

3 months ago

.actor/Dockerfile

# Two-stage build: the `builder` stage installs all dependencies (including
# dev dependencies) and runs the `build` script; the final stage installs
# production dependencies only and copies the compiled `dist` folder over.
#
# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20 AS builder
# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
# Install all dependencies, including dev dependencies (needed for the build).
# Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false
# Next, copy the source files into the builder stage.
# Files matched by .dockerignore (e.g. node_modules, dist) are not copied.
COPY . ./
# Build the project by running the `build` script from package.json.
RUN npm run build
# Create final image
FROM apify/actor-node:20
# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging. `npm list` is allowed to fail (`|| true`) so that it
# cannot break the build; the npm cache is removed at the end.
RUN npm --quiet set progress=false \
&& npm install --omit=dev --omit=optional \
&& echo "Installed NPM packages:" \
&& (npm list --omit=dev --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version \
&& rm -r ~/.npm
# Copy built JS files from builder image
COPY --from=builder /usr/src/app/dist ./dist
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./
# Create a non-root user, hand it ownership of the app directory,
# and run as that user from here on.
RUN adduser -h /home/apify -D apify && \
chown -R apify:apify ./
USER apify
# Run the image using the `start:prod` script from package.json.
CMD npm run start:prod --silent

.actor/actor.json

{
"actorSpecification": 1,
"name": "xplate-license-plate-scraper",
"title": "Xplate License Plate Scraper",
"description": "An Apify Actor that scrapes license plate data from Xplate using a provided page number input. It extracts images, prices, durations, and details (emirate, character, number) from the specified page.",
"version": "0.0",
"meta": {
"templateId": "ts-start"
},
"input": "./input_schema.json",
"dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
"title": "Scrape data from Xplate",
"type": "object",
"schemaVersion": 1,
"properties": {
"pageNumber": {
"title": "Page Number",
"type": "integer",
"description": "The page number to scrape from Xplate.",
"editor": "number",
"default": 1
}
},
"required": ["pageNumber"]
}

src/main.ts

import axios from 'axios';
import * as cheerio from 'cheerio';
import { Actor } from 'apify';

await Actor.init();

/** Actor input, as declared in `.actor/input_schema.json`. */
interface Input {
    pageNumber: number;
}

// Get input (expecting a JSON object like: { "pageNumber": 3 })
const input = await Actor.getInput<Input>();
if (!input) throw new Error("Input is missing! Please provide a JSON object with a 'pageNumber' property.");

// A missing (or zero) page number falls back to the first page. Anything else
// must be a positive integer, because the value is interpolated into the URL.
const pageNumber = Number(input.pageNumber || 1);
if (!Number.isInteger(pageNumber) || pageNumber < 1) {
    throw new Error(`Invalid 'pageNumber': expected a positive integer, got ${JSON.stringify(input.pageNumber)}.`);
}
console.log(`Scraping page number: ${pageNumber}`);

// Construct the URL using the provided page number.
const targetUrl = `https://xplate.com/en/numbers/license-plates?page=${pageNumber}`;

// Define your selectors configuration.
const XPLATES_SELECTORS = {
    SOURCE_NAME: 'xplate',
    ERROR_MESSAGE_SELECTOR: 'div.alert.alert-warning.text-center.m-0.rounded-3',
    ALL_PLATES: 'div[class="number-card"]',
    PLATE_PRICE: 'span.custom-red.dm-white',
    PLATE_DURATION: 'div.d-flex.align-items-center.meta > div > span',
    PLATE_LINK: 'a[class="p-0 m-0 lower-part default-dark-btn px-1 text-center bordered dm-bordered"]',
    URL: targetUrl,
    SKIP_CONFIGURATION: {
        CALL_FOR_PRICE: 'Call For Price',
        FEATURED: 'featured',
        CHARACTER_HAS_NOC: 'noc',
    },
};

/** One scraped license plate listing, as pushed to the dataset. */
interface Plate {
    image: string;
    price: string;
    duration: string;
    emirate: string;
    character: string;
    number: string;
    source: string;
}

// Patterns for the details encoded in a plate's detail-page URL. They are
// defined once here instead of being rebuilt for every plate in the loop.
const EMIRATE_PATTERN = /\/(\d+)-(.+?)-code-/;
const CHARACTER_PATTERN = /-code-(.+?)-plate-number-/;
const NUMBER_PATTERN = /plate-number-(\d+)/;

/**
 * Extracts emirate, character and number from a plate detail URL.
 *
 * @param url - The `href` of the plate link; may be an empty string.
 * @returns The three parts, each an empty string when its pattern does not match.
 */
function parsePlateUrl(url: string): Pick<Plate, 'emirate' | 'character' | 'number'> {
    return {
        emirate: EMIRATE_PATTERN.exec(url)?.[2] ?? '',
        character: CHARACTER_PATTERN.exec(url)?.[1] ?? '',
        number: NUMBER_PATTERN.exec(url)?.[1] ?? '',
    };
}

// Fetch the HTML content of the page using axios.
const response = await axios.get(targetUrl, {
    headers: {
        // Include cookies if required by the site.
        // NOTE(review): this value is a literal placeholder and is sent as-is;
        // replace it with real cookies or remove the header if the site does not need one.
        'Cookie': 'XSRF-TOKEN=...; xplate_session=...'
    }
});
const html = response.data;
const $ = cheerio.load(html);

// Check for an error message on the page.
// NOTE(review): Actor.exit() is expected to end the process here, with a
// success status; confirm that is the intended outcome for an error page.
if ($(XPLATES_SELECTORS.ERROR_MESSAGE_SELECTOR).length) {
    console.error('Error message found on the page, aborting.');
    await Actor.exit();
}

// Select all plate elements. The first matched element is always skipped.
// NOTE(review): the reason for skipping it is not visible in this file; confirm
// it is not a real listing.
const plateElements = Array.from($(XPLATES_SELECTORS.ALL_PLATES)).slice(1);
console.log(`Found ${plateElements.length} plate elements.`);

// Extract information for each plate element. Missing attributes or text
// yield empty strings rather than failing the run.
const plates: Plate[] = plateElements.map((plateEl) => {
    const plateElement = $(plateEl);
    const url = plateElement.find(XPLATES_SELECTORS.PLATE_LINK).attr('href') ?? '';

    return {
        image: plateElement.find('img').attr('data-src') ?? '',
        price: plateElement.find(XPLATES_SELECTORS.PLATE_PRICE).text().trim(),
        duration: plateElement.find(XPLATES_SELECTORS.PLATE_DURATION).text().trim(),
        // Additional details are encoded in the detail-page URL.
        ...parsePlateUrl(url),
        source: XPLATES_SELECTORS.SOURCE_NAME,
    };
});

// Log the extracted plates.
console.log(JSON.stringify(plates, null, 2));

// Save the results to the default dataset.
await Actor.pushData(plates);

await Actor.exit();

.dockerignore

# configurations
.idea
.vscode
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git
# dist folder
dist

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"root": true,
"env": {
"browser": true,
"es2020": true,
"node": true
},
"extends": [
"@apify/eslint-config-ts"
],
"parserOptions": {
"project": "./tsconfig.json",
"ecmaVersion": 2020
},
"ignorePatterns": [
"node_modules",
"dist",
"**/*.d.ts"
]
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.idea
.vscode
apify_storage
crawlee_storage
node_modules
dist
tsconfig.tsbuildinfo
# Ignore everything under storage/ except the default key-value store's
# INPUT.json. The storage directory itself must not be ignored outright
# (a bare `storage` pattern): Git cannot re-include a file whose parent
# directory is excluded, so the negations below would have no effect.
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json

package.json

{
"name": "ts-start",
"version": "0.0.1",
"type": "module",
"description": "Apify Actor that scrapes license plate listings from Xplate for a given page number.",
"engines": {
"node": ">=18.0.0"
},
"dependencies": {
"apify": "^3.2.6",
"axios": "^1.5.0",
"cheerio": "^1.0.0-rc.12"
},
"devDependencies": {
"@apify/eslint-config-ts": "^0.3.0",
"@apify/tsconfig": "^0.1.0",
"@typescript-eslint/eslint-plugin": "^7.18.0",
"@typescript-eslint/parser": "^7.18.0",
"eslint": "^8.50.0",
"tsx": "^4.6.2",
"typescript": "^5.3.3"
},
"scripts": {
"start": "npm run start:dev",
"start:prod": "node dist/main.js",
"start:dev": "tsx src/main.ts",
"build": "tsc",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
},
"author": "It's not you it's me",
"license": "ISC"
}

tsconfig.json

{
"extends": "@apify/tsconfig",
"compilerOptions": {
"module": "NodeNext",
"moduleResolution": "NodeNext",
"target": "ES2022",
"outDir": "dist",
"noUnusedLocals": false,
"skipLibCheck": true,
"lib": ["DOM"]
},
"include": [
"./src/**/*"
]
}