Chrono24
Deprecated
Pricing
Pay per usage
Go to Store
Chrono24
Deprecated
This is a beta version. This Actor fetches data about watches listed on Chrono24: select any category on Chrono24 and paste its URL into the input.
0.0 (0)
Pricing
Pay per usage
1
Total users
32
Monthly users
1
Runs succeeded
0%
Last modified
a year ago
.actor/Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:18

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging. The npm cache is removed at the end of the same RUN
# step so it does not end up in the image layer.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image. "npm start" runs the "start" script from package.json.
CMD npm start --silent
.actor/actor.json
{
    "actorSpecification": 1,
    "name": "my-actor",
    "title": "Scrape single page in JavaScript",
    "description": "Scrape data from single page with provided URL.",
    "version": "0.0",
    "meta": {
        "templateId": "js-start"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}
.actor/input_schema.json
{
    "title": "Scrape data from a web page",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "url": {
            "title": "URL of the page",
            "type": "string",
            "description": "The URL of website you want to get the data from.",
            "editor": "textfield",
            "prefill": "https://www.chrono24.in/rolex/index.htm"
        }
    },
    "required": [
        "url"
    ]
}
src/main.js
// Axios - Promise based HTTP client for the browser and node.js (Read more at https://axios-http.com/docs/intro).
import axios from 'axios';
// Cheerio - The fast, flexible & elegant library for parsing and manipulating HTML and XML (Read more at https://cheerio.js.org/).
import * as cheerio from 'cheerio';
// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/).
import { Actor } from 'apify';

// CSS selector of one watch card on a listing (search result) page.
const LISTING_ITEM_SELECTOR = '.article-item-container.wt-search-result.article-image-carousel';
// CSS selector of the row inside a card that holds price, currency and location.
const PRICE_ROW_SELECTOR = '.d-flex.justify-content-between.align-items-end.m-b-1';
// CSS selector of the rating element on a watch detail page.
const RATING_SELECTOR = '.m-b-2.d-flex.justify-content-between span.rating';

/**
 * Extracts the first integer amount from a price label.
 *
 * Commas are treated as grouping separators and removed, whatever the group
 * size, so both "1,234,567" and "12,34,567" parse to the full number.
 *
 * @param {string} priceText - Raw text of the price element.
 * @returns {number|null} The parsed amount, or null when the text has no digits.
 */
function parsePrice(priceText) {
    const match = priceText.match(/\d[\d,]*/);
    return match ? Number.parseInt(match[0].replace(/,/g, ''), 10) : null;
}

/**
 * Turns the href of a listing card into an absolute URL.
 *
 * @param {string|undefined} href - Value of the anchor's href attribute.
 * @param {string} siteRoot - Absolute root URL of the site being scraped.
 * @returns {string|null} The absolute URL, or null when href is missing or unparsable.
 */
function resolveLink(href, siteRoot) {
    if (!href) return null;
    try {
        // new URL() copes with root-relative, relative and absolute hrefs,
        // so no doubled slashes or ".../undefined" links are produced.
        return new URL(href, siteRoot).href;
    } catch {
        return null;
    }
}

/**
 * Downloads a watch detail page and extracts its rating and zoom image URLs.
 *
 * This is best-effort: any failure is logged and a placeholder object is
 * returned so that one broken detail page does not abort the whole run.
 *
 * @param {string|null} link - Absolute URL of the detail page.
 * @returns {Promise<{rating: string, images: string[]}>} Rating text and image URLs.
 */
async function fetchProductDetails(link) {
    try {
        if (!link) throw new Error('Listing item has no detail link.');
        const { data } = await axios.get(link);
        const $item = cheerio.load(data);

        const rating = $item(RATING_SELECTOR).text().trim();
        const images = $item('div[data-zoom-image]')
            .toArray()
            .map((element) => $item(element).attr('data-zoom-image'));

        return { rating, images };
    } catch (error) {
        console.error('Error:', error.message);
        // Same keys as the success path so every dataset row has one shape.
        return { rating: 'null', images: [] };
    }
}

// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init().
await Actor.init();

// Structure of input is defined in input_schema.json
const input = await Actor.getInput();
const url = input?.url;
if (typeof url !== 'string' || url.trim() === '') {
    // Fail the run with a readable message instead of an opaque TypeError.
    await Actor.fail('Input must contain a non-empty "url" string.');
}

// Detail links are resolved against the root of the site given in the input,
// so the Actor is not tied to a single Chrono24 country domain.
const siteRoot = new URL('/', url).href;

// Fetch the HTML content of the listing page.
const response = await axios.get(url);

// Parse the downloaded HTML with Cheerio to enable data extraction.
const $ = cheerio.load(response.data);

// Extract one record per watch card found on the listing page.
const heading = $(LISTING_ITEM_SELECTOR)
    .toArray()
    .map((element) => {
        const card = $(element);
        const anchor = card.find('a');
        const priceRow = card.find(PRICE_ROW_SELECTOR);

        return {
            id: anchor.attr('data-article-id'),
            link: resolveLink(anchor.attr('href'), siteRoot),
            titleElement: card.find('.text-sm.text-sm-md.text-bold.text-ellipsis').text().trim(),
            description: card.find('.text-sm.text-sm-md.text-ellipsis.m-b-2').text().trim(),
            price: parsePrice(priceRow.find('.text-bold').text().trim()),
            location: priceRow.find('.text-sm.text-uppercase').text().trim(),
            currency: priceRow.find('.currency').text().trim(),
        };
    });

// Enrich every record with data from its detail page (rating, images).
// The requests are intentionally sequential, one detail page at a time.
for (const watch of heading) {
    watch.EachProduct = await fetchProductDetails(watch.link);
}

// Save the watch records to Dataset - a table-like storage.
await Actor.pushData(heading);

// Gracefully exit the Actor process. It's recommended to quit all Actors with an exit().
await Actor.exit();
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git
.gitignore
# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage

# Ignore everything under storage/ except the default key-value store's
# INPUT.json; each "!" line re-includes the directory needed to reach it.
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json
package.json
{
    "name": "js-scrape-single-page",
    "version": "0.0.1",
    "type": "module",
    "description": "This is an example of an Apify actor.",
    "engines": {
        "node": ">=18.0.0"
    },
    "dependencies": {
        "apify": "^3.1.10",
        "axios": "^1.5.0",
        "cheerio": "^1.0.0-rc.12"
    },
    "scripts": {
        "start": "node ./src/main.js",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}