My Actor
Under maintenance
Pricing
Pay per usage
Go to Store
My Actor
Under maintenance
0.0 (0)
Pricing
Pay per usage
0
Monthly users
1
Runs succeeded
>99%
Last modified
a month ago
.actor/Dockerfile
1# Specify the base Docker image. You can read more about
2# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node:20
5
6# Check preinstalled packages
7RUN npm ls crawlee apify puppeteer playwright
8
9# Copy just package.json and package-lock.json
10# to speed up the build using Docker layer cache.
11COPY package*.json ./
12
13# Install NPM packages, skip optional and development dependencies to
14# keep the image small. Avoid logging too much and print the dependency
15# tree for debugging
16RUN npm --quiet set progress=false \
17 && npm install --omit=dev --omit=optional \
18 && echo "Installed NPM packages:" \
19 && (npm list --omit=dev --all || true) \
20 && echo "Node.js version:" \
21 && node --version \
22 && echo "NPM version:" \
23 && npm --version \
24 && rm -r ~/.npm
25
26# Next, copy the remaining files and directories with the source code.
27# Since we do this after NPM install, quick build will be really fast
28# for most source file changes.
29COPY . ./
30
31# Create and run as a non-root user.
32RUN adduser -h /home/apify -D apify && \
33 chown -R apify:apify ./
34USER apify
35
36# Run the image.
37CMD npm start --silent
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "my-actor",
4 "title": "Scrape single page in JavaScript",
5 "description": "Scrape data from single page with provided URL.",
6 "version": "0.0",
7 "meta": {
8 "templateId": "js-start"
9 },
10 "input": "./input_schema.json",
11 "dockerfile": "./Dockerfile"
12}
.actor/input_schema.json
1{
2 "title": "Yelp Scraper Input",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "busqueda": {
7 "type": "string",
8 "title": "Profesión o Categoría",
9 "description": "Ejemplo: restaurantes, abogados, veterinarias.",
10 "editor": "textfield"
11 },
12 "lugar": {
13 "type": "string",
14 "title": "Ciudad o Ubicación",
15 "description": "Ejemplo: Bogotá, Medellín, Cali.",
16 "editor": "textfield"
17 }
18 },
19 "required": ["busqueda", "lugar"]
20}
src/main.js
1const Apify = require('apify');
2
/**
 * Actor entry point.
 *
 * Reads the `busqueda` (category) and `lugar` (location) input fields, opens
 * the matching Yelp search page in Puppeteer, scrolls it to the bottom, and
 * pushes the scraped `{ name, address, rating }` items to the default dataset.
 * When nothing could be scraped, a single `{ error }` record is pushed instead.
 *
 * Written against the Apify SDK v3 API (`Actor.*`), which is what package.json
 * declares; the v2-style `Apify.main` / `Apify.getInput` / `Apify.pushData` /
 * `Apify.launchPuppeteer` helpers no longer exist on the v3 module.
 *
 * @throws {Error} When `busqueda` or `lugar` is missing from the input.
 */
Apify.Actor.main(async () => {
    const { Actor } = Apify;
    // In SDK v3 the browser launcher lives in Crawlee (declared in package.json).
    const { launchPuppeteer } = require('crawlee');

    // Fixed pause after scrolling so lazily loaded results can finish rendering.
    const RESULTS_SETTLE_MS = 30000;

    // getInput() resolves to null when the Actor is started without input;
    // fall back to an empty object so the validation below reports the
    // problem instead of a destructuring TypeError.
    const input = (await Actor.getInput()) || {};
    const { busqueda, lugar } = input;
    if (!busqueda || !lugar) {
        throw new Error('Debes proporcionar "busqueda" y "lugar".');
    }

    // Yelp search URL format:
    // https://www.yelp.com/search?find_desc=<busqueda>&find_loc=<lugar>
    const searchUrl = `https://www.yelp.com/search?find_desc=${encodeURIComponent(busqueda)}&find_loc=${encodeURIComponent(lugar)}`;
    console.log(`Navegando a: ${searchUrl}`);

    // Browser options must be nested under `launchOptions`. Run headless on
    // the Apify platform (no display there) and visible locally for debugging.
    const browser = await launchPuppeteer({
        launchOptions: { headless: Actor.isAtHome() },
    });

    let results;
    try {
        const page = await browser.newPage();
        await page.goto(searchUrl, { waitUntil: 'networkidle2' });

        // Scroll to the bottom so every result on the page gets loaded.
        await autoScroll(page);

        // page.waitForTimeout() was removed from recent Puppeteer releases,
        // so wait with a plain timer instead.
        await new Promise((resolve) => setTimeout(resolve, RESULTS_SETTLE_MS));

        // Debug screenshot of the page state right before extraction.
        await page.screenshot({ path: 'debug_yelp.png' });
        console.log('Screenshot guardado: debug_yelp.png');

        // NOTE(review): the class names below are generated by Yelp's build
        // and may no longer match the live markup — confirm against the page.
        results = await page.evaluate(() => {
            const items = [];
            const containers = document.querySelectorAll('ul.lemon--ul__373c0__1_cxs li');
            for (const li of containers) {
                // Only list items that carry a business-name link are results.
                const nameElement = li.querySelector('a.css-166la90');
                if (!nameElement) {
                    continue;
                }
                const addressElement = li.querySelector('address');
                const ratingElement = li.querySelector('div.i-stars__373c0__1T6rz');
                items.push({
                    name: nameElement.innerText.trim(),
                    address: addressElement ? addressElement.innerText.trim() : 'No disponible',
                    // The rating text is read from the element's aria-label.
                    rating: ratingElement ? ratingElement.getAttribute('aria-label') : 'No disponible',
                });
            }
            return items;
        });

        console.log('Resultados extraídos:', results);
    } finally {
        // Always release the browser, even when navigation or scraping fails.
        await browser.close();
    }

    // With no results, push one record that explains the likely cause.
    if (results.length === 0) {
        await Actor.pushData({ error: "No se extrajeron resultados. Verifica los selectores o la carga de la página." });
    } else {
        await Actor.pushData(results);
    }
});
62
/**
 * Scrolls the page down in fixed steps until the bottom is reached.
 *
 * Every 200 ms the page is scrolled by 100 px; scrolling stops once the
 * accumulated distance reaches the document height minus the viewport height.
 *
 * @param {object} page - Page-like object whose `evaluate(fn)` runs `fn` in the page.
 * @returns {Promise<void>} Resolves when scrolling has finished.
 */
async function autoScroll(page) {
    const scrollToBottom = () => new Promise((done) => {
        const step = 100;
        let scrolled = 0;
        const handle = setInterval(() => {
            // Read the height before scrolling: it can grow as content loads.
            const pageHeight = document.body.scrollHeight;
            window.scrollBy(0, step);
            scrolled += step;
            if (scrolled < pageHeight - window.innerHeight) {
                return;
            }
            clearInterval(handle);
            done();
        }, 200);
    });

    await page.evaluate(scrollToBottom);
}
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
.gitignore
1# This file tells Git which files shouldn't be added to source control
2.DS_Store
3.idea
4dist
5node_modules
6apify_storage
7storage/*
8!storage/key_value_stores
9storage/key_value_stores/*
10!storage/key_value_stores/default
11storage/key_value_stores/default/*
12!storage/key_value_stores/default/INPUT.json
package.json
1{
2 "name": "yelp-scraper-actor",
3 "version": "1.0.0",
4 "description": "Actor de Apify para extraer resultados de Yelp usando Crawlee y Puppeteer",
5 "main": "src/main.js",
6 "scripts": {
7 "start": "node src/main.js"
8 },
9 "dependencies": {
10 "apify": "^3.3.2",
11 "crawlee": "^3.13.0",
12 "puppeteer": "^24.4.0"
13 },
14 "engines": {
15 "node": ">=16.0.0"
16 },
17 "license": "ISC"
18}
Pricing
Pricing model
Pay per usage: This Actor is free to use; you pay only for the Apify platform usage it consumes.