1
2import { Actor } from 'apify';
3
4import { CheerioCrawler, Dataset } from 'crawlee';
5
6
7
8
9
// Initialize the Actor runtime; this must happen before any other Actor.* call.
await Actor.init();

// Read the Actor input, falling back to defaults when no input (or a missing
// field) is provided.
const {
    startUrls = ['https://crawlee.dev'],
    maxRequestsPerCrawl = 100,
} = (await Actor.getInput()) ?? {};

// Proxy configuration created with the SDK defaults (no explicit options).
const proxyConfiguration = await Actor.createProxyConfiguration();

const crawler = new CheerioCrawler({
    proxyConfiguration,
    maxRequestsPerCrawl,

    /**
     * Handles one fetched page: logs the page title and stores one dataset
     * item per matching `link.2gis` anchor.
     *
     * @param {object} context - Crawling context supplied by CheerioCrawler.
     * @param {object} context.request - The request being processed.
     * @param {Function} context.$ - Cheerio handle for the loaded document.
     * @param {object} context.log - Logger scoped to this crawler.
     * @returns {Promise<void>} Resolves once all items have been stored.
     */
    async requestHandler({ request, $, log }) {
        const title = $('title').text();
        log.info(title, { url: request.loadedUrl });

        // Collect the items synchronously first. Calling pushData() inside a
        // Cheerio `.each` callback leaves its promise floating: the handler
        // could finish before the writes do, and a failed write would surface
        // as an unhandled rejection instead of failing (and retrying) the request.
        const items = $('a[href*="link.2gis"]')
            .toArray()
            .map((el) => $(el).text())
            // Keep only anchor texts that contain a dot and do not contain "hh".
            // NOTE(review): presumably selects domain-like texts and skips
            // hh-related links — confirm the intent with the author.
            .filter((site) => site.includes('.') && !site.includes('hh'))
            .map((site) => ({ site, title }));

        // One awaited write for the whole page; skipped when nothing matched.
        if (items.length > 0) {
            await Actor.pushData(items);
        }
    },
});

await crawler.run(startUrls);

// Shut the Actor down once the crawl has finished.
await Actor.exit();