1
2import { Actor } from 'apify';
3import _ from 'lodash';
4
5import { CheerioCrawler } from 'crawlee';
6
7
8
9
10
// Initialize the Actor before any other Actor API is used below.
await Actor.init();

// Read the Actor input; when no input is provided (null/undefined), fall back
// to an empty object so that the destructuring defaults below apply.
const actorInput = (await Actor.getInput()) ?? {};
const { startUrls = ['https://crawlee.dev'], maxRequestsPerCrawl = 100 } = actorInput;

// Proxy configuration created with no explicit options.
const proxyConfiguration = await Actor.createProxyConfiguration();
20
// Crawler that scans each fetched page for messenger / social-network contact
// links (Telegram, VK, WhatsApp) and pushes them via `Actor.pushData`.
const crawler = new CheerioCrawler({
    proxyConfiguration,
    maxRequestsPerCrawl,
    /**
     * Handles a single fetched page: logs its title, collects the `href` of
     * every anchor matching one of the contact selectors, de-duplicates them
     * and pushes one record `{ url, contacts: [{ contact }] }` for the page.
     *
     * This handler does not enqueue any further links.
     *
     * @param {object} context - Crawling context supplied by the crawler.
     * @param {object} context.request - Current request; `loadedUrl` is logged and stored.
     * @param {Function} context.$ - Cheerio handle for the loaded document.
     * @param {object} context.log - Logger used for the per-page info message.
     * @returns {Promise<void>} Resolves once the record has been pushed.
     */
    async requestHandler({ request, $, log }) {
        const title = $('title').text();
        log.info(title, { url: request.loadedUrl });

        // Substring matches on the href attribute, in the order the results
        // should appear: Telegram, VK, WhatsApp.
        const contactSelectors = [
            'a[href*="t.me"]',
            // NOTE(review): this matches ANY href containing "vk", not only VK
            // domains — confirm whether it should be narrowed (e.g. "vk.com").
            'a[href*="vk"]',
            'a[href*="wa.me"], a[href*="api.whatsapp"]',
        ];

        const contacts = contactSelectors.flatMap((selector) =>
            $(selector)
                .toArray()
                .map((link) => ({ contact: $(link).attr('href') })),
        );

        // The same href can match several selectors or occur multiple times
        // on a page; keep only the first occurrence of each.
        const uniqueContacts = _.uniqBy(contacts, 'contact');

        // Await the write so that a failed push fails this request instead of
        // becoming an unhandled rejection after the handler has returned.
        await Actor.pushData({ url: request.loadedUrl, contacts: uniqueContacts });
    },
});
49
// Run the crawl, seeded with the configured start URLs, and wait for it to finish.
await crawler.run(startUrls);

// Exit the Actor once the crawl has completed.
await Actor.exit();