import Apify from 'apify';
import { gotScraping } from 'got-scraping';
import * as cheerio from 'cheerio';

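// Read the actor input and normalize the start URL: drop any query string and
// map the site's foreign-domain variants back to the canonical ingatlan.com.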
const input = await Apify.getInput();
let homeUrl = input.url.split('?')[0];
const foreignAddresses = ['realestatehungary.hu', 'immobilienungarn.net'];
for (const foreignAddress of foreignAddresses) {
    homeUrl = homeUrl.replace(foreignAddress, 'ingatlan.com');
}

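// Fetch the first list page and read the total page count from the pagination widget.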
const home = await gotScraping(homeUrl);
const $home = cheerio.load(home.body);
const pageNumberText = $home('.pagination__page-number').text().trim();
let nPages = 1;
if (pageNumberText !== '') {
    // The widget text ends in "<n> oldal" ("<n> pages" in Hungarian).
    const match = /(\d+) oldal/.exec(pageNumberText);
    if (match) nPages = parseInt(match[1], 10);
}

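// Enqueue one request per list page; the site paginates via the `page` query parameter.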
const requestQueue = await Apify.openRequestQueue();
for (let n = 1; n <= nPages; n++) {
    await requestQueue.addRequest({
        url: `${homeUrl}?page=${n}`,
        userData: { label: 'LISTPAGE' },
    });
}

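// The crawler handles two kinds of pages: list pages (labelled LISTPAGE), from
// which it enqueues the individual listings, and listing detail pages, from
// which it extracts the data.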
const crawler = new Apify.CheerioCrawler({
    requestQueue,
    handlePageFunction: async ({ request, $ }) => {
        if (request.userData.label === 'LISTPAGE') {
            // On a list page, enqueue every listing's detail page.
            await Apify.utils.enqueueLinks({
                $,
                requestQueue,
                selector: 'div a.listing__link.js-listing-active-area[href]',
                baseUrl: 'https://ingatlan.com/',
            });
            return;
        }
        // Otherwise this is a listing detail page: extract the fields.
        const parameterValues = $('div.parametersContainer div.parameterValues');
        const data = {
            address: $('h1.address').text().trim(),
            price: parameterValues.eq(0).find('span').eq(0).text().trim(),
            sqm: parameterValues.eq(1).text().trim(),
            rooms: parameterValues.eq(2).text().trim(),
            url: request.loadedUrl,
        };
        // pushData is async; await it so failed writes are not silently dropped.
        await Apify.pushData(data);
    },
});

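// Run the crawler; this resolves once the request queue is empty.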
await crawler.run();