Ingatlan.com Scraper avatar
Ingatlan.com Scraper

Deprecated

Pricing

Pay per usage

Go to Store
Ingatlan.com Scraper

Ingatlan.com Scraper

Deprecated

Developed by

Mark Varga

Mark Varga

Maintained by Community

Scraper for the Hungarian property market Ingatlan.com

0.0 (0)

Pricing

Pay per usage

1

Total users

35

Monthly users

1

Runs succeeded

>99%

Last modified

3 years ago

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"extends": "@apify"
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.idea
node_modules

Dockerfile

# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16
# Second, copy just package.json and package-lock.json since it should be
# the only file that affects "npm install" in the next step, to speed up the build
COPY package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list --only=prod --no-optional --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./
# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start

INPUT_SCHEMA.json

{
"title": "Input schema for the apify_project actor.",
"type": "object",
"schemaVersion": 1,
"properties": {
"url": {
"title": "URL to the list of results to scrape.",
"type": "string",
"description": "Use this to first filter the list to your liking.",
"editor": "textfield",
"default": "https://ingatlan.com/szukites/elado+haz+v-ker+u:K%C3%A1lvin_t%C3%A9r|35834+v-ker"
}
},
"required": ["url"]
}

apify.json

{
"env": { "npm_config_loglevel": "silent" }
}

main.js

import Apify from 'apify';
import { gotScraping } from 'got-scraping';
import cheerio from 'cheerio';

// Read the actor input; `url` points at a (possibly pre-filtered) results-list
// page on ingatlan.com or one of its foreign-language mirror domains.
const input = await Apify.getInput();

// Strip any query string — the pagination param is re-added below.
let homeUrl = input.url.split('?')[0];

// Normalize foreign-language mirror hosts to the canonical ingatlan.com host.
// BUGFIX: the original called forEach with its (value, index) parameters
// swapped, so it tried to replace the numeric index, and it also discarded
// the String.replace() return value — so no normalization ever happened.
const FOREIGN_HOSTS = ['realestatehungary.hu', 'immobilienungarn.net'];
for (const host of FOREIGN_HOSTS) {
    homeUrl = homeUrl.replace(host, 'ingatlan.com');
}

// Fetch the first list page to discover how many result pages exist.
const home = await gotScraping(homeUrl);
const $home = cheerio.load(home.body);
const pageNumberText = $home('.pagination__page-number').text().trim();
let nPages = 1;
if (pageNumberText !== '') {
    // The pagination label ends in "<total> oldal" ("oldal" = "page" in
    // Hungarian); capture the total page count. Non-global regex, \d+ so an
    // empty match cannot slip through, and a null-check so a markup change
    // degrades to a single page instead of crashing on m[1].
    const m = /(\d+) oldal/.exec(pageNumberText);
    if (m !== null) {
        nPages = Number.parseInt(m[1], 10);
    }
}

// Enqueue one request per list page; LISTPAGE requests only harvest links.
const requestQueue = await Apify.openRequestQueue();
for (let n = 1; n <= nPages; n++) {
    await requestQueue.addRequest({
        url: `${homeUrl}?page=${n}`,
        userData: { label: 'LISTPAGE' },
    });
}

const crawler = new Apify.CheerioCrawler({
    requestQueue,
    handlePageFunction: async ({ request, $ }) => {
        if (request.userData.label === 'LISTPAGE') {
            // List page: enqueue every listing detail link and stop here.
            await Apify.utils.enqueueLinks({
                $,
                requestQueue,
                selector: 'div a.listing__link.js-listing-active-area[href]',
                baseUrl: 'https://ingatlan.com/',
            });
            return;
        }
        // Detail page: extract the core listing parameters.
        const parameterValues = $('div.parametersContainer div.parameterValues');
        // BUGFIX: pushData returns a Promise; await it so failures surface
        // inside the handler instead of floating.
        await Apify.pushData({
            address: $('h1.address').text().trim(),
            price: parameterValues.eq(0).find('span').eq(0).text().trim(),
            sqm: parameterValues.eq(1).text().trim(),
            rooms: parameterValues.eq(2).text().trim(),
            url: request.loadedUrl,
        });
    },
});

await crawler.run();

package.json

{
"name": "ingatlancom-scraper",
"version": "0.0.1",
"description": "A scraper for the Hungarian property market Ingatlan.com",
"dependencies": {
"apify": "^2.0.7"
},
"devDependencies": {
"@apify/eslint-config": "^0.1.3",
"eslint": "^7.0.0"
},
"scripts": {
"start": "node main.js",
"lint": "./node_modules/.bin/eslint . --ext .js,.jsx",
"lint:fix": "./node_modules/.bin/eslint . --ext .js,.jsx --fix",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
},
"author": "It's not you it's me",
"license": "ISC",
"type": "module"
}