Startup Jobs Scraper avatar

Startup Jobs Scraper

Try for free

No credit card required

Go to Store
Startup Jobs Scraper

Startup Jobs Scraper

martin1080p/startup-jobs-scraper
Try for free

No credit card required

StartupJobs.cz scraper extracts job listings from the site, gathering job titles, company names, locations, and descriptions. It automates data collection, providing up-to-date job information efficiently.

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
14
15# dist folder
16dist

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "root": true,
3    "env": {
4        "browser": true,
5        "es2020": true,
6        "node": true
7    },
8    "extends": [
9        "@apify/eslint-config-ts"
10    ],
11    "parserOptions": {
12        "project": "./tsconfig.json",
13        "ecmaVersion": 2020
14    },
15    "ignorePatterns": [
16        "node_modules",
17        "dist",
18        "**/*.d.ts"
19    ]
20}

.gitignore

1storage
2apify_storage
3crawlee_storage
4node_modules
5dist
6tsconfig.tsbuildinfo
7storage/*
8!storage/key_value_stores
9storage/key_value_stores/*
10!storage/key_value_stores/default
11storage/key_value_stores/default/*
12!storage/key_value_stores/default/INPUT.json
13
14# Added by Apify CLI
15.venv

jest.config.js

/**
 * Jest configuration for the TypeScript test suite.
 *
 * @type {import('ts-jest').JestConfigWithTsJest}
 */
export default {
    preset: 'ts-jest',
    testEnvironment: 'node',
    moduleFileExtensions: ['ts', 'js'],
    transform: {
        // ts-jest options are passed through the transform entry; configuring
        // them under `globals['ts-jest']` is deprecated in ts-jest 29.
        '^.+\\.tsx?$': ['ts-jest', { tsconfig: 'tsconfig.json' }],
    },
    moduleNameMapper: {
        // Sources import siblings with an explicit `.js` suffix; strip it for
        // relative paths so Jest resolves the corresponding `.ts` file.
        '^(\\.\\.?\\/.+)\\.js$': '$1',
    },
};

package.json

1{
2	"name": "startup-jobs-scraper",
3	"version": "0.1.0",
4	"type": "module",
5	"description": "This is an example of an Apify actor.",
6	"engines": {
7		"node": ">=18.0.0"
8	},
9	"dependencies": {
10		"apify": "^3.1.10",
11		"crawlee": "^3.10.5"
12	},
13	"devDependencies": {
14		"@apify/eslint-config-ts": "^0.4.1",
15		"@apify/tsconfig": "^0.1.0",
16		"@stylistic/eslint-plugin-js": "^2.3.0",
17		"@types/jest": "^29.5.12",
18		"@typescript-eslint/eslint-plugin": "^7.14.1",
19		"@typescript-eslint/parser": "^7.14.1",
20		"eslint": "^8.57.0",
21		"jest": "^29.7.0",
22		"ts-jest": "^29.1.5",
23		"tsx": "^4.6.2",
24		"typescript": "^5.3.3"
25	},
26	"scripts": {
27		"start": "npm run start:dev",
28		"start:prod": "node dist/main.js",
29		"start:dev": "tsx src/main.ts",
30		"build": "tsc",
31		"lint": "eslint ./src --ext .ts",
32		"lint:fix": "eslint ./src --ext .ts --fix",
33		"test": "jest"
34	},
35	"author": "Martin Fanta",
36	"license": "MIT"
37}

tsconfig.json

1{
2    "extends": "@apify/tsconfig",
3    "compilerOptions": {
4        "esModuleInterop": true,
5        "module": "NodeNext",
6        "moduleResolution": "NodeNext",
7        "target": "ES2022",
8        "outDir": "dist",
9        "noUnusedLocals": false,
10        "skipLibCheck": true,
11        "lib": ["DOM"],
12    },
13    "include": [
14        "./src/**/*",
15    ]
16}

.actor/actor.json

1{
2	"actorSpecification": 1,
3	"name": "startup-jobs-scraper",
4	"title": "StartupJobs Scraper",
5	"description": "StartupJobs.cz scraper extracts job listings from the site, gathering job titles, company names, locations, and descriptions. It automates data collection, providing up-to-date job information efficiently.",
6	"version": "0.1",
7	"meta": {
8		"templateId": "ts-start"
9	},
10	"input": "./input_schema.json",
11	"dockerfile": "./Dockerfile",
12	"storages": {
13        "dataset": {
14            "actorSpecification": 1,
15            "fields": {},
16            "views": {
17                "overview": {
18                    "title": "Overview",
19                    "transformation": {
20						"fields": [
21							"companyLogo",
22							"name",
23							"description",
24							"mainArea",
25							"company",
26							"areas",
27							"url"
28						]
29					},
30                    "display": {
31						"component": "table",
32						"properties": {
33							"companyLogo": {
34								"label": "Company Logo",
35								"format": "image"
36							},
37							"name": {
38								"label": "Name",
39								"format": "text"
40							},
41							"description": {
42								"label": "Description",
43								"format": "text"
44							},
45							"mainArea": {
46								"label": "Main Area",
47								"format": "text"
48							},
49							"company": {
50								"label": "Company",
51								"format": "text"
52							},
53							"areas": {
54								"label": "Areas",
55								"format": "array"
56							},
57							"url": {
58								"label": "Link",
59								"format": "link"
60							}
61						}
62					}
63                }
64            }
65        }
66    }
67}

.actor/Dockerfile

1# Specify the base Docker image. You can read more about
2# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node:20 AS builder
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY package*.json ./
9
10# Install all dependencies. Don't audit to speed up the installation.
11RUN npm install --include=dev --audit=false
12
13# Next, copy the source files using the user set
14# in the base image.
15COPY . ./
16
17# Build the project by running the "build" script from package.json.
18# Dependencies were already installed by the npm install step above.
19RUN npm run build
20
21# Create final image
22FROM apify/actor-node:20
23
24# Copy just package.json and package-lock.json
25# to speed up the build using Docker layer cache.
26COPY package*.json ./
27
28# Install NPM packages, skip optional and development dependencies to
29# keep the image small. Avoid logging too much and print the dependency
30# tree for debugging
31RUN npm --quiet set progress=false \
32    && npm install --omit=dev --omit=optional \
33    && echo "Installed NPM packages:" \
34    && (npm list --omit=dev --all || true) \
35    && echo "Node.js version:" \
36    && node --version \
37    && echo "NPM version:" \
38    && npm --version \
39    && rm -r ~/.npm
40
41# Copy built JS files from builder image
42COPY --from=builder /usr/src/app/dist ./dist
43
44# Next, copy the remaining files and directories with the source code.
45# Since we do this after NPM install, quick build will be really fast
46# for most source file changes.
47COPY . ./
48
49
50# Run the image.
51CMD npm run start:prod --silent

.actor/input_schema.json

1{
2    "title": "Startup Jobs Scraper",
3    "type": "object",
4    "schemaVersion": 1,
5    "required": [
6        "numAds",
7        "workplaceRemote",
8        "workplaceHybrid",
9        "workplaceOnsite",
10        "locationPrague",
11        "locationBrno",
12        "locationOstrava",
13        "locationCzechia",
14        "locationSanFrancisco",
15        "locationNewYork",
16        "locationLondon",
17        "seniorityJunior",
18        "seniorityMedior",
19        "senioritySenior",
20        "typeOfEmploymentFullTime",
21        "typeOfEmploymentPartTime",
22        "typeOfCooperationFreelance",
23        "typeOfCooperationInternship",
24        "typeOfCooperationContract",
25        "areasDevelopmentBackEnd",
26        "areasDevelopmentFrontEnd",
27        "areasDevelopmentDatabase",
28        "areasDevelopmentDevOps",
29        "areasDevelopmentTesting",
30        "areasDevelopmentMobile",
31        "areasDevelopmentHardware",
32        "areasDevelopmentSecurity",
33        "areasDevelopmentSoftwareArchitect",
34        "areasDevelopmentProjectManagement",
35        "areasDevelopmentProductOwner",
36        "areasDevelopmentScrumMaster",
37        "areasDevelopmentDesign",
38        "areasDevelopment3dPrinting",
39        "areasDevelopmentGameDev",
40        "areasDevelopmentAI",
41        "areasDevelopmentMachineLearning",
42        "areasDevelopmentVR",
43        "areasDevelopmentIoT",
44        "areasDevelopmentBlockchain",
45        "areasDevelopmentNoCode",
46        "areasDevelopmentTechWriter",
47        "areasDevelopmentEmbeddedSystems",
48        "areasMarketingMarketing",
49        "areasMarketingSales",
50        "areasMarketingCreativity",
51        "areasAnalyticsBI",
52        "areasAnalyticsDataAnalysis",
53        "areasAnalyticsITAnalysis",
54        "areasAnalyticsWebAnalysis",
55        "areasAnalyticsMarketingAnalysis",
56        "areasManagementAccount",
57        "areasManagementBusinessDevelopment",
58        "areasManagementManagement",
59        "areasManagementMarketing",
60        "areasManagementSales",
61        "areasManagementScrumMaster",
62        "areasManagementOffice",
63        "areasManagementProductOwner",
64        "areasManagementProjectManagement",
65        "areasManagementOperations",
66        "areasManagementTopManagement",
67        "areasManagementTeamLead",
68        "areasManagementTech",
69        "areaAdministrationAdministration",
70        "areaAdministrationHR",
71        "areaAdministrationLecturer",
72        "areaAdministrationRest",
73        "salaryDefined",
74        "salaryCurrency",
75        "salaryType",
76        "salaryMin"
77    ],
78    "properties": {
79        "numAds": {
80            "title": "Number of jobs",
81            "description": "Total number of jobs to be fetched.",
82            "type": "integer",
83            "editor": "number",
84            "default": 40,
85            "prefill": 40,
86            "minimum": 20
87        },
88        "workplaceRemote": {
89            "title": "Remote",
90            "type": "boolean",
91            "description": "Remote work arrangement",
92            "default": true,
93            "groupCaption": "Work Arrangement",
94            "groupDescription": "Select desired work arrangements of the job."
95        },
96        "workplaceHybrid": {
97            "title": "Hybrid",
98            "type": "boolean",
99            "description": "Hybrid work arrangement",
100            "default": true
101        },
102        "workplaceOnsite": {
103            "title": "On-site",
104            "type": "boolean",
105            "description": "On-site work arrangement",
106            "default": true
107        },
108        "locationPrague": {
109            "title": "Prague",
110            "type": "boolean",
111            "description": "Job in Prague",
112            "default": true,
113            "groupCaption": "Location",
114            "groupDescription": "Select desired location of the job."
115        },
116        "locationBrno": {
117            "title": "Brno",
118            "type": "boolean",
119            "description": "Job in Brno",
120            "default": true
121        },
122        "locationOstrava": {
123            "title": "Ostrava",
124            "type": "boolean",
125            "description": "Job in Ostrava",
126            "default": true
127        },
128        "locationCzechia": {
129            "title": "Czechia",
130            "type": "boolean",
131            "description": "Job in Czechia",
132            "default": true
133        },
134        "locationSanFrancisco": {
135            "title": "San Francisco",
136            "type": "boolean",
137            "description": "Job in San Francisco",
138            "default": false
139        },
140        "locationNewYork": {
141            "title": "New York",
142            "type": "boolean",
143            "description": "Job in New York",
144            "default": false
145        },
146        "locationLondon": {
147            "title": "London",
148            "type": "boolean",
149            "description": "Job in London",
150            "default": false
151        },
152        "seniorityJunior": {
153            "title": "Junior",
154            "type": "boolean",
155            "description": "Junior position",
156            "default": true,
157            "groupCaption": "Seniority",
158            "groupDescription": "Select desired seniority of the job."
159        },
160        "seniorityMedior": {
161            "title": "Medior",
162            "type": "boolean",
163            "description": "Medior position",
164            "default": true
165        },
166        "senioritySenior": {
167            "title": "Senior",
168            "type": "boolean",
169            "description": "Senior position",
170            "default": true
171        },
172        "typeOfEmploymentFullTime": {
173            "title": "Full-time",
174            "type": "boolean",
175            "description": "Full-time job",
176            "default": true,
177            "groupCaption": "Type of Employment",
178            "groupDescription": "Select desired type of employment."
179        },
180        "typeOfEmploymentPartTime": {
181            "title": "Part-time",
182            "type": "boolean",
183            "description": "Part-time job",
184            "default": false
185        },
186        "typeOfCooperationFreelance": {
187            "title": "Freelance",
188            "type": "boolean",
189            "description": "Freelance job",
190            "default": true
191        },
192        "typeOfCooperationInternship": {
193            "title": "Internship",
194            "type": "boolean",
195            "description": "Internship",
196            "default": true
197        },
198        "typeOfCooperationContract": {
199            "title": "Contract",
200            "type": "boolean",
201            "description": "Contract job",
202            "default": true
203        },
204        "areasDevelopmentBackEnd": {
205            "sectionCaption": "Job areas",
206            "sectionDescription": "Select desired areas.",
207            "title": "Back-end Development",
208            "type": "boolean",
209            "description": "Back-end development",
210            "default": true,
211            "groupCaption": "Development",
212            "groupDescription": "Select desired areas of development."
213        },
214        "areasDevelopmentFrontEnd": {
215            "title": "Front-end Development",
216            "type": "boolean",
217            "description": "Front-end development",
218            "default": true
219        },
220        "areasDevelopmentDatabase": {
221            "title": "Database Development",
222            "type": "boolean",
223            "description": "Database development",
224            "default": true
225        },
226        "areasDevelopmentDevOps": {
227            "title": "DevOps",
228            "type": "boolean",
229            "description": "DevOps",
230            "default": false
231        },
232        "areasDevelopmentTesting": {
233            "title": "Testing",
234            "type": "boolean",
235            "description": "Testing",
236            "default": false
237        },
238        "areasDevelopmentMobile": {
239            "title": "Mobile Development",
240            "type": "boolean",
241            "description": "Mobile development",
242            "default": false
243        },
244        "areasDevelopmentHardware": {
245            "title": "Hardware Development",
246            "type": "boolean",
247            "description": "Hardware development",
248            "default": false
249        },
250        "areasDevelopmentSecurity": {
251            "title": "IT Security",
252            "type": "boolean",
253            "description": "Security",
254            "default": false
255        },
256        "areasDevelopmentSoftwareArchitect": {
257            "title": "Software Architect",
258            "type": "boolean",
259            "description": "Software architect",
260            "default": false
261        },
262        "areasDevelopmentItConsulting": {
263            "title": "IT Consulting",
264            "type": "boolean",
265            "description": "IT consulting",
266            "default": false
267        },
268        "areasDevelopmentProjectManagement": {
269            "title": "Project Management",
270            "type": "boolean",
271            "description": "Project management",
272            "default": false
273        },
274        "areasDevelopmentProductOwner": {
275            "title": "Product Owner",
276            "type": "boolean",
277            "description": "Product owner",
278            "default": false
279        },
280        "areasDevelopmentScrumMaster": {
281            "title": "Scrum Master",
282            "type": "boolean",
283            "description": "Scrum master",
284            "default": false
285        },
286        "areasDevelopmentDesign": {
287            "title": "Design",
288            "type": "boolean",
289            "description": "Design",
290            "default": false
291        },
292        "areasDevelopment3dPrinting": {
293            "title": "3D Printing",
294            "type": "boolean",
295            "description": "3D printing",
296            "default": false
297        },
298        "areasDevelopmentGameDev": {
299            "title": "Game Dev",
300            "type": "boolean",
301            "description": "Game dev",
302            "default": false
303        },
304        "areasDevelopmentAI": {
305            "title": "AI",
306            "type": "boolean",
307            "description": "AI",
308            "default": false
309        },
310        "areasDevelopmentMachineLearning": {
311            "title": "Machine Learning",
312            "type": "boolean",
313            "description": "Machine learning",
314            "default": false
315        },
316        "areasDevelopmentVR": {
317            "title": "VR / AR",
318            "type": "boolean",
319            "description": "VR / AR",
320            "default": false
321        },
322        "areasDevelopmentIoT": {
323            "title": "Internet of Things",
324            "type": "boolean",
325            "description": "IoT",
326            "default": false
327        },
328        "areasDevelopmentBlockchain": {
329            "title": "Blockchain",
330            "type": "boolean",
331            "description": "Blockchain",
332            "default": false
333        },
334        "areasDevelopmentNoCode": {
335            "title": "No Code",
336            "type": "boolean",
337            "description": "No code",
338            "default": false
339        },
340        "areasDevelopmentTechWriter": {
341            "title": "Tech Writer",
342            "type": "boolean",
343            "description": "Tech writer",
344            "default": false
345        },
346        "areasDevelopmentEmbeddedSystems": {
347            "title": "Embedded Systems",
348            "type": "boolean",
349            "description": "Embedded systems",
350            "default": false
351        },
352        "areasMarketingMarketing": {
353            "title": "Marketing",
354            "type": "boolean",
355            "description": "Marketing",
356            "default": false,
357            "groupCaption": "Marketing",
358            "groupDescription": "Select desired areas of marketing."
359        },
360        "areasMarketingSales": {
361            "title": "Sales",
362            "type": "boolean",
363            "description": "Sales",
364            "default": false
365        },
366        "areasMarketingCreativity": {
367            "title": "Creativity",
368            "type": "boolean",
369            "description": "Creativity",
370            "default": false
371        },
372        "areasAnalyticsBI": {
373            "title": "Business Intelligence",
374            "type": "boolean",
375            "description": "BI / Business Analytics",
376            "default": false,
377            "groupCaption": "Analytics",
378            "groupDescription": "Select desired areas of analytics."
379        },
380        "areasAnalyticsDataAnalysis": {
381            "title": "Data Analysis",
382            "type": "boolean",
383            "description": "Data analysis",
384            "default": false
385        },
386        "areasAnalyticsITAnalysis": {
387            "title": "IT Analysis",
388            "type": "boolean",
389            "description": "IT analysis",
390            "default": false
391        },
392        "areasAnalyticsWebAnalysis": {
393            "title": "Web Analysis",
394            "type": "boolean",
395            "description": "Web analysis",
396            "default": false
397        },
398        "areasAnalyticsMarketingAnalysis": {
399            "title": "Marketing Analysis",
400            "type": "boolean",
401            "description": "Marketing analysis",
402            "default": false
403        },
404        "areasManagementAccount": {
405            "title": "Account Management",
406            "type": "boolean",
407            "description": "Account management",
408            "default": false,
409            "groupCaption": "Management",
410            "groupDescription": "Select desired areas of management."
411        },
412        "areasManagementBusinessDevelopment": {
413            "title": "Business Development",
414            "type": "boolean",
415            "description": "Business development",
416            "default": false
417        },
418        "areasManagementManagement": {
419            "title": "Management",
420            "type": "boolean",
421            "description": "Management",
422            "default": false
423        },
424        "areasManagementMarketing": {
425            "title": "Marketing",
426            "type": "boolean",
427            "description": "Marketing",
428            "default": false
429        },
430        "areasManagementSales": {
431            "title": "Sales",
432            "type": "boolean",
433            "description": "Sales",
434            "default": false
435        },
436        "areasManagementScrumMaster": {
437            "title": "Scrum Master",
438            "type": "boolean",
439            "description": "Scrum master",
440            "default": false
441        },
442        "areasManagementOffice": {
443            "title": "Office Management",
444            "type": "boolean",
445            "description": "Office management",
446            "default": false
447        },
448        "areasManagementProductOwner": {
449            "title": "Product Owner",
450            "type": "boolean",
451            "description": "Product owner",
452            "default": false
453        },
454        "areasManagementProjectManagement": {
455            "title": "Project Management",
456            "type": "boolean",
457            "description": "Project management",
458            "default": false
459        },
460        "areasManagementOperations": {
461            "title": "Operations",
462            "type": "boolean",
463            "description": "Operations",
464            "default": false
465        },
466        "areasManagementTopManagement": {
467            "title": "Top Management",
468            "type": "boolean",
469            "description": "Top management (C-level)",
470            "default": false
471        },
472        "areasManagementTeamLead": {
473            "title": "Team Lead",
474            "type": "boolean",
475            "description": "Team lead",
476            "default": false
477        },
478        "areasManagementTech": {
479            "title": "Tech Management",
480            "type": "boolean",
481            "description": "Tech management",
482            "default": false
483        },
484        "areaAdministrationAdministration": {
485            "title": "Administration",
486            "type": "boolean",
487            "description": "Administration",
488            "default": false,
489            "groupCaption": "Administration",
490            "groupDescription": "Select desired areas of administration."
491        },
492        "areaAdministrationHR": {
493            "title": "HR",
494            "type": "boolean",
495            "description": "HR",
496            "default": false
497        },
498        "areaAdministrationLecturer": {
499            "title": "Lecturer",
500            "type": "boolean",
501            "description": "Lecturer",
502            "default": false
503        },
504        "areaAdministrationRest": {
505            "title": "Rest",
506            "type": "boolean",
507            "description": "Rest",
508            "default": false
509        },
510        "salaryDefined": {
511            "sectionCaption": "Salary",
512            "sectionDescription": "Select desired salary.",
513            "title": "Salary defined",
514            "type": "boolean",
515            "description": "Show only jobs with defined salary.",
516            "default": false
517        },
518        "salaryCurrency": {
519            "title": "Salary Currency",
520            "type": "string",
521            "description": "Currency of the salary",
522            "enum": ["CZK", "EUR"],
523            "enumTitles": ["CZK", "EUR"],
524            "prefill": "CZK",
525            "default": "CZK"
526        },
527        "salaryType": {
528            "title": "Salary Type",
529            "type": "string",
530            "description": "Type of the salary",
531            "enumTitles": ["Hourly", "Monthly"],
532            "enum": ["hourly", "monthly"],
533            "prefill": "hourly",
534            "default": "hourly"
535        },
536        "salaryMin": {
537            "title": "Minimal Salary",
538            "description": "Minimal salary",
539            "type": "integer",
540            "editor": "number",
541            "prefill": 0,
542            "default": 0,
543            "minimum": 0
544        }
545    }
546}

.vscode/launch.json

1{
2    "version": "0.2.0",
3    "configurations": [
4        {
5            "type": "node",
6            "request": "launch",
7            "name": "Launch via apify",
8            "runtimeExecutable": "apify",
9            "runtimeArgs": [
10              "run",
11              "--purge"
12            ],
13            "skipFiles": [
14              "<node_internals>/**"
15            ],
16          }
17    ]
18}

src/config.ts

1import { Config } from './types/config.js';
2
// Base address of the site; the offers endpoint below is built on top of it.
const originUrl = 'https://www.startupjobs.cz';

// Identifier sent for each selectable location.
// NOTE(review): the values look like Google Places IDs — confirm against the API.
const cityIds = {
    prague: 'ChIJi3lwCZyTC0cRkEAWZg-vAAQ',
    brno: 'ChIJEVE_wDqUEkcRsLEUZg-vAAQ',
    ostrava: 'ChIJneckMWpYEUcRRJGen_tzGYk',
    czechia: 'ChIJQ4Ld14-UC0cRb1jb03UcZvg',
    sanFrancisco: 'ChIJIQBpAG2ahYAR_6128GcTUEo',
    newYork: 'ChIJOwg_06VPwokRYv534QaPC8g',
    london: 'ChIJdd4hrwug2EcRmSrV3Vo6llI',
};

/**
 * Static scraper settings: page size, location identifiers and the URLs
 * of the site and of its offers endpoint.
 */
export const config: Config = {
    adsPerPage: 20,
    cityIds,
    originUrl,
    fetchUrl: `${originUrl}/api/offers`,
};

src/main.ts

1import { Actor } from 'apify';
2import { HttpCrawler } from 'crawlee';
3
4import { config } from './config.js';
5import { generateRequest } from './helpers/helper.js';
6import { ResponseParser } from './parsers/responseParser.js';
7import { Body } from './types/body.js';
8import { Input } from './types/input.js';
9
/**
 * Actor entry point.
 *
 * Reads the actor input, then crawls the offers endpoint page by page,
 * starting at page 1. Every ad parsed from a response is stored in the
 * default dataset. The next page is enqueued while the response reports
 * one and the number of ads requested so far
 * (`currentPage * config.adsPerPage`) is still below `input.numAds`.
 * Whole pages are stored, so the final count can exceed `numAds`.
 *
 * Throws when the actor was started without any input.
 */
await Actor.main(async () => {
    const input = await Actor.getInput<Input>();

    if (!input) throw new Error('Input is missing');

    const proxyConfiguration = await Actor.createProxyConfiguration();

    const crawler = new HttpCrawler({
        proxyConfiguration,
        requestHandler: async ({ body }) => {
            const json: Body = JSON.parse(body.toString());
            const responseParser = new ResponseParser(json);

            // Push the whole page in one awaited call. Using
            // `forEach(async ...)` here would leave the pushes un-awaited,
            // so the handler could finish before the data is stored and a
            // failed push would surface as an unhandled rejection.
            await Actor.pushData(responseParser.ads);

            const fetchedSoFar = responseParser.currentPage * config.adsPerPage;
            if (responseParser.hasNextPage && fetchedSoFar < input.numAds) {
                await crawler.addRequests([generateRequest(input, responseParser.currentPage + 1)]);
            }
        },
    });

    await crawler.run([generateRequest(input, 1)]);
});

test/adParser.test.ts

1import { AdParser } from '../src/parsers/adParser.js';
2import { ResultSetEntity } from '../src/types/body.js';
3
/**
 * Unit tests for AdParser: each getter is checked against the raw
 * ResultSetEntity fixture it is built from.
 */
describe('AdParser', () => {
    const originUrl = 'https://www.startupjobs.cz';

    let resultEntity: ResultSetEntity;
    let adParser: AdParser;

    // Getters expected to expose an entity field unchanged:
    // [AdParser getter, ResultSetEntity field].
    const passThroughCases: [keyof AdParser, keyof ResultSetEntity][] = [
        ['name', 'name'],
        ['description', 'description'],
        ['roleName', 'mainAreaName'],
        ['roleAreas', 'areaNames'],
        ['company', 'company'],
        ['companyType', 'companyType'],
        ['companyIsSturtup', 'isStartup'],
        ['companyLogo', 'imageUrl'],
        ['companyAreas', 'companyAreas'],
        ['seniorities', 'seniorities'],
        ['benefits', 'benefits'],
        ['isHot', 'isHot'],
        ['isRemote', 'isRemote'],
        ['isTop', 'isTop'],
        ['salary', 'salary'],
    ];

    // Getters expected to split a comma-separated entity field into trimmed items:
    // [AdParser getter, ResultSetEntity field].
    const listCases: [keyof AdParser, 'locations' | 'shifts' | 'collaborations'][] = [
        ['locations', 'locations'],
        ['shifts', 'shifts'],
        ['collaborationTypes', 'collaborations'],
    ];

    const splitList = (value: string) => value.split(',').map((item) => item.trim());

    beforeEach(() => {
        // Fresh fixture per test so no test can leak mutations into another.
        resultEntity = {
            id: 1,
            name: 'Test Name',
            description: 'Test Description',
            mainAreaName: 'Test Role',
            areaNames: ['Area1', 'Area2'],
            url: '/test-url',
            company: 'Test Company',
            companyType: 'Tech',
            isStartup: true,
            imageUrl: 'http://test.com/logo.png',
            companyAreas: ['Area1', 'Area2'],
            locations: 'Location1, Location2',
            shifts: 'Shift1, Shift2',
            seniorities: ['Junior', 'Mid', 'Senior'],
            benefits: [0, 1],
            collaborations: 'Collaboration1, Collaboration2',
            isHot: true,
            isRemote: false,
            isTop: true,
            salary: { max: 1000, min: 500, measure: 'monthly', currency: 'USD' },
        };

        adParser = new AdParser(resultEntity);
    });

    // toEqual compares by value, so the array/object getters are not required
    // to return the very same reference that was passed in.
    test.each(passThroughCases)('should return %s', (parserKey, entityKey) => {
        expect(adParser[parserKey]).toEqual(resultEntity[entityKey]);
    });

    test.each(listCases)('should return %s', (parserKey, entityKey) => {
        expect(adParser[parserKey]).toEqual(splitList(resultEntity[entityKey]));
    });

    test('should return url', () => {
        // The entity holds a site-relative path; the parser is expected to prefix the origin.
        expect(adParser.url).toBe(originUrl + resultEntity.url);
    });
});

src/builders/adBuilder.ts

1import { AdParser } from '../parsers/adParser.js';
2import { Ad } from '../types/ad.js';
3
/**
 * Fluent builder that assembles an `Ad` object field by field.
 *
 * Every `add*` method copies exactly one value from the wrapped `AdParser`
 * into the ad under construction and returns the builder itself, so calls
 * can be chained. `build()` hands back the accumulated object.
 */
export class AdBuilder {
    /** Source of the values copied into the ad. */
    adParser: AdParser;

    /** The ad being assembled; starts empty and is filled by the `add*` calls. */
    ad: Ad = {} as Ad;

    /**
     * @param adParser parser whose getters supply the ad's field values
     */
    constructor(adParser: AdParser) {
        this.adParser = adParser;
    }

    /** Copies the parser's `name` into `ad.name`. */
    addName() {
        return this.put('name', this.adParser.name);
    }

    /** Copies the parser's `description` into `ad.description`. */
    addDescription() {
        return this.put('description', this.adParser.description);
    }

    /** Copies the parser's `roleName` into `ad.mainArea`. */
    addMainArea() {
        return this.put('mainArea', this.adParser.roleName);
    }

    /** Copies the parser's `url` into `ad.url`. */
    addUrl() {
        return this.put('url', this.adParser.url);
    }

    /** Copies the parser's `company` into `ad.company`. */
    addCompany() {
        return this.put('company', this.adParser.company);
    }

    /** Copies the parser's `companyType` into `ad.companyType`. */
    addCompanyType() {
        return this.put('companyType', this.adParser.companyType);
    }

    /** Copies the parser's `companyIsSturtup` into `ad.companyIsSturtup`. */
    addCompanyIsSturtup() {
        return this.put('companyIsSturtup', this.adParser.companyIsSturtup);
    }

    /** Copies the parser's `companyLogo` into `ad.companyLogo`. */
    addCompanyLogo() {
        return this.put('companyLogo', this.adParser.companyLogo);
    }

    /** Copies the parser's `companyAreas` into `ad.companyAreas`. */
    addCompanyAreas() {
        return this.put('companyAreas', this.adParser.companyAreas);
    }

    /** Copies the parser's `locations` into `ad.locations`. */
    addLocations() {
        return this.put('locations', this.adParser.locations);
    }

    /** Copies the parser's `shifts` into `ad.shifts`. */
    addShifts() {
        return this.put('shifts', this.adParser.shifts);
    }

    /** Copies the parser's `roleAreas` into `ad.areas`. */
    addAreas() {
        return this.put('areas', this.adParser.roleAreas);
    }

    /** Copies the parser's `seniorities` into `ad.seniorities`. */
    addSeniorities() {
        return this.put('seniorities', this.adParser.seniorities);
    }

    /** Copies the parser's `benefits` into `ad.benefits`. */
    addBenefits() {
        return this.put('benefits', this.adParser.benefits);
    }

    /** Copies the parser's `collaborationTypes` into `ad.collaborationTypes`. */
    addCollaborationTypes() {
        return this.put('collaborationTypes', this.adParser.collaborationTypes);
    }

    /** Copies the parser's `isHot` into `ad.isHot`. */
    addIsHot() {
        return this.put('isHot', this.adParser.isHot);
    }

    /** Copies the parser's `isRemote` into `ad.isRemote`. */
    addIsRemote() {
        return this.put('isRemote', this.adParser.isRemote);
    }

    /** Copies the parser's `isTop` into `ad.isTop`. */
    addIsTop() {
        return this.put('isTop', this.adParser.isTop);
    }

    /** Copies the parser's `salary` into `ad.salary`. */
    addSalary() {
        return this.put('salary', this.adParser.salary);
    }

    /**
     * @returns the ad object populated so far (the same instance held in `this.ad`)
     */
    build() {
        const { ad } = this;
        return ad;
    }

    /** Stores one field on the ad and returns the builder for chaining. */
    private put<K extends keyof Ad>(field: K, value: Ad[K]): this {
        this.ad[field] = value;
        return this;
    }
}

src/builders/requestBuilder.ts

1import { RequestOptions } from 'crawlee';
2
3import { config } from '../config.js';
4import { Input } from '../types/input.js';
5
/**
 * Fluent builder for the job-listing request.
 *
 * Each `add*` method inspects the actor input and appends the matching
 * `&key[]=value` fragments to one of the builder's string fields; `build()`
 * concatenates those fragments after `config.fetchUrl?page=<page>`.
 *
 * Note: the `add*` methods append, so calling the same method twice on one
 * builder duplicates its parameters.
 */
export class RequestBuilder {
    /** Page number sent as the `page` query parameter (0 when not supplied). */
    page: number;
    /** Actor input that drives which filters are emitted. */
    input: Input;
    /** Accumulated `salary[]` / `salaryRange[]` fragments. */
    salary: string;
    /** Accumulated `collaboration[]` fragments. */
    collaboration: string;
    /** Accumulated `shift[]` fragments. */
    shift: string;
    /**
     * Accumulated `area[]` fragments. `addSeniority()` and `addLocation()`
     * also append their `seniority[]` / `location[]` fragments here.
     */
    area: string;

    /**
     * @param input actor input with the filter flags
     * @param page page number to request; defaults to 0
     */
    constructor(input: Input, page?: number) {
        this.page = page ?? 0;
        this.input = input;

        this.collaboration = '';
        this.shift = '';
        this.area = '';
        this.salary = '';
    }

    /** Appends a `collaboration[]` fragment for every enabled cooperation/workplace flag. */
    addCollaboration(): RequestBuilder {
        if (this.input.typeOfCooperationFreelance) this.collaboration += `&collaboration[]=freelance`;
        if (this.input.typeOfCooperationInternship) this.collaboration += `&collaboration[]=internship`;
        if (this.input.typeOfCooperationContract) this.collaboration += `&collaboration[]=pracovni-smlouva`;
        if (this.input.workplaceOnsite) this.collaboration += `&collaboration[]=onsite`;
        if (this.input.workplaceRemote) this.collaboration += `&collaboration[]=remote`;
        if (this.input.workplaceHybrid) this.collaboration += `&collaboration[]=hybridni`;

        return this;
    }

    /** Appends a `shift[]` fragment for every enabled employment-type flag. */
    addShift(): RequestBuilder {
        if (this.input.typeOfEmploymentFullTime) this.shift += `&shift[]=full-time`;
        if (this.input.typeOfEmploymentPartTime) this.shift += `&shift[]=part-time`;

        return this;
    }

    /** Appends a `seniority[]` fragment (stored in `this.area`) for every enabled seniority flag. */
    addSeniority(): RequestBuilder {
        if (this.input.seniorityJunior) this.area += `&seniority[]=junior`;
        if (this.input.seniorityMedior) this.area += `&seniority[]=medior`;
        if (this.input.senioritySenior) this.area += `&seniority[]=senior`;

        return this;
    }

    /** Appends a `location[]` fragment (stored in `this.area`) using the ids from `config.cityIds`. */
    addLocation(): RequestBuilder {
        if (this.input.locationPrague) this.area += `&location[]=${config.cityIds.prague}`;
        if (this.input.locationBrno) this.area += `&location[]=${config.cityIds.brno}`;
        if (this.input.locationOstrava) this.area += `&location[]=${config.cityIds.ostrava}`;
        if (this.input.locationCzechia) this.area += `&location[]=${config.cityIds.czechia}`;
        if (this.input.locationSanFrancisco) this.area += `&location[]=${config.cityIds.sanFrancisco}`;
        if (this.input.locationNewYork) this.area += `&location[]=${config.cityIds.newYork}`;
        if (this.input.locationLondon) this.area += `&location[]=${config.cityIds.london}`;

        return this;
    }

    /** Appends an `area[]` fragment for every enabled job-area flag. */
    addArea(): RequestBuilder {
        // Development
        if (this.input.areasDevelopmentBackEnd) this.area += `&area[]=vyvoj/back-end`;
        if (this.input.areasDevelopmentFrontEnd) this.area += `&area[]=vyvoj/front-end-koder`;
        if (this.input.areasDevelopmentDatabase) this.area += `&area[]=vyvoj/databaze`;
        if (this.input.areasDevelopmentDevOps) this.area += `&area[]=vyvoj/devops`;
        if (this.input.areasDevelopmentTesting) this.area += `&area[]=vyvoj/testovani`;
        if (this.input.areasDevelopmentMobile) this.area += `&area[]=vyvoj/mobilni-vyvoj`;
        if (this.input.areasDevelopmentHardware) this.area += `&area[]=vyvoj/hardware-firmware`;
        if (this.input.areasDevelopmentSecurity) this.area += `&area[]=vyvoj/it-security`;
        if (this.input.areasDevelopmentSoftwareArchitect) this.area += `&area[]=vyvoj/software-architekt`;
        if (this.input.areasDevelopmentItConsulting) this.area += `&area[]=vyvoj/it-konzultant`;
        if (this.input.areasDevelopmentProjectManagement) this.area += `&area[]=vyvoj/project-manager`;
        if (this.input.areasDevelopmentProductOwner) this.area += `&area[]=vyvoj/product-manager-a-product-owner`;
        if (this.input.areasDevelopmentScrumMaster) this.area += `&area[]=vyvoj/scrum-master`;
        if (this.input.areasDevelopmentDesign) this.area += `&area[]=vyvoj/design`;
        if (this.input.areasDevelopment3dPrinting) this.area += `&area[]=vyvoj/3d-tisk`;
        if (this.input.areasDevelopmentGameDev) this.area += `&area[]=vyvoj/herni-vyvoj`;
        if (this.input.areasDevelopmentAI) this.area += `&area[]=vyvoj/umela-inteligence`;
        if (this.input.areasDevelopmentMachineLearning) this.area += `&area[]=vyvoj/machine-learning`;
        if (this.input.areasDevelopmentVR) this.area += `&area[]=vyvoj/virtualni-rozsirena-realita`;
        if (this.input.areasDevelopmentIoT) this.area += `&area[]=vyvoj/internet-veci`;
        if (this.input.areasDevelopmentBlockchain) this.area += `&area[]=vyvoj/blockchain`;
        if (this.input.areasDevelopmentNoCode) this.area += `&area[]=vyvoj/no-code`;
        if (this.input.areasDevelopmentTechWriter) this.area += `&area[]=vyvoj/tech-writer`;
        if (this.input.areasDevelopmentEmbeddedSystems) this.area += `&area[]=vyvoj/embedded-systems`;

        // Marketing and sales
        if (this.input.areasMarketingMarketing) this.area += `&area[]=marketing-a-obchod/marketing`;
        if (this.input.areasMarketingSales) this.area += `&area[]=marketing-a-obchod/obchod`;
        if (this.input.areasMarketingCreativity) this.area += `&area[]=marketing-a-obchod/kreativa`;

        // Analytics
        if (this.input.areasAnalyticsBI) this.area += `&area[]=analytika/bi-business-analytik`;
        if (this.input.areasAnalyticsDataAnalysis) this.area += `&area[]=analytika/data-analytik`;
        if (this.input.areasAnalyticsITAnalysis) this.area += `&area[]=analytika/it-analytik`;
        if (this.input.areasAnalyticsWebAnalysis) this.area += `&area[]=analytika/webovy-analytik`;
        if (this.input.areasAnalyticsMarketingAnalysis) this.area += `&area[]=analytika/marketingovy-analytik`;

        // Management
        if (this.input.areasManagementAccount) this.area += `&area[]=management/account-manager`;
        if (this.input.areasManagementBusinessDevelopment) this.area += `&area[]=management/business-development`;
        if (this.input.areasManagementManagement) this.area += `&area[]=management/manager`;
        if (this.input.areasManagementMarketing) this.area += `&area[]=management/marketing-manager`;
        if (this.input.areasManagementSales) this.area += `&area[]=management/sales-manager`;
        if (this.input.areasManagementScrumMaster) this.area += `&area[]=management/scrum-master`;
        if (this.input.areasManagementOffice) this.area += `&area[]=management/office-manager`;
        if (this.input.areasManagementProductOwner) this.area += `&area[]=management/product-manager-a-product-owner`;
        if (this.input.areasManagementProjectManagement) this.area += `&area[]=management/project-manager`;
        if (this.input.areasManagementOperations) this.area += `&area[]=management/provozni-manazer`;
        if (this.input.areasManagementTeamLead) this.area += `&area[]=management/team-leader`;
        if (this.input.areasManagementTopManagement) this.area += `&area[]=management/top-management-c-level`;
        if (this.input.areasManagementTech) this.area += `&area[]=management/technicky-manazer`;

        // Administration and other
        if (this.input.areaAdministrationAdministration) this.area += `&area[]=administrativa-a-jine/administrativa`;
        if (this.input.areaAdministrationHR) this.area += `&area[]=administrativa-a-jine/hr`;
        if (this.input.areaAdministrationLecturer) this.area += `&area[]=administrativa-a-jine/lektor`;
        if (this.input.areaAdministrationRest) this.area += `&area[]=administrativa-a-jine/ostatni`;

        return this;
    }

    /**
     * Appends the salary filters: `salary[]=1` when `salaryDefined` is set,
     * and always a `salaryRange[]` parameter carrying a JSON object with the
     * minimum value, currency and measure.
     */
    addSalary(): RequestBuilder {
        if (this.input.salaryDefined) this.salary += `&salary[]=1`;

        const range = JSON.stringify({
            value: this.input.salaryMin,
            currency: this.input.salaryCurrency,
            measure: this.input.salaryType,
        });

        // The JSON text contains characters that are not valid raw in a query
        // string (`{`, `"`, `:`), and a value containing `&` or `#` would
        // otherwise cut the parameter short, so percent-encode it.
        this.salary += `&salaryRange[]=${encodeURIComponent(range)}`;

        return this;
    }

    /**
     * @returns request options for a GET to the listing endpoint with every
     *          fragment accumulated so far appended to the query string
     */
    build(): RequestOptions {
        return {
            url: `${config.fetchUrl}?page=${this.page}${this.salary}${this.collaboration}${this.shift}${this.area}`,
            method: 'GET',
            headers: {
                'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
                origin: config.originUrl,
            },
        };
    }
}

src/helpers/helper.ts

1import { RequestOptions } from 'crawlee';
2
3import { RequestBuilder } from '../builders/requestBuilder.js';
4import { Input } from '../types/input.js';
5
/**
 * Builds the request options for one page of the job listing.
 *
 * Applies every filter group of `RequestBuilder` in a fixed order
 * (shift, seniority, location, salary, collaboration, area) and returns
 * the built request.
 *
 * @param input actor input holding the filter flags
 * @param page page number to request
 * @returns request options produced by `RequestBuilder.build()`
 */
export function generateRequest(input: Input, page: number): RequestOptions {
    const builder = new RequestBuilder(input, page);

    // Each add* call mutates the builder and returns it, so separate
    // statements are equivalent to one chained expression.
    builder.addShift();
    builder.addSeniority();
    builder.addLocation();
    builder.addSalary();
    builder.addCollaboration();
    builder.addArea();

    return builder.build();
}

src/parsers/adParser.ts

1import { config } from '../config.js';
2import { ResultSetEntity } from '../types/body.js';
3
/**
 * Read-only view over one raw result entity of the listing response.
 *
 * Most getters return the entity's field unchanged; `url` is prefixed with
 * `config.originUrl`, and the comma-separated string fields (`locations`,
 * `shifts`, `collaborations`) are exposed as arrays of trimmed entries.
 */
export class AdParser {
    /** The raw entity being wrapped. */
    setEntity: ResultSetEntity;

    /**
     * @param setEntity raw result entity to read from
     */
    constructor(setEntity: ResultSetEntity) {
        this.setEntity = setEntity;
    }

    /** The entity's `name`. */
    get name() {
        return this.setEntity.name;
    }

    /** The entity's `description`. */
    get description() {
        return this.setEntity.description;
    }

    /** The entity's `mainAreaName`. */
    get roleName() {
        return this.setEntity.mainAreaName;
    }

    /** The entity's `areaNames`. */
    get roleAreas() {
        return this.setEntity.areaNames;
    }

    /** The entity's `url` prefixed with `config.originUrl`. */
    get url() {
        return config.originUrl + this.setEntity.url;
    }

    /** The entity's `company`. */
    get company() {
        return this.setEntity.company;
    }

    /** The entity's `companyType`. */
    get companyType() {
        return this.setEntity.companyType;
    }

    /** The entity's `isStartup` flag. */
    get companyIsSturtup() {
        return this.setEntity.isStartup;
    }

    /** The entity's `imageUrl`. */
    get companyLogo() {
        return this.setEntity.imageUrl;
    }

    /** The entity's `companyAreas`. */
    get companyAreas() {
        return this.setEntity.companyAreas;
    }

    /** Entries of the comma-separated `locations` field. */
    get locations() {
        return AdParser.splitList(this.setEntity.locations);
    }

    /** Entries of the comma-separated `shifts` field. */
    get shifts() {
        return AdParser.splitList(this.setEntity.shifts);
    }

    /** The entity's `seniorities`. */
    get seniorities() {
        return this.setEntity.seniorities;
    }

    /** The entity's `benefits`. */
    get benefits() {
        return this.setEntity.benefits;
    }

    /** Entries of the comma-separated `collaborations` field. */
    get collaborationTypes() {
        return AdParser.splitList(this.setEntity.collaborations);
    }

    /** The entity's `isHot` flag. */
    get isHot() {
        return this.setEntity.isHot;
    }

    /** The entity's `isRemote` flag. */
    get isRemote() {
        return this.setEntity.isRemote;
    }

    /** The entity's `isTop` flag. */
    get isTop() {
        return this.setEntity.isTop;
    }

    /** The entity's `salary`. */
    get salary() {
        return this.setEntity.salary;
    }

    /**
     * Splits a comma-separated string into trimmed, non-empty entries.
     *
     * A plain `''.split(',')` yields `['']`, and a missing field would throw,
     * so empty/missing input maps to `[]` and blank entries are dropped.
     */
    private static splitList(raw: string | null | undefined): string[] {
        if (!raw) return [];

        return raw
            .split(',')
            .map((entry) => entry.trim())
            .filter((entry) => entry.length > 0);
    }
}

src/parsers/responseParser.ts

1import { AdParser } from './adParser.js';
2import { AdBuilder } from '../builders/adBuilder.js';
3import { Ad } from '../types/ad.js';
4import { Body } from '../types/body.js';
5
/**
 * Wraps a listing response body and exposes its ads and paging state.
 */
export class ResponseParser {
    /** The response body being wrapped. */
    body: Body;

    /**
     * @param body response body to read from
     */
    constructor(body: Body) {
        this.body = body;
    }

    /**
     * Converts every entry of `body.resultSet` into an `Ad`, in order, by
     * running the full `AdBuilder` chain over an `AdParser` for that entry.
     */
    get ads(): Ad[] {
        return this.body.resultSet.map((entity) => {
            const builder = new AdBuilder(new AdParser(entity));

            return builder
                .addName()
                .addDescription()
                .addMainArea()
                .addUrl()
                .addCompany()
                .addCompanyType()
                .addCompanyIsSturtup()
                .addCompanyLogo()
                .addCompanyAreas()
                .addLocations()
                .addShifts()
                .addAreas()
                .addSeniorities()
                .addBenefits()
                .addCollaborationTypes()
                .addIsHot()
                .addIsRemote()
                .addIsTop()
                .addSalary()
                .build();
        });
    }

    /** True while the current page number is below the paginator's `max`. */
    get hasNextPage(): boolean {
        const { max } = this.body.paginator;
        return this.currentPage < max;
    }

    /** The paginator's `current` page number. */
    get currentPage(): number {
        const { current } = this.body.paginator;
        return current;
    }
}

src/types/ad.d.ts

1import { Salary } from './body.js';
2
/**
 * One job advertisement in the shape produced by `AdBuilder`.
 *
 * Each field is filled from the `AdParser` getter named in its comment;
 * fields without a comment are copied from the same-named parser getter.
 */
3export type Ad = {
4    name: string;
5    description: string;
    // From `AdParser.roleName` (the entity's `mainAreaName`).
6    mainArea: string;
    // From `AdParser.url`: `config.originUrl` + the entity's `url`.
7    url: string;
8    company: string;
9    companyType: string;
    // From the entity's `isStartup` flag.
10    companyIsSturtup: boolean;
    // From the entity's `imageUrl`.
11    companyLogo: string;
12    companyAreas: string[];
    // Entries of the entity's comma-separated `locations` string.
13    locations: string[];
    // Entries of the entity's comma-separated `shifts` string.
14    shifts: string[];
    // From `AdParser.roleAreas` (the entity's `areaNames`).
15    areas: string[];
16    seniorities: string[];
    // Numeric values as delivered by the source; their meaning is not visible here.
17    benefits: number[];
    // Entries of the entity's comma-separated `collaborations` string.
18    collaborationTypes: string[];
19    isHot: boolean;
20    isRemote: boolean;
21    isTop: boolean;
22    salary: Salary;
23}

src/types/body.d.ts

/**
 * Top-level shape of the listing response consumed by `ResponseParser`.
 */
1export interface Body {
  // Raw entries; `ResponseParser.ads` converts each one into an `Ad`.
2  resultSet: ResultSetEntity[]
  // NOTE(review): presumably the total number of matching ads — not read by the visible code; confirm.
3  resultCount: number
  // Paging state read by `ResponseParser.currentPage` / `hasNextPage`.
4  paginator: Paginator
5  permanentUrlForResultSet: PermanentUrlForResultSet
6  seo: Seo
7}
8
/**
 * One raw entry of `Body.resultSet`, wrapped by `AdParser`.
 */
9export interface ResultSetEntity {
10  id: number
11  name: string
12  description: string
  // `AdParser.url` prepends `config.originUrl` to this value.
13  url: string
14  company: string
15  companyType: string
16  mainAreaName: string
17  imageUrl: string
  // Comma-separated list; `AdParser.locations` splits it on ','.
18  locations: string
  // Comma-separated list; `AdParser.shifts` splits it on ','.
19  shifts: string
20  areaSlugs?: string[];
21  areaNames: string[];
22  seniorities: string[];
23  benefits: number[];
  // Comma-separated list; `AdParser.collaborationTypes` splits it on ','.
24  collaborations: string
25  isHot: boolean;
26  isRemote: boolean;
27  isTop: boolean;
28  companyAreas: string[];
29  isStartup: boolean;
30  salary: Salary;
31}
32
/**
 * Salary information attached to a result entity and copied unchanged into `Ad.salary`.
 */
33export interface Salary {
34  max: number;
35  min: number;
  // NOTE(review): presumably the pay period/unit (cf. `salaryType` in the request input) — confirm.
36  measure: string;
37  currency: string;
38}
39
/**
 * Paging state of a listing response.
 * `ResponseParser.hasNextPage` is true while `current < max`.
 */
40export interface Paginator {
  // Page number of this response.
41  current: number;
  // Upper bound that `current` is compared against.
42  max: number;
43}
44
/**
 * Value of `Body.permanentUrlForResultSet`; not read by the visible code.
 * NOTE(review): the keys look like language codes (Czech / English) — confirm.
 */
45export interface PermanentUrlForResultSet {
46  cs: string;
47  en: string;
48}
49
/**
 * Value of `Body.seo`; not read by the visible code.
 */
50export interface Seo {
51  title: string;
52  heading: string;
  // NOTE(review): typed as `undefined` only, so no real value can be assigned;
  // presumably derived from a sample response — confirm the actual type.
53  customHtml?: undefined;
54}

src/types/config.d.ts

/**
 * Shape of the shared `config` object used by the builders and parsers.
 */
1export type Config = {
    // NOTE(review): presumably the page size of the listing endpoint — not read by the visible code; confirm.
2    adsPerPage: number;
    // Values interpolated into the `location[]` query parameters by `RequestBuilder.addLocation()`.
3    cityIds: {
4        prague: string;
5        brno: string;
6        ostrava: string;
7        czechia: string;
8        sanFrancisco: string;
9        newYork: string;
10        london: string;
11    };
    // Prefixed to each ad's relative `url` and sent as the request's `origin` header.
12    originUrl: string;
    // Base URL of the listing request; the query string is appended to it.
13    fetchUrl: string;
14}

src/types/input.d.ts

/**
 * Actor input. The boolean flags select which filter parameters
 * `RequestBuilder` appends to the listing request; each enabled flag adds
 * one query-string fragment.
 */
1export type Input = {
    // NOTE(review): presumably the number of ads to collect — not read by the visible code; confirm.
2    numAds: number;
    // Workplace flags -> `collaboration[]` (remote / hybridni / onsite).
3    workplaceRemote: boolean;
4    workplaceHybrid: boolean;
5    workplaceOnsite: boolean;
    // Location flags -> `location[]` with the matching id from `config.cityIds`.
6    locationPrague: boolean;
7    locationBrno: boolean;
8    locationOstrava: boolean;
9    locationCzechia: boolean;
10    locationSanFrancisco: boolean;
11    locationNewYork: boolean;
12    locationLondon: boolean;
    // Seniority flags -> `seniority[]` (junior / medior / senior).
13    seniorityJunior: boolean;
14    seniorityMedior: boolean;
15    senioritySenior: boolean;
    // Employment-type flags -> `shift[]` (full-time / part-time).
16    typeOfEmploymentFullTime: boolean;
17    typeOfEmploymentPartTime: boolean;
    // Cooperation flags -> `collaboration[]` (freelance / internship / pracovni-smlouva).
18    typeOfCooperationFreelance: boolean;
19    typeOfCooperationInternship: boolean;
20    typeOfCooperationContract: boolean;
    // Development area flags -> `area[]=vyvoj/...`.
21    areasDevelopmentBackEnd: boolean;
22    areasDevelopmentFrontEnd: boolean;
23    areasDevelopmentDatabase: boolean;
24    areasDevelopmentDevOps: boolean;
25    areasDevelopmentTesting: boolean;
26    areasDevelopmentMobile: boolean;
27    areasDevelopmentHardware: boolean;
28    areasDevelopmentSecurity: boolean;
29    areasDevelopmentSoftwareArchitect: boolean;
30    areasDevelopmentItConsulting: boolean;
31    areasDevelopmentProjectManagement: boolean;
32    areasDevelopmentProductOwner: boolean;
33    areasDevelopmentScrumMaster: boolean;
34    areasDevelopmentDesign: boolean;
35    areasDevelopment3dPrinting: boolean;
36    areasDevelopmentGameDev: boolean;
37    areasDevelopmentAI: boolean;
38    areasDevelopmentMachineLearning: boolean;
39    areasDevelopmentVR: boolean;
40    areasDevelopmentIoT: boolean;
41    areasDevelopmentBlockchain: boolean;
42    areasDevelopmentNoCode: boolean;
43    areasDevelopmentTechWriter: boolean;
44    areasDevelopmentEmbeddedSystems: boolean;
    // Marketing and sales area flags -> `area[]=marketing-a-obchod/...`.
45    areasMarketingMarketing: boolean;
46    areasMarketingSales: boolean;
47    areasMarketingCreativity: boolean;
    // Analytics area flags -> `area[]=analytika/...`.
48    areasAnalyticsBI: boolean;
49    areasAnalyticsDataAnalysis: boolean;
50    areasAnalyticsITAnalysis: boolean;
51    areasAnalyticsWebAnalysis: boolean;
52    areasAnalyticsMarketingAnalysis: boolean;
    // Management area flags -> `area[]=management/...`.
53    areasManagementAccount: boolean;
54    areasManagementBusinessDevelopment: boolean;
55    areasManagementManagement: boolean;
56    areasManagementMarketing: boolean;
57    areasManagementSales: boolean;
58    areasManagementScrumMaster: boolean;
59    areasManagementOffice: boolean;
60    areasManagementProductOwner: boolean;
61    areasManagementProjectManagement: boolean;
62    areasManagementOperations: boolean;
63    areasManagementTopManagement: boolean;
64    areasManagementTeamLead: boolean;
65    areasManagementTech: boolean;
    // Administration area flags -> `area[]=administrativa-a-jine/...`
    // (note the `area` prefix here, unlike the `areas` prefix used above).
66    areaAdministrationAdministration: boolean;
67    areaAdministrationHR: boolean;
68    areaAdministrationLecturer: boolean;
69    areaAdministrationRest: boolean;
    // When set, `RequestBuilder.addSalary()` adds `salary[]=1`.
70    salaryDefined: boolean;
    // The three values below are sent together as the JSON payload of `salaryRange[]`
    // (`currency`, `measure` and `value` respectively).
71    salaryCurrency: string;
72    salaryType: string;
73    salaryMin: number;
74}
Developer
Maintained by Community

Actor Metrics

  • 5 monthly users

  • 1 star

  • >99% runs succeeded

  • Created in Jun 2024

  • Modified 6 months ago

Categories