Startup Jobs Scraper
Try for free
No credit card required
Go to Store
Startup Jobs Scraper
martin1080p/startup-jobs-scraper
Try for free
No credit card required
StartupJobs.cz scraper extracts job listings from the site, gathering job titles, company names, locations, and descriptions. It automates data collection, providing up-to-date job information efficiently.
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
14
15# dist folder
16dist
.editorconfig
1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf
.eslintrc
1{
2 "root": true,
3 "env": {
4 "browser": true,
5 "es2020": true,
6 "node": true
7 },
8 "extends": [
9 "@apify/eslint-config-ts"
10 ],
11 "parserOptions": {
12 "project": "./tsconfig.json",
13 "ecmaVersion": 2020
14 },
15 "ignorePatterns": [
16 "node_modules",
17 "dist",
18 "**/*.d.ts"
19 ]
20}
.gitignore
1storage
2apify_storage
3crawlee_storage
4node_modules
5dist
6tsconfig.tsbuildinfo
7storage/*
8!storage/key_value_stores
9storage/key_value_stores/*
10!storage/key_value_stores/default
11storage/key_value_stores/default/*
12!storage/key_value_stores/default/INPUT.json
13
14# Added by Apify CLI
15.venv
jest.config.js
/**
 * Jest configuration for the TypeScript test suite.
 *
 * The ts-jest options are passed through the `transform` entry rather than
 * the deprecated `globals['ts-jest']` key (deprecated in ts-jest 29).
 *
 * @type {import('ts-jest').JestConfigWithTsJest}
 */
export default {
    preset: 'ts-jest',
    testEnvironment: 'node',
    moduleFileExtensions: ['ts', 'js'],
    transform: {
        // Same file pattern the `ts-jest` preset registers, now carrying the tsconfig option.
        '^.+\\.tsx?$': ['ts-jest', { tsconfig: 'tsconfig.json' }],
    },
    moduleNameMapper: {
        // Sources import relative modules with a `.js` suffix;
        // strip the suffix so Jest resolves the corresponding `.ts` file.
        '^(\\.\\.?\\/.+)\\.js$': '$1',
    },
};
package.json
1{
2 "name": "startup-jobs-scraper",
3 "version": "0.1.0",
4 "type": "module",
5 "description": "This is an example of an Apify actor.",
6 "engines": {
7 "node": ">=18.0.0"
8 },
9 "dependencies": {
10 "apify": "^3.1.10",
11 "crawlee": "^3.10.5"
12 },
13 "devDependencies": {
14 "@apify/eslint-config-ts": "^0.4.1",
15 "@apify/tsconfig": "^0.1.0",
16 "@stylistic/eslint-plugin-js": "^2.3.0",
17 "@types/jest": "^29.5.12",
18 "@typescript-eslint/eslint-plugin": "^7.14.1",
19 "@typescript-eslint/parser": "^7.14.1",
20 "eslint": "^8.57.0",
21 "jest": "^29.7.0",
22 "ts-jest": "^29.1.5",
23 "tsx": "^4.6.2",
24 "typescript": "^5.3.3"
25 },
26 "scripts": {
27 "start": "npm run start:dev",
28 "start:prod": "node dist/main.js",
29 "start:dev": "tsx src/main.ts",
30 "build": "tsc",
31 "lint": "eslint ./src --ext .ts",
32 "lint:fix": "eslint ./src --ext .ts --fix",
33 "test": "jest"
34 },
35 "author": "Martin Fanta",
36 "license": "MIT"
37}
tsconfig.json
1{
2 "extends": "@apify/tsconfig",
3 "compilerOptions": {
4 "esModuleInterop": true,
5 "module": "NodeNext",
6 "moduleResolution": "NodeNext",
7 "target": "ES2022",
8 "outDir": "dist",
9 "noUnusedLocals": false,
10 "skipLibCheck": true,
11 "lib": ["DOM"],
12 },
13 "include": [
14 "./src/**/*",
15 ]
16}
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "startup-jobs-scraper",
4 "title": "StartupJobs Scraper",
5 "description": "StartupJobs.cz scraper extracts job listings from the site, gathering job titles, company names, locations, and descriptions. It automates data collection, providing up-to-date job information efficiently.",
6 "version": "0.1",
7 "meta": {
8 "templateId": "ts-start"
9 },
10 "input": "./input_schema.json",
11 "dockerfile": "./Dockerfile",
12 "storages": {
13 "dataset": {
14 "actorSpecification": 1,
15 "fields": {},
16 "views": {
17 "overview": {
18 "title": "Overview",
19 "transformation": {
20 "fields": [
21 "companyLogo",
22 "name",
23 "description",
24 "mainArea",
25 "company",
26 "areas",
27 "url"
28 ]
29 },
30 "display": {
31 "component": "table",
32 "properties": {
33 "companyLogo": {
34 "label": "Company Logo",
35 "format": "image"
36 },
37 "name": {
38 "label": "Name",
39 "format": "text"
40 },
41 "description": {
42 "label": "Description",
43 "format": "text"
44 },
45 "mainArea": {
46 "label": "Main Area",
47 "format": "text"
48 },
49 "company": {
50 "label": "Company",
51 "format": "text"
52 },
53 "areas": {
54 "label": "Areas",
55 "format": "array"
56 },
57 "url": {
58 "label": "Link",
59 "format": "link"
60 }
61 }
62 }
63 }
64 }
65 }
66 }
67}
.actor/Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20 AS builder

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install all dependencies, including the dev dependencies the build needs.
# Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false

# Next, copy the source files using the user set
# in the base image.
COPY . ./

# Build the project (the `build` script runs `tsc`, which emits into ./dist).
RUN npm run build

# Create final image
FROM apify/actor-node:20

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Copy built JS files from builder image.
# `--from=builder` is required: without it COPY reads from the build context,
# where `dist` does not exist (it is listed in .dockerignore).
COPY --from=builder /usr/src/app/dist ./dist

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./


# Run the image.
CMD npm run start:prod --silent
.actor/input_schema.json
1{
2 "title": "Startup Jobs Scraper",
3 "type": "object",
4 "schemaVersion": 1,
5 "required": [
6 "numAds",
7 "workplaceRemote",
8 "workplaceHybrid",
9 "workplaceOnsite",
10 "locationPrague",
11 "locationBrno",
12 "locationOstrava",
13 "locationCzechia",
14 "locationSanFrancisco",
15 "locationNewYork",
16 "locationLondon",
17 "seniorityJunior",
18 "seniorityMedior",
19 "senioritySenior",
20 "typeOfEmploymentFullTime",
21 "typeOfEmploymentPartTime",
22 "typeOfCooperationFreelance",
23 "typeOfCooperationInternship",
24 "typeOfCooperationContract",
25 "areasDevelopmentBackEnd",
26 "areasDevelopmentFrontEnd",
27 "areasDevelopmentDatabase",
28 "areasDevelopmentDevOps",
29 "areasDevelopmentTesting",
30 "areasDevelopmentMobile",
31 "areasDevelopmentHardware",
32 "areasDevelopmentSecurity",
33 "areasDevelopmentSoftwareArchitect",
34 "areasDevelopmentProjectManagement",
35 "areasDevelopmentProductOwner",
36 "areasDevelopmentScrumMaster",
37 "areasDevelopmentDesign",
38 "areasDevelopment3dPrinting",
39 "areasDevelopmentGameDev",
40 "areasDevelopmentAI",
41 "areasDevelopmentMachineLearning",
42 "areasDevelopmentVR",
43 "areasDevelopmentIoT",
44 "areasDevelopmentBlockchain",
45 "areasDevelopmentNoCode",
46 "areasDevelopmentTechWriter",
47 "areasDevelopmentEmbeddedSystems",
48 "areasMarketingMarketing",
49 "areasMarketingSales",
50 "areasMarketingCreativity",
51 "areasAnalyticsBI",
52 "areasAnalyticsDataAnalysis",
53 "areasAnalyticsITAnalysis",
54 "areasAnalyticsWebAnalysis",
55 "areasAnalyticsMarketingAnalysis",
56 "areasManagementAccount",
57 "areasManagementBusinessDevelopment",
58 "areasManagementManagement",
59 "areasManagementMarketing",
60 "areasManagementSales",
61 "areasManagementScrumMaster",
62 "areasManagementOffice",
63 "areasManagementProductOwner",
64 "areasManagementProjectManagement",
65 "areasManagementOperations",
66 "areasManagementTopManagement",
67 "areasManagementTeamLead",
68 "areasManagementTech",
69 "areaAdministrationAdministration",
70 "areaAdministrationHR",
71 "areaAdministrationLecturer",
72 "areaAdministrationRest",
73 "salaryDefined",
74 "salaryCurrency",
75 "salaryType",
76 "salaryMin"
77 ],
78 "properties": {
79 "numAds": {
80 "title": "Number of jobs",
81 "description": "Total number of jobs to be fetched.",
82 "type": "integer",
83 "editor": "number",
84 "default": 40,
85 "prefill": 40,
86 "minimum": 20
87 },
88 "workplaceRemote": {
89 "title": "Remote",
90 "type": "boolean",
91 "description": "Remote work arrangement",
92 "default": true,
93 "groupCaption": "Work Arrangement",
94 "groupDescription": "Select desired work arrangements of the job."
95 },
96 "workplaceHybrid": {
97 "title": "Hybrid",
98 "type": "boolean",
99 "description": "Hybrid work arrangement",
100 "default": true
101 },
102 "workplaceOnsite": {
103 "title": "On-site",
104 "type": "boolean",
105 "description": "On-site work arrangement",
106 "default": true
107 },
108 "locationPrague": {
109 "title": "Prague",
110 "type": "boolean",
111 "description": "Job in Prague",
112 "default": true,
113 "groupCaption": "Location",
114 "groupDescription": "Select desired location of the job."
115 },
116 "locationBrno": {
117 "title": "Brno",
118 "type": "boolean",
119 "description": "Job in Brno",
120 "default": true
121 },
122 "locationOstrava": {
123 "title": "Ostrava",
124 "type": "boolean",
125 "description": "Job in Ostrava",
126 "default": true
127 },
128 "locationCzechia": {
129 "title": "Czechia",
130 "type": "boolean",
131 "description": "Job in Czechia",
132 "default": true
133 },
134 "locationSanFrancisco": {
135 "title": "San Francisco",
136 "type": "boolean",
137 "description": "Job in San Francisco",
138 "default": false
139 },
140 "locationNewYork": {
141 "title": "New York",
142 "type": "boolean",
143 "description": "Job in New York",
144 "default": false
145 },
146 "locationLondon": {
147 "title": "London",
148 "type": "boolean",
149 "description": "Job in London",
150 "default": false
151 },
152 "seniorityJunior": {
153 "title": "Junior",
154 "type": "boolean",
155 "description": "Junior position",
156 "default": true,
157 "groupCaption": "Seniority",
158 "groupDescription": "Select desired seniority of the job."
159 },
160 "seniorityMedior": {
161 "title": "Medior",
162 "type": "boolean",
163 "description": "Medior position",
164 "default": true
165 },
166 "senioritySenior": {
167 "title": "Senior",
168 "type": "boolean",
169 "description": "Senior position",
170 "default": true
171 },
172 "typeOfEmploymentFullTime": {
173 "title": "Full-time",
174 "type": "boolean",
175 "description": "Full-time job",
176 "default": true,
177 "groupCaption": "Type of Employment",
178 "groupDescription": "Select desired type of employment."
179 },
180 "typeOfEmploymentPartTime": {
181 "title": "Part-time",
182 "type": "boolean",
183 "description": "Part-time job",
184 "default": false
185 },
186 "typeOfCooperationFreelance": {
187 "title": "Freelance",
188 "type": "boolean",
189 "description": "Freelance job",
190 "default": true
191 },
192 "typeOfCooperationInternship": {
193 "title": "Internship",
194 "type": "boolean",
195 "description": "Internship",
196 "default": true
197 },
198 "typeOfCooperationContract": {
199 "title": "Contract",
200 "type": "boolean",
201 "description": "Contract job",
202 "default": true
203 },
204 "areasDevelopmentBackEnd": {
205 "sectionCaption": "Job areas",
206 "sectionDescription": "Select desired areas.",
207 "title": "Back-end Development",
208 "type": "boolean",
209 "description": "Back-end development",
210 "default": true,
211 "groupCaption": "Development",
212 "groupDescription": "Select desired areas of development."
213 },
214 "areasDevelopmentFrontEnd": {
215 "title": "Front-end Development",
216 "type": "boolean",
217 "description": "Front-end development",
218 "default": true
219 },
220 "areasDevelopmentDatabase": {
221 "title": "Database Development",
222 "type": "boolean",
223 "description": "Database development",
224 "default": true
225 },
226 "areasDevelopmentDevOps": {
227 "title": "DevOps",
228 "type": "boolean",
229 "description": "DevOps",
230 "default": false
231 },
232 "areasDevelopmentTesting": {
233 "title": "Testing",
234 "type": "boolean",
235 "description": "Testing",
236 "default": false
237 },
238 "areasDevelopmentMobile": {
239 "title": "Mobile Development",
240 "type": "boolean",
241 "description": "Mobile development",
242 "default": false
243 },
244 "areasDevelopmentHardware": {
245 "title": "Hardware Development",
246 "type": "boolean",
247 "description": "Hardware development",
248 "default": false
249 },
250 "areasDevelopmentSecurity": {
251 "title": "IT Security",
252 "type": "boolean",
253 "description": "Security",
254 "default": false
255 },
256 "areasDevelopmentSoftwareArchitect": {
257 "title": "Software Architect",
258 "type": "boolean",
259 "description": "Software architect",
260 "default": false
261 },
262 "areasDevelopmentItConsulting": {
263 "title": "IT Consulting",
264 "type": "boolean",
265 "description": "IT consulting",
266 "default": false
267 },
268 "areasDevelopmentProjectManagement": {
269 "title": "Project Management",
270 "type": "boolean",
271 "description": "Project management",
272 "default": false
273 },
274 "areasDevelopmentProductOwner": {
275 "title": "Product Owner",
276 "type": "boolean",
277 "description": "Product owner",
278 "default": false
279 },
280 "areasDevelopmentScrumMaster": {
281 "title": "Scrum Master",
282 "type": "boolean",
283 "description": "Scrum master",
284 "default": false
285 },
286 "areasDevelopmentDesign": {
287 "title": "Design",
288 "type": "boolean",
289 "description": "Design",
290 "default": false
291 },
292 "areasDevelopment3dPrinting": {
293 "title": "3D Printing",
294 "type": "boolean",
295 "description": "3D printing",
296 "default": false
297 },
298 "areasDevelopmentGameDev": {
299 "title": "Game Dev",
300 "type": "boolean",
301 "description": "Game dev",
302 "default": false
303 },
304 "areasDevelopmentAI": {
305 "title": "AI",
306 "type": "boolean",
307 "description": "AI",
308 "default": false
309 },
310 "areasDevelopmentMachineLearning": {
311 "title": "Machine Learning",
312 "type": "boolean",
313 "description": "Machine learning",
314 "default": false
315 },
316 "areasDevelopmentVR": {
317 "title": "VR / AR",
318 "type": "boolean",
319 "description": "VR / AR",
320 "default": false
321 },
322 "areasDevelopmentIoT": {
323 "title": "Internet of Things",
324 "type": "boolean",
325 "description": "IoT",
326 "default": false
327 },
328 "areasDevelopmentBlockchain": {
329 "title": "Blockchain",
330 "type": "boolean",
331 "description": "Blockchain",
332 "default": false
333 },
334 "areasDevelopmentNoCode": {
335 "title": "No Code",
336 "type": "boolean",
337 "description": "No code",
338 "default": false
339 },
340 "areasDevelopmentTechWriter": {
341 "title": "Tech Writer",
342 "type": "boolean",
343 "description": "Tech writer",
344 "default": false
345 },
346 "areasDevelopmentEmbeddedSystems": {
347 "title": "Embedded Systems",
348 "type": "boolean",
349 "description": "Embedded systems",
350 "default": false
351 },
352 "areasMarketingMarketing": {
353 "title": "Marketing",
354 "type": "boolean",
355 "description": "Marketing",
356 "default": false,
357 "groupCaption": "Marketing",
358 "groupDescription": "Select desired areas of marketing."
359 },
360 "areasMarketingSales": {
361 "title": "Sales",
362 "type": "boolean",
363 "description": "Sales",
364 "default": false
365 },
366 "areasMarketingCreativity": {
367 "title": "Creativity",
368 "type": "boolean",
369 "description": "Creativity",
370 "default": false
371 },
372 "areasAnalyticsBI": {
373 "title": "Business Intelligence",
374 "type": "boolean",
375 "description": "BI / Business Analytics",
376 "default": false,
377 "groupCaption": "Analytics",
378 "groupDescription": "Select desired areas of analytics."
379 },
380 "areasAnalyticsDataAnalysis": {
381 "title": "Data Analysis",
382 "type": "boolean",
383 "description": "Data analysis",
384 "default": false
385 },
386 "areasAnalyticsITAnalysis": {
387 "title": "IT Analysis",
388 "type": "boolean",
389 "description": "IT analysis",
390 "default": false
391 },
392 "areasAnalyticsWebAnalysis": {
393 "title": "Web Analysis",
394 "type": "boolean",
395 "description": "Web analysis",
396 "default": false
397 },
398 "areasAnalyticsMarketingAnalysis": {
399 "title": "Marketing Analysis",
400 "type": "boolean",
401 "description": "Marketing analysis",
402 "default": false
403 },
404 "areasManagementAccount": {
405 "title": "Account Management",
406 "type": "boolean",
407 "description": "Account management",
408 "default": false,
409 "groupCaption": "Management",
410 "groupDescription": "Select desired areas of management."
411 },
412 "areasManagementBusinessDevelopment": {
413 "title": "Business Development",
414 "type": "boolean",
415 "description": "Business development",
416 "default": false
417 },
418 "areasManagementManagement": {
419 "title": "Management",
420 "type": "boolean",
421 "description": "Management",
422 "default": false
423 },
424 "areasManagementMarketing": {
425 "title": "Marketing",
426 "type": "boolean",
427 "description": "Marketing",
428 "default": false
429 },
430 "areasManagementSales": {
431 "title": "Sales",
432 "type": "boolean",
433 "description": "Sales",
434 "default": false
435 },
436 "areasManagementScrumMaster": {
437 "title": "Scrum Master",
438 "type": "boolean",
439 "description": "Scrum master",
440 "default": false
441 },
442 "areasManagementOffice": {
443 "title": "Office Management",
444 "type": "boolean",
445 "description": "Office management",
446 "default": false
447 },
448 "areasManagementProductOwner": {
449 "title": "Product Owner",
450 "type": "boolean",
451 "description": "Product owner",
452 "default": false
453 },
454 "areasManagementProjectManagement": {
455 "title": "Project Management",
456 "type": "boolean",
457 "description": "Project management",
458 "default": false
459 },
460 "areasManagementOperations": {
461 "title": "Operations",
462 "type": "boolean",
463 "description": "Operations",
464 "default": false
465 },
466 "areasManagementTopManagement": {
467 "title": "Top Management",
468 "type": "boolean",
469 "description": "Top management (C-level)",
470 "default": false
471 },
472 "areasManagementTeamLead": {
473 "title": "Team Lead",
474 "type": "boolean",
475 "description": "Team lead",
476 "default": false
477 },
478 "areasManagementTech": {
479 "title": "Tech Management",
480 "type": "boolean",
481 "description": "Tech management",
482 "default": false
483 },
484 "areaAdministrationAdministration": {
485 "title": "Administration",
486 "type": "boolean",
487 "description": "Administration",
488 "default": false,
489 "groupCaption": "Administration",
490 "groupDescription": "Select desired areas of administration."
491 },
492 "areaAdministrationHR": {
493 "title": "HR",
494 "type": "boolean",
495 "description": "HR",
496 "default": false
497 },
498 "areaAdministrationLecturer": {
499 "title": "Lecturer",
500 "type": "boolean",
501 "description": "Lecturer",
502 "default": false
503 },
504 "areaAdministrationRest": {
505 "title": "Rest",
506 "type": "boolean",
507 "description": "Rest",
508 "default": false
509 },
510 "salaryDefined": {
511 "sectionCaption": "Salary",
512 "sectionDescription": "Select desired salary.",
513 "title": "Salary defined",
514 "type": "boolean",
515 "description": "Show only jobs with defined salary.",
516 "default": false
517 },
518 "salaryCurrency": {
519 "title": "Salary Currency",
520 "type": "string",
521 "description": "Currency of the salary",
522 "enum": ["CZK", "EUR"],
523 "enumTitles": ["CZK", "EUR"],
524 "prefill": "CZK",
525 "default": "CZK"
526 },
527 "salaryType": {
528 "title": "Salary Type",
529 "type": "string",
530 "description": "Type of the salary",
531 "enumTitles": ["Hourly", "Monthly"],
532 "enum": ["hourly", "monthly"],
533 "prefill": "hourly",
534 "default": "hourly"
535 },
536 "salaryMin": {
537 "title": "Minimal Salary",
538 "description": "Minimal salary",
539 "type": "integer",
540 "editor": "number",
541 "prefill": 0,
542 "default": 0,
543 "minimum": 0
544 }
545 }
546}
.vscode/launch.json
1{
2 "version": "0.2.0",
3 "configurations": [
4 {
5 "type": "node",
6 "request": "launch",
7 "name": "Launch via apify",
8 "runtimeExecutable": "apify",
9 "runtimeArgs": [
10 "run",
11 "--purge"
12 ],
13 "skipFiles": [
14 "<node_internals>/**"
15 ],
16 }
17 ]
18}
src/config.ts
1import { Config } from './types/config.js';
2
/** Base URL of the scraped site; the offers API endpoint is derived from it. */
const originUrl = 'https://www.startupjobs.cz';

/**
 * Opaque location identifiers, keyed by the city/country they stand for.
 * NOTE(review): the values look like Google Place IDs — confirm against the site's API.
 */
const cityIds: Config['cityIds'] = {
    prague: 'ChIJi3lwCZyTC0cRkEAWZg-vAAQ',
    brno: 'ChIJEVE_wDqUEkcRsLEUZg-vAAQ',
    ostrava: 'ChIJneckMWpYEUcRRJGen_tzGYk',
    czechia: 'ChIJQ4Ld14-UC0cRb1jb03UcZvg',
    sanFrancisco: 'ChIJIQBpAG2ahYAR_6128GcTUEo',
    newYork: 'ChIJOwg_06VPwokRYv534QaPC8g',
    london: 'ChIJdd4hrwug2EcRmSrV3Vo6llI',
};

/** Static scraper settings: page size, location identifiers and target URLs. */
export const config: Config = {
    adsPerPage: 20,
    cityIds,
    originUrl,
    fetchUrl: `${originUrl}/api/offers`,
};
src/main.ts
1import { Actor } from 'apify';
2import { HttpCrawler } from 'crawlee';
3
4import { config } from './config.js';
5import { generateRequest } from './helpers/helper.js';
6import { ResponseParser } from './parsers/responseParser.js';
7import { Body } from './types/body.js';
8import { Input } from './types/input.js';
9
// Actor entry point: reads the input, then crawls the offers API page by page,
// storing every parsed ad in the default dataset.
await Actor.main(async () => {
    const input = await Actor.getInput<Input>();

    if (!input) throw new Error('Input is missing');

    const proxyConfiguration = await Actor.createProxyConfiguration();

    const crawler = new HttpCrawler({
        proxyConfiguration,
        requestHandler: async ({ body }) => {
            const json: Body = JSON.parse(body.toString());
            const responseParser = new ResponseParser(json);

            // Store the whole page with a single awaited call. `forEach` with an
            // async callback drops the returned promises, so storage errors would
            // surface as unhandled rejections and the handler could return before
            // the data is written.
            // NOTE(review): assumes `responseParser.ads` is an array — confirm in ResponseParser.
            await Actor.pushData(responseParser.ads);

            // Enqueue the next page only while the site reports more results and
            // the pages fetched so far do not yet cover the requested number of ads.
            if (responseParser.hasNextPage && responseParser.currentPage * config.adsPerPage < input.numAds) {
                await crawler.addRequests([generateRequest(input, responseParser.currentPage + 1)]);
            }
        },
    });

    await crawler.run([generateRequest(input, 1)]);
});
test/adParser.test.ts
1import { AdParser } from '../src/parsers/adParser.js';
2import { ResultSetEntity } from '../src/types/body.js';
3
// Unit tests for AdParser: every getter is checked against the raw entity it wraps.
describe('AdParser', () => {
    const originUrl = 'https://www.startupjobs.cz';

    let resultEntity: ResultSetEntity;
    let adParser: AdParser;

    /** Builds a fresh raw entity so that each test starts from untouched data. */
    const makeEntity = (): ResultSetEntity => ({
        id: 1,
        name: 'Test Name',
        description: 'Test Description',
        mainAreaName: 'Test Role',
        areaNames: ['Area1', 'Area2'],
        url: '/test-url',
        company: 'Test Company',
        companyType: 'Tech',
        isStartup: true,
        imageUrl: 'http://test.com/logo.png',
        companyAreas: ['Area1', 'Area2'],
        locations: 'Location1, Location2',
        shifts: 'Shift1, Shift2',
        seniorities: ['Junior', 'Mid', 'Senior'],
        benefits: [0, 1],
        collaborations: 'Collaboration1, Collaboration2',
        isHot: true,
        isRemote: false,
        isTop: true,
        salary: { max: 1000, min: 500, measure: 'monthly', currency: 'USD' },
    });

    /** Splits a comma-separated string and trims every item. */
    const splitAndTrim = (csv: string): string[] => csv.split(',').map((item) => item.trim());

    beforeEach(() => {
        resultEntity = makeEntity();
        adParser = new AdParser(resultEntity);
    });

    // One row per getter: [getter name, expected value derived from the entity,
    // whether the comparison is by identity (`toBe`) or by structure (`toEqual`)].
    const cases: Array<[keyof AdParser, (entity: ResultSetEntity) => unknown, boolean]> = [
        ['name', (entity) => entity.name, true],
        ['description', (entity) => entity.description, true],
        ['roleName', (entity) => entity.mainAreaName, true],
        ['roleAreas', (entity) => entity.areaNames, true],
        ['url', (entity) => originUrl + entity.url, true],
        ['company', (entity) => entity.company, true],
        ['companyType', (entity) => entity.companyType, true],
        ['companyIsSturtup', (entity) => entity.isStartup, true],
        ['companyLogo', (entity) => entity.imageUrl, true],
        ['companyAreas', (entity) => entity.companyAreas, true],
        ['locations', (entity) => splitAndTrim(entity.locations), false],
        ['shifts', (entity) => splitAndTrim(entity.shifts), false],
        ['seniorities', (entity) => entity.seniorities, true],
        ['benefits', (entity) => entity.benefits, true],
        ['collaborationTypes', (entity) => splitAndTrim(entity.collaborations), false],
        ['isHot', (entity) => entity.isHot, true],
        ['isRemote', (entity) => entity.isRemote, true],
        ['isTop', (entity) => entity.isTop, true],
        ['salary', (entity) => entity.salary, true],
    ];

    test.each(cases)('should return %s', (getter, expectedFrom, byIdentity) => {
        const actual = adParser[getter];
        const expected = expectedFrom(resultEntity);

        if (byIdentity) {
            expect(actual).toBe(expected);
        } else {
            expect(actual).toEqual(expected);
        }
    });
});
src/builders/adBuilder.ts
1import { AdParser } from '../parsers/adParser.js';
2import { Ad } from '../types/ad.js';
3
/**
 * Writes one field of the ad under construction and hands the builder back,
 * so every `add*` method stays a chainable one-liner.
 */
function put<B extends AdBuilder, K extends keyof Ad>(builder: B, field: K, value: Ad[K]): B {
    builder.ad[field] = value;
    return builder;
}

/**
 * Fluent builder that assembles an `Ad` from the values exposed by an `AdParser`.
 *
 * Each `add*` method copies a single value from the parser into the ad and
 * returns the builder; `build()` returns the ad assembled so far. Fields whose
 * `add*` method was never called are simply absent from the result.
 */
export class AdBuilder {
    adParser: AdParser;
    // Starts empty and is filled in incrementally by the add* methods.
    ad: Ad = {} as Ad;

    constructor(adParser: AdParser) {
        this.adParser = adParser;
    }

    addName() {
        return put(this, 'name', this.adParser.name);
    }

    addDescription() {
        return put(this, 'description', this.adParser.description);
    }

    /** The ad's `mainArea` comes from the parser's `roleName`. */
    addMainArea() {
        return put(this, 'mainArea', this.adParser.roleName);
    }

    addUrl() {
        return put(this, 'url', this.adParser.url);
    }

    addCompany() {
        return put(this, 'company', this.adParser.company);
    }

    addCompanyType() {
        return put(this, 'companyType', this.adParser.companyType);
    }

    addCompanyIsSturtup() {
        return put(this, 'companyIsSturtup', this.adParser.companyIsSturtup);
    }

    addCompanyLogo() {
        return put(this, 'companyLogo', this.adParser.companyLogo);
    }

    addCompanyAreas() {
        return put(this, 'companyAreas', this.adParser.companyAreas);
    }

    addLocations() {
        return put(this, 'locations', this.adParser.locations);
    }

    addShifts() {
        return put(this, 'shifts', this.adParser.shifts);
    }

    /** The ad's `areas` come from the parser's `roleAreas`. */
    addAreas() {
        return put(this, 'areas', this.adParser.roleAreas);
    }

    addSeniorities() {
        return put(this, 'seniorities', this.adParser.seniorities);
    }

    addBenefits() {
        return put(this, 'benefits', this.adParser.benefits);
    }

    addCollaborationTypes() {
        return put(this, 'collaborationTypes', this.adParser.collaborationTypes);
    }

    addIsHot() {
        return put(this, 'isHot', this.adParser.isHot);
    }

    addIsRemote() {
        return put(this, 'isRemote', this.adParser.isRemote);
    }

    addIsTop() {
        return put(this, 'isTop', this.adParser.isTop);
    }

    addSalary() {
        return put(this, 'salary', this.adParser.salary);
    }

    /** Returns the ad object assembled so far (the same instance on every call). */
    build() {
        return this.ad;
    }
}
src/builders/requestBuilder.ts
1import { RequestOptions } from 'crawlee';
2
3import { config } from '../config.js';
4import { Input } from '../types/input.js';
5
6export class RequestBuilder {
7 page: number;
8 input: Input;
9 salary: string;
10 collaboration: string;
11 shift: string;
12 area: string;
13
14 constructor(input: Input, page?: number) {
15 this.page = page ?? 0;
16 this.input = input;
17
18 this.collaboration = '';
19 this.shift = '';
20 this.area = '';
21 this.salary = '';
22 }
23
24 addCollaboration(): RequestBuilder {
25 if (this.input.typeOfCooperationFreelance) this.collaboration += `&collaboration[]=freelance`;
26 if (this.input.typeOfCooperationInternship) this.collaboration += `&collaboration[]=internship`;
27 if (this.input.typeOfCooperationContract) this.collaboration += `&collaboration[]=pracovni-smlouva`;
28 if (this.input.workplaceOnsite) this.collaboration += `&collaboration[]=onsite`;
29 if (this.input.workplaceRemote) this.collaboration += `&collaboration[]=remote`;
30 if (this.input.workplaceHybrid) this.collaboration += `&collaboration[]=hybridni`;
31
32 return this;
33 }
34
35 addShift(): RequestBuilder {
36 if (this.input.typeOfEmploymentFullTime) this.shift += `&shift[]=full-time`;
37 if (this.input.typeOfEmploymentPartTime) this.shift += `&shift[]=part-time`;
38
39 return this;
40 }
41
42 addSeniority(): RequestBuilder {
43 if (this.input.seniorityJunior) this.area += `&seniority[]=junior`;
44 if (this.input.seniorityMedior) this.area += `&seniority[]=medior`;
45 if (this.input.senioritySenior) this.area += `&seniority[]=senior`;
46
47 return this;
48 }
49
50 addLocation(): RequestBuilder {
51 if (this.input.locationPrague) this.area += `&location[]=${config.cityIds.prague}`;
52 if (this.input.locationBrno) this.area += `&location[]=${config.cityIds.brno}`;
53 if (this.input.locationOstrava) this.area += `&location[]=${config.cityIds.ostrava}`;
54 if (this.input.locationCzechia) this.area += `&location[]=${config.cityIds.czechia}`;
55 if (this.input.locationSanFrancisco) this.area += `&location[]=${config.cityIds.sanFrancisco}`;
56 if (this.input.locationNewYork) this.area += `&location[]=${config.cityIds.newYork}`;
57 if (this.input.locationLondon) this.area += `&location[]=${config.cityIds.london}`;
58
59 return this;
60 }
61
62 addArea(): RequestBuilder {
63 if (this.input.areasDevelopmentBackEnd) this.area += `&area[]=vyvoj/back-end`;
64 if (this.input.areasDevelopmentFrontEnd) this.area += `&area[]=vyvoj/front-end-koder`;
65 if (this.input.areasDevelopmentDatabase) this.area += `&area[]=vyvoj/databaze`;
66 if (this.input.areasDevelopmentDevOps) this.area += `&area[]=vyvoj/devops`;
67 if (this.input.areasDevelopmentTesting) this.area += `&area[]=vyvoj/testovani`;
68 if (this.input.areasDevelopmentMobile) this.area += `&area[]=vyvoj/mobilni-vyvoj`;
69 if (this.input.areasDevelopmentHardware) this.area += `&area[]=vyvoj/hardware-firmware`;
70 if (this.input.areasDevelopmentSecurity) this.area += `&area[]=vyvoj/it-security`;
71 if (this.input.areasDevelopmentSoftwareArchitect) this.area += `&area[]=vyvoj/software-architekt`;
72 if (this.input.areasDevelopmentItConsulting) this.area += `&area[]=vyvoj/it-konzultant`;
73 if (this.input.areasDevelopmentProjectManagement) this.area += `&area[]=vyvoj/project-manager`;
74 if (this.input.areasDevelopmentProductOwner) this.area += `&area[]=vyvoj/product-manager-a-product-owner`;
75 if (this.input.areasDevelopmentScrumMaster) this.area += `&area[]=vyvoj/scrum-master`;
76 if (this.input.areasDevelopmentDesign) this.area += `&area[]=vyvoj/design`;
77 if (this.input.areasDevelopment3dPrinting) this.area += `&area[]=vyvoj/3d-tisk`;
78 if (this.input.areasDevelopmentGameDev) this.area += `&area[]=vyvoj/herni-vyvoj`;
79 if (this.input.areasDevelopmentAI) this.area += `&area[]=vyvoj/umela-inteligence`;
80 if (this.input.areasDevelopmentMachineLearning) this.area += `&area[]=vyvoj/machine-learning`;
81 if (this.input.areasDevelopmentVR) this.area += `&area[]=vyvoj/virtualni-rozsirena-realita`;
82 if (this.input.areasDevelopmentIoT) this.area += `&area[]=vyvoj/internet-veci`;
83 if (this.input.areasDevelopmentBlockchain) this.area += `&area[]=vyvoj/blockchain`;
84 if (this.input.areasDevelopmentNoCode) this.area += `&area[]=vyvoj/no-code`;
85 if (this.input.areasDevelopmentTechWriter) this.area += `&area[]=vyvoj/tech-writer`;
86 if (this.input.areasDevelopmentEmbeddedSystems) this.area += `&area[]=vyvoj/embedded-systems`;
87
88 if (this.input.areasMarketingMarketing) this.area += `&area[]=marketing-a-obchod/marketing`;
89 if (this.input.areasMarketingSales) this.area += `&area[]=marketing-a-obchod/obchod`;
90 if (this.input.areasMarketingCreativity) this.area += `&area[]=marketing-a-obchod/kreativa`;
91
92 if (this.input.areasAnalyticsBI) this.area += `&area[]=analytika/bi-business-analytik`;
93 if (this.input.areasAnalyticsDataAnalysis) this.area += `&area[]=analytika/data-analytik`;
94 if (this.input.areasAnalyticsITAnalysis) this.area += `&area[]=analytika/it-analytik`;
95 if (this.input.areasAnalyticsWebAnalysis) this.area += `&area[]=analytika/webovy-analytik`;
96 if (this.input.areasAnalyticsMarketingAnalysis) this.area += `&area[]=analytika/marketingovy-analytik`;
97
98 if (this.input.areasManagementAccount) this.area += `&area[]=management/account-manager`;
99 if (this.input.areasManagementBusinessDevelopment) this.area += `&area[]=management/business-development`;
100 if (this.input.areasManagementManagement) this.area += `&area[]=management/manager`;
101 if (this.input.areasManagementMarketing) this.area += `&area[]=management/marketing-manager`;
102 if (this.input.areasManagementSales) this.area += `&area[]=management/sales-manager`;
103 if (this.input.areasManagementScrumMaster) this.area += `&area[]=management/scrum-master`;
104 if (this.input.areasManagementOffice) this.area += `&area[]=management/office-manager`;
105 if (this.input.areasManagementProductOwner) this.area += `&area[]=management/product-manager-a-product-owner`;
106 if (this.input.areasManagementProjectManagement) this.area += `&area[]=management/project-manager`;
107 if (this.input.areasManagementOperations) this.area += `&area[]=management/provozni-manazer`;
108 if (this.input.areasManagementTeamLead) this.area += `&area[]=management/team-leader`;
109 if (this.input.areasManagementTopManagement) this.area += `&area[]=management/top-management-c-level`;
110 if (this.input.areasManagementTech) this.area += `&area[]=management/technicky-manazer`;
111
112 if (this.input.areaAdministrationAdministration) this.area += `&area[]=administrativa-a-jine/administrativa`;
113 if (this.input.areaAdministrationHR) this.area += `&area[]=administrativa-a-jine/hr`;
114 if (this.input.areaAdministrationLecturer) this.area += `&area[]=administrativa-a-jine/lektor`;
115 if (this.input.areaAdministrationRest) this.area += `&area[]=administrativa-a-jine/ostatni`;
116
117 return this;
118 }
119
/**
 * Appends the salary filters to the query-string fragment kept in `this.salary`.
 *
 * - When `input.salaryDefined` is truthy, `&salary[]=1` is added.
 * - One `&salaryRange[]=` entry is always added. Its value is the JSON object
 *   `{ value, currency, measure }` built from `input.salaryMin`,
 *   `input.salaryCurrency` and `input.salaryType`.
 *
 * The JSON text is percent-encoded before it is placed in the URL: raw JSON
 * contains `"`, `{`, `}`, `:` and `,`, and a currency or measure value that
 * contained `&` or `#` would otherwise split or truncate the query string.
 *
 * @returns This builder, so that calls can be chained.
 */
addSalary(): RequestBuilder {
    if (this.input.salaryDefined) this.salary += `&salary[]=1`;

    const salaryRange = JSON.stringify({
        value: this.input.salaryMin,
        currency: this.input.salaryCurrency,
        measure: this.input.salaryType,
    });

    // Encode only the value; the `salaryRange[]` key is left exactly as before.
    this.salary += `&salaryRange[]=${encodeURIComponent(salaryRange)}`;

    return this;
}
133
/**
 * Assembles the final request from the query fragments collected so far.
 *
 * The URL is `config.fetchUrl`, then `?page=<page>`, then the salary,
 * collaboration, shift and area fragments, in that order.
 *
 * NOTE(review): only those four fragments are concatenated here. Confirm that
 * the builder's `addSeniority()` and `addLocation()` steps write into one of
 * them, otherwise their filters never reach the URL.
 *
 * @returns Options for a GET request carrying the `content-type` and `origin` headers.
 */
build(): RequestOptions {
    const filters = `${this.salary}${this.collaboration}${this.shift}${this.area}`;
    const url = `${config.fetchUrl}?page=${this.page}${filters}`;

    const headers = {
        'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
        origin: config.originUrl,
    };

    return { url, method: 'GET', headers };
}
144}
src/helpers/helper.ts
1import { RequestOptions } from 'crawlee';
2
3import { RequestBuilder } from '../builders/requestBuilder.js';
4import { Input } from '../types/input.js';
5
/**
 * Builds the request options for one page of the job search.
 *
 * Creates a `RequestBuilder` for the given input and page, runs its shift,
 * seniority, location, salary, collaboration and area steps in that order,
 * and returns whatever `build()` produces.
 *
 * @param input - Scraper input holding the selected filters.
 * @param page - Page number handed to the builder.
 * @returns Request options for the given page.
 */
export function generateRequest(input: Input, page: number): RequestOptions {
    return new RequestBuilder(input, page)
        .addShift()
        .addSeniority()
        .addLocation()
        .addSalary()
        .addCollaboration()
        .addArea()
        .build();
}
src/parsers/adParser.ts
1import { config } from '../config.js';
2import { ResultSetEntity } from '../types/body.js';
3
/**
 * Read-only accessor layer over one raw `ResultSetEntity`.
 *
 * Most getters return the matching entity field unchanged. The exceptions are
 * `url`, which is prefixed with `config.originUrl`, and the comma-separated
 * fields (`locations`, `shifts`, `collaborationTypes`), which are returned as
 * arrays of trimmed strings.
 */
export class AdParser {
    setEntity: ResultSetEntity;

    constructor(setEntity: ResultSetEntity) {
        this.setEntity = setEntity;
    }

    /**
     * Splits a comma-separated field into trimmed, non-empty items.
     *
     * A missing or blank field yields an empty list. Calling `.split(',')`
     * directly would give `['']` for an empty string and would throw for
     * `null`/`undefined`, aborting the parse of the whole page.
     */
    private static splitList(value: string | null | undefined): string[] {
        if (!value) return [];

        return value
            .split(',')
            .map((item) => item.trim())
            .filter((item) => item.length > 0);
    }

    /** The entity's `name`. */
    get name() {
        return this.setEntity.name;
    }

    /** The entity's `description`. */
    get description() {
        return this.setEntity.description;
    }

    /** The entity's `mainAreaName`. */
    get roleName() {
        return this.setEntity.mainAreaName;
    }

    /** The entity's `areaNames`. */
    get roleAreas() {
        return this.setEntity.areaNames;
    }

    /** `config.originUrl` followed by the entity's `url`. */
    get url() {
        return config.originUrl + this.setEntity.url;
    }

    /** The entity's `company`. */
    get company() {
        return this.setEntity.company;
    }

    /** The entity's `companyType`. */
    get companyType() {
        return this.setEntity.companyType;
    }

    /**
     * The entity's `isStartup` flag.
     * The misspelled getter name is kept as is: it matches the `companyIsSturtup`
     * field of `Ad` and the `addCompanyIsSturtup()` builder step.
     */
    get companyIsSturtup() {
        return this.setEntity.isStartup;
    }

    /** The entity's `imageUrl`. */
    get companyLogo() {
        return this.setEntity.imageUrl;
    }

    /** The entity's `companyAreas`. */
    get companyAreas() {
        return this.setEntity.companyAreas;
    }

    /** The entity's comma-separated `locations` as a list of trimmed items. */
    get locations() {
        return AdParser.splitList(this.setEntity.locations);
    }

    /** The entity's comma-separated `shifts` as a list of trimmed items. */
    get shifts() {
        return AdParser.splitList(this.setEntity.shifts);
    }

    /** The entity's `seniorities`. */
    get seniorities() {
        return this.setEntity.seniorities;
    }

    /** The entity's `benefits`. */
    get benefits() {
        return this.setEntity.benefits;
    }

    /** The entity's comma-separated `collaborations` as a list of trimmed items. */
    get collaborationTypes() {
        return AdParser.splitList(this.setEntity.collaborations);
    }

    /** The entity's `isHot` flag. */
    get isHot() {
        return this.setEntity.isHot;
    }

    /** The entity's `isRemote` flag. */
    get isRemote() {
        return this.setEntity.isRemote;
    }

    /** The entity's `isTop` flag. */
    get isTop() {
        return this.setEntity.isTop;
    }

    /** The entity's `salary` object. */
    get salary() {
        return this.setEntity.salary;
    }
}
src/parsers/responseParser.ts
1import { AdParser } from './adParser.js';
2import { AdBuilder } from '../builders/adBuilder.js';
3import { Ad } from '../types/ad.js';
4import { Body } from '../types/body.js';
5
/**
 * Wraps one parsed response `Body` and exposes its ads and paging state.
 */
export class ResponseParser {
    body: Body;

    constructor(body: Body) {
        this.body = body;
    }

    /**
     * Converts every entry of `body.resultSet` into an `Ad`.
     *
     * Each entry is wrapped in an `AdParser` and passed through the full
     * `AdBuilder` chain; the resulting ads keep the order of `resultSet`.
     */
    get ads(): Ad[] {
        return this.body.resultSet.map((entity) => {
            const parser = new AdParser(entity);

            return new AdBuilder(parser)
                .addName()
                .addDescription()
                .addMainArea()
                .addUrl()
                .addCompany()
                .addCompanyType()
                .addCompanyIsSturtup()
                .addCompanyLogo()
                .addCompanyAreas()
                .addLocations()
                .addShifts()
                .addAreas()
                .addSeniorities()
                .addBenefits()
                .addCollaborationTypes()
                .addIsHot()
                .addIsRemote()
                .addIsTop()
                .addSalary()
                .build();
        });
    }

    /** True while the current page number is below `paginator.max`. */
    get hasNextPage(): boolean {
        const lastPage = this.body.paginator.max;

        return this.currentPage < lastPage;
    }

    /** The page number reported by the response (`paginator.current`). */
    get currentPage(): number {
        const { current } = this.body.paginator;

        return current;
    }
}
src/types/ad.d.ts
1import { Salary } from './body.js';
2
/**
 * One job ad in the shape returned by `ResponseParser.ads` (assembled by `AdBuilder`).
 *
 * NOTE(review): `companyIsSturtup` is a misspelling of "startup". The same spelling is
 * used by `AdParser.companyIsSturtup` and `AdBuilder.addCompanyIsSturtup()`, so renaming
 * it here alone would break them.
 */
3export type Ad = {
4 name: string;
5 description: string;
6 mainArea: string;
7 url: string;
8 company: string;
9 companyType: string;
10 companyIsSturtup: boolean;
11 companyLogo: string;
12 companyAreas: string[];
13 locations: string[];
14 shifts: string[];
15 areas: string[];
16 seniorities: string[];
// Numeric values copied from the response; their meaning is not visible here (presumably benefit IDs — TODO confirm).
17 benefits: number[];
18 collaborationTypes: string[];
19 isHot: boolean;
20 isRemote: boolean;
21 isTop: boolean;
22 salary: Salary;
23}
src/types/body.d.ts
/**
 * Shape of the parsed response body consumed by `ResponseParser`.
 * `resultSet` holds the job entries and `paginator` the paging state; the remaining
 * fields are not read by `ResponseParser`.
 */
1export interface Body {
2 resultSet: ResultSetEntity[]
3 resultCount: number
4 paginator: Paginator
5 permanentUrlForResultSet: PermanentUrlForResultSet
6 seo: Seo
7}
8
/**
 * One raw job entry of `Body.resultSet`, read field by field through `AdParser`.
 */
9export interface ResultSetEntity {
10 id: number
11 name: string
12 description: string
// `AdParser.url` prepends `config.originUrl` to this value.
13 url: string
14 company: string
15 companyType: string
16 mainAreaName: string
17 imageUrl: string
// `AdParser` splits this string on ',' and trims each item.
18 locations: string
// `AdParser` splits this string on ',' and trims each item.
19 shifts: string
20 areaSlugs?: string[];
21 areaNames: string[];
22 seniorities: string[];
23 benefits: number[];
// `AdParser` splits this string on ',' and trims each item.
24 collaborations: string
25 isHot: boolean;
26 isRemote: boolean;
27 isTop: boolean;
28 companyAreas: string[];
29 isStartup: boolean;
30 salary: Salary;
31}
32
/**
 * Salary information attached to a job entry (`ResultSetEntity.salary`, copied to `Ad.salary`).
 * NOTE(review): the allowed values of `measure` and `currency` are not visible here.
 */
33export interface Salary {
34 max: number;
35 min: number;
36 measure: string;
37 currency: string;
38}
39
/**
 * Paging state of a response. `ResponseParser` reports `current` as the current page
 * and treats `current < max` as "there is a next page".
 */
40export interface Paginator {
41 current: number;
42 max: number;
43}
44
/**
 * Value of `Body.permanentUrlForResultSet`.
 * NOTE(review): `cs` and `en` are presumably the Czech and English variants of the URL — confirm.
 */
45export interface PermanentUrlForResultSet {
46 cs: string;
47 en: string;
48}
49
/**
 * Value of `Body.seo`.
 * NOTE(review): `customHtml` is typed as `undefined` only; this looks generated from a
 * sample response — confirm whether it can carry a value.
 */
50export interface Seo {
51 title: string;
52 heading: string;
53 customHtml?: undefined;
54}
src/types/config.d.ts
/**
 * Shape of the scraper's static configuration (presumably the type of the `config` object
 * imported from `config.js` — confirm).
 *
 * `config.fetchUrl` is the base of the request URL in `RequestBuilder.build()`;
 * `config.originUrl` is sent as the `origin` header and prefixed to ad URLs in `AdParser.url`.
 */
1export type Config = {
2 adsPerPage: number;
// One string identifier per supported location.
3 cityIds: {
4 prague: string;
5 brno: string;
6 ostrava: string;
7 czechia: string;
8 sanFrancisco: string;
9 newYork: string;
10 london: string;
11 };
12 originUrl: string;
13 fetchUrl: string;
14}
src/types/input.d.ts
/**
 * Scraper input passed to `generateRequest` / `RequestBuilder`.
 *
 * Apart from `numAds` and the `salary*` settings, every field is a boolean filter switch.
 * Each `areas*` / `areaAdministration*` flag that is truthy makes `RequestBuilder.addArea()`
 * append one `&area[]=...` query parameter. The `salary*` fields are read by
 * `RequestBuilder.addSalary()`.
 *
 * NOTE(review): the administration flags use the prefix `area` while all other area flags
 * use `areas`; the names are read as-is by `addArea()`, so they must stay in sync.
 */
1export type Input = {
2 numAds: number;
3 workplaceRemote: boolean;
4 workplaceHybrid: boolean;
5 workplaceOnsite: boolean;
6 locationPrague: boolean;
7 locationBrno: boolean;
8 locationOstrava: boolean;
9 locationCzechia: boolean;
10 locationSanFrancisco: boolean;
11 locationNewYork: boolean;
12 locationLondon: boolean;
13 seniorityJunior: boolean;
14 seniorityMedior: boolean;
15 senioritySenior: boolean;
16 typeOfEmploymentFullTime: boolean;
17 typeOfEmploymentPartTime: boolean;
18 typeOfCooperationFreelance: boolean;
19 typeOfCooperationInternship: boolean;
20 typeOfCooperationContract: boolean;
21 areasDevelopmentBackEnd: boolean;
22 areasDevelopmentFrontEnd: boolean;
23 areasDevelopmentDatabase: boolean;
24 areasDevelopmentDevOps: boolean;
25 areasDevelopmentTesting: boolean;
26 areasDevelopmentMobile: boolean;
27 areasDevelopmentHardware: boolean;
28 areasDevelopmentSecurity: boolean;
29 areasDevelopmentSoftwareArchitect: boolean;
30 areasDevelopmentItConsulting: boolean;
31 areasDevelopmentProjectManagement: boolean;
32 areasDevelopmentProductOwner: boolean;
33 areasDevelopmentScrumMaster: boolean;
34 areasDevelopmentDesign: boolean;
35 areasDevelopment3dPrinting: boolean;
36 areasDevelopmentGameDev: boolean;
37 areasDevelopmentAI: boolean;
38 areasDevelopmentMachineLearning: boolean;
39 areasDevelopmentVR: boolean;
40 areasDevelopmentIoT: boolean;
41 areasDevelopmentBlockchain: boolean;
42 areasDevelopmentNoCode: boolean;
43 areasDevelopmentTechWriter: boolean;
44 areasDevelopmentEmbeddedSystems: boolean;
45 areasMarketingMarketing: boolean;
46 areasMarketingSales: boolean;
47 areasMarketingCreativity: boolean;
48 areasAnalyticsBI: boolean;
49 areasAnalyticsDataAnalysis: boolean;
50 areasAnalyticsITAnalysis: boolean;
51 areasAnalyticsWebAnalysis: boolean;
52 areasAnalyticsMarketingAnalysis: boolean;
53 areasManagementAccount: boolean;
54 areasManagementBusinessDevelopment: boolean;
55 areasManagementManagement: boolean;
56 areasManagementMarketing: boolean;
57 areasManagementSales: boolean;
58 areasManagementScrumMaster: boolean;
59 areasManagementOffice: boolean;
60 areasManagementProductOwner: boolean;
61 areasManagementProjectManagement: boolean;
62 areasManagementOperations: boolean;
63 areasManagementTopManagement: boolean;
64 areasManagementTeamLead: boolean;
65 areasManagementTech: boolean;
66 areaAdministrationAdministration: boolean;
67 areaAdministrationHR: boolean;
68 areaAdministrationLecturer: boolean;
69 areaAdministrationRest: boolean;
// When truthy, `addSalary()` appends `&salary[]=1`.
70 salaryDefined: boolean;
// Sent as `currency` inside the `salaryRange[]` JSON value.
71 salaryCurrency: string;
// Sent as `measure` inside the `salaryRange[]` JSON value.
72 salaryType: string;
// Sent as `value` inside the `salaryRange[]` JSON value.
73 salaryMin: number;
74}
Developer
Maintained by Community
Actor Metrics
5 monthly users
-
1 star
>99% runs succeeded
Created in Jun 2024
Modified 6 months ago
Categories