Startup Jobs Scraper avatar
Startup Jobs Scraper

Under maintenance

Pricing

Pay per usage

Go to Store
Startup Jobs Scraper

Startup Jobs Scraper

Under maintenance

Developed by

Martin Fanta

Martin Fanta

Maintained by Community

StartupJobs.cz scraper extracts job listings from the site, gathering job titles, company names, locations, and descriptions. It automates data collection, providing up-to-date job information efficiently.

0.0 (0)

Pricing

Pay per usage

1

Total users

24

Monthly users

4

Runs succeeded

>99%

Last modified

a year ago

.dockerignore

# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git
# dist folder
dist

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"root": true,
"env": {
"browser": true,
"es2020": true,
"node": true
},
"extends": [
"@apify/eslint-config-ts"
],
"parserOptions": {
"project": "./tsconfig.json",
"ecmaVersion": 2020
},
"ignorePatterns": [
"node_modules",
"dist",
"**/*.d.ts"
]
}

.gitignore

storage
apify_storage
crawlee_storage
node_modules
dist
tsconfig.tsbuildinfo
storage/*
!storage/key_value_stores
storage/key_value_stores/*
!storage/key_value_stores/default
storage/key_value_stores/default/*
!storage/key_value_stores/default/INPUT.json
# Added by Apify CLI
.venv

jest.config.js

/**
 * Jest configuration that runs the TypeScript test suite through ts-jest.
 *
 * @type {import('ts-jest').JestConfigWithTsJest}
 */
export default {
    preset: 'ts-jest',
    testEnvironment: 'node',
    moduleFileExtensions: ['ts', 'js'],
    // ts-jest options are supplied through the transformer tuple; the
    // `globals['ts-jest']` form is deprecated in ts-jest 29.
    transform: {
        '^.+\\.tsx?$': ['ts-jest', { tsconfig: 'tsconfig.json' }],
    },
    // Sources import relative modules with a `.js` suffix; strip it so Jest
    // resolves the corresponding `.ts` file.
    moduleNameMapper: {
        '^(\\.\\.?\\/.+)\\.js$': '$1',
    },
};

package.json

{
"name": "startup-jobs-scraper",
"version": "0.1.0",
"type": "module",
"description": "This is an example of an Apify actor.",
"engines": {
"node": ">=18.0.0"
},
"dependencies": {
"apify": "^3.1.10",
"crawlee": "^3.10.5"
},
"devDependencies": {
"@apify/eslint-config-ts": "^0.4.1",
"@apify/tsconfig": "^0.1.0",
"@stylistic/eslint-plugin-js": "^2.3.0",
"@types/jest": "^29.5.12",
"@typescript-eslint/eslint-plugin": "^7.14.1",
"@typescript-eslint/parser": "^7.14.1",
"eslint": "^8.57.0",
"jest": "^29.7.0",
"ts-jest": "^29.1.5",
"tsx": "^4.6.2",
"typescript": "^5.3.3"
},
"scripts": {
"start": "npm run start:dev",
"start:prod": "node dist/main.js",
"start:dev": "tsx src/main.ts",
"build": "tsc",
"lint": "eslint ./src --ext .ts",
"lint:fix": "eslint ./src --ext .ts --fix",
"test": "jest"
},
"author": "Martin Fanta",
"license": "MIT"
}

tsconfig.json

{
"extends": "@apify/tsconfig",
"compilerOptions": {
"esModuleInterop": true,
"module": "NodeNext",
"moduleResolution": "NodeNext",
"target": "ES2022",
"outDir": "dist",
"noUnusedLocals": false,
"skipLibCheck": true,
"lib": ["DOM"],
},
"include": [
"./src/**/*",
]
}

.actor/actor.json

{
"actorSpecification": 1,
"name": "startup-jobs-scraper",
"title": "StartupJobs Scraper",
"description": "StartupJobs.cz scraper extracts job listings from the site, gathering job titles, company names, locations, and descriptions. It automates data collection, providing up-to-date job information efficiently.",
"version": "0.1",
"meta": {
"templateId": "ts-start"
},
"input": "./input_schema.json",
"dockerfile": "./Dockerfile",
"storages": {
"dataset": {
"actorSpecification": 1,
"fields": {},
"views": {
"overview": {
"title": "Overview",
"transformation": {
"fields": [
"companyLogo",
"name",
"description",
"mainArea",
"company",
"areas",
"url"
]
},
"display": {
"component": "table",
"properties": {
"companyLogo": {
"label": "Company Logo",
"format": "image"
},
"name": {
"label": "Name",
"format": "text"
},
"description": {
"label": "Description",
"format": "text"
},
"mainArea": {
"label": "Main Area",
"format": "text"
},
"company": {
"label": "Company",
"format": "text"
},
"areas": {
"label": "Areas",
"format": "array"
},
"url": {
"label": "Link",
"format": "link"
}
}
}
}
}
}
}
}

.actor/Dockerfile

# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20 AS builder
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
# Install all dependencies. Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false
# Next, copy the source files using the user set
# in the base image.
COPY . ./
# Compile the TypeScript sources into ./dist
# using the "build" script from package.json.
RUN npm run build
# Create final image
FROM apify/actor-node:20
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
&& npm install --omit=dev --omit=optional \
&& echo "Installed NPM packages:" \
&& (npm list --omit=dev --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version \
&& rm -r ~/.npm
# Copy built JS files from builder image
COPY --from=builder /usr/src/app/dist ./dist
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./
# Run the image.
CMD npm run start:prod --silent

.actor/input_schema.json

{
"title": "Startup Jobs Scraper",
"type": "object",
"schemaVersion": 1,
"required": [
"numAds",
"workplaceRemote",
"workplaceHybrid",
"workplaceOnsite",
"locationPrague",
"locationBrno",
"locationOstrava",
"locationCzechia",
"locationSanFrancisco",
"locationNewYork",
"locationLondon",
"seniorityJunior",
"seniorityMedior",
"senioritySenior",
"typeOfEmploymentFullTime",
"typeOfEmploymentPartTime",
"typeOfCooperationFreelance",
"typeOfCooperationInternship",
"typeOfCooperationContract",
"areasDevelopmentBackEnd",
"areasDevelopmentFrontEnd",
"areasDevelopmentDatabase",
"areasDevelopmentDevOps",
"areasDevelopmentTesting",
"areasDevelopmentMobile",
"areasDevelopmentHardware",
"areasDevelopmentSecurity",
"areasDevelopmentSoftwareArchitect",
"areasDevelopmentProjectManagement",
"areasDevelopmentProductOwner",
"areasDevelopmentScrumMaster",
"areasDevelopmentDesign",
"areasDevelopment3dPrinting",
"areasDevelopmentGameDev",
"areasDevelopmentAI",
"areasDevelopmentMachineLearning",
"areasDevelopmentVR",
"areasDevelopmentIoT",
"areasDevelopmentBlockchain",
"areasDevelopmentNoCode",
"areasDevelopmentTechWriter",
"areasDevelopmentEmbeddedSystems",
"areasMarketingMarketing",
"areasMarketingSales",
"areasMarketingCreativity",
"areasAnalyticsBI",
"areasAnalyticsDataAnalysis",
"areasAnalyticsITAnalysis",
"areasAnalyticsWebAnalysis",
"areasAnalyticsMarketingAnalysis",
"areasManagementAccount",
"areasManagementBusinessDevelopment",
"areasManagementManagement",
"areasManagementMarketing",
"areasManagementSales",
"areasManagementScrumMaster",
"areasManagementOffice",
"areasManagementProductOwner",
"areasManagementProjectManagement",
"areasManagementOperations",
"areasManagementTopManagement",
"areasManagementTeamLead",
"areasManagementTech",
"areaAdministrationAdministration",
"areaAdministrationHR",
"areaAdministrationLecturer",
"areaAdministrationRest",
"salaryDefined",
"salaryCurrency",
"salaryType",
"salaryMin"
],
"properties": {
"numAds": {
"title": "Number of jobs",
"description": "Total number of jobs to be fetched.",
"type": "integer",
"editor": "number",
"default": 40,
"prefill": 40,
"minimum": 20
},
"workplaceRemote": {
"title": "Remote",
"type": "boolean",
"description": "Remote work arrangement",
"default": true,
"groupCaption": "Work Arrangement",
"groupDescription": "Select desired work arrangements of the job."
},
"workplaceHybrid": {
"title": "Hybrid",
"type": "boolean",
"description": "Hybrid work arrangement",
"default": true
},
"workplaceOnsite": {
"title": "On-site",
"type": "boolean",
"description": "On-site work arrangement",
"default": true
},
"locationPrague": {
"title": "Prague",
"type": "boolean",
"description": "Job in Prague",
"default": true,
"groupCaption": "Location",
"groupDescription": "Select desired location of the job."
},
"locationBrno": {
"title": "Brno",
"type": "boolean",
"description": "Job in Brno",
"default": true
},
"locationOstrava": {
"title": "Ostrava",
"type": "boolean",
"description": "Job in Ostrava",
"default": true
},
"locationCzechia": {
"title": "Czechia",
"type": "boolean",
"description": "Job in Czechia",
"default": true
},
"locationSanFrancisco": {
"title": "San Francisco",
"type": "boolean",
"description": "Job in San Francisco",
"default": false
},
"locationNewYork": {
"title": "New York",
"type": "boolean",
"description": "Job in New York",
"default": false
},
"locationLondon": {
"title": "London",
"type": "boolean",
"description": "Job in London",
"default": false
},
"seniorityJunior": {
"title": "Junior",
"type": "boolean",
"description": "Junior position",
"default": true,
"groupCaption": "Seniority",
"groupDescription": "Select desired seniority of the job."
},
"seniorityMedior": {
"title": "Medior",
"type": "boolean",
"description": "Medior position",
"default": true
},
"senioritySenior": {
"title": "Senior",
"type": "boolean",
"description": "Senior position",
"default": true
},
"typeOfEmploymentFullTime": {
"title": "Full-time",
"type": "boolean",
"description": "Full-time job",
"default": true,
"groupCaption": "Type of Employment",
"groupDescription": "Select desired type of employment."
},
"typeOfEmploymentPartTime": {
"title": "Part-time",
"type": "boolean",
"description": "Part-time job",
"default": false
},
"typeOfCooperationFreelance": {
"title": "Freelance",
"type": "boolean",
"description": "Freelance job",
"default": true
},
"typeOfCooperationInternship": {
"title": "Internship",
"type": "boolean",
"description": "Internship",
"default": true
},
"typeOfCooperationContract": {
"title": "Contract",
"type": "boolean",
"description": "Contract job",
"default": true
},
"areasDevelopmentBackEnd": {
"sectionCaption": "Job areas",
"sectionDescription": "Select desired areas.",
"title": "Back-end Development",
"type": "boolean",
"description": "Back-end development",
"default": true,
"groupCaption": "Development",
"groupDescription": "Select desired areas of development."
},
"areasDevelopmentFrontEnd": {
"title": "Front-end Development",
"type": "boolean",
"description": "Front-end development",
"default": true
},
"areasDevelopmentDatabase": {
"title": "Database Development",
"type": "boolean",
"description": "Database development",
"default": true
},
"areasDevelopmentDevOps": {
"title": "DevOps",
"type": "boolean",
"description": "DevOps",
"default": false
},
"areasDevelopmentTesting": {
"title": "Testing",
"type": "boolean",
"description": "Testing",
"default": false
},
"areasDevelopmentMobile": {
"title": "Mobile Development",
"type": "boolean",
"description": "Mobile development",
"default": false
},
"areasDevelopmentHardware": {
"title": "Hardware Development",
"type": "boolean",
"description": "Hardware development",
"default": false
},
"areasDevelopmentSecurity": {
"title": "IT Security",
"type": "boolean",
"description": "Security",
"default": false
},
"areasDevelopmentSoftwareArchitect": {
"title": "Software Architect",
"type": "boolean",
"description": "Software architect",
"default": false
},
"areasDevelopmentItConsulting": {
"title": "IT Consulting",
"type": "boolean",
"description": "IT consulting",
"default": false
},
"areasDevelopmentProjectManagement": {
"title": "Project Management",
"type": "boolean",
"description": "Project management",
"default": false
},
"areasDevelopmentProductOwner": {
"title": "Product Owner",
"type": "boolean",
"description": "Product owner",
"default": false
},
"areasDevelopmentScrumMaster": {
"title": "Scrum Master",
"type": "boolean",
"description": "Scrum master",
"default": false
},
"areasDevelopmentDesign": {
"title": "Design",
"type": "boolean",
"description": "Design",
"default": false
},
"areasDevelopment3dPrinting": {
"title": "3D Printing",
"type": "boolean",
"description": "3D printing",
"default": false
},
"areasDevelopmentGameDev": {
"title": "Game Dev",
"type": "boolean",
"description": "Game dev",
"default": false
},
"areasDevelopmentAI": {
"title": "AI",
"type": "boolean",
"description": "AI",
"default": false
},
"areasDevelopmentMachineLearning": {
"title": "Machine Learning",
"type": "boolean",
"description": "Machine learning",
"default": false
},
"areasDevelopmentVR": {
"title": "VR / AR",
"type": "boolean",
"description": "VR / AR",
"default": false
},
"areasDevelopmentIoT": {
"title": "Internet of Things",
"type": "boolean",
"description": "IoT",
"default": false
},
"areasDevelopmentBlockchain": {
"title": "Blockchain",
"type": "boolean",
"description": "Blockchain",
"default": false
},
"areasDevelopmentNoCode": {
"title": "No Code",
"type": "boolean",
"description": "No code",
"default": false
},
"areasDevelopmentTechWriter": {
"title": "Tech Writer",
"type": "boolean",
"description": "Tech writer",
"default": false
},
"areasDevelopmentEmbeddedSystems": {
"title": "Embedded Systems",
"type": "boolean",
"description": "Embedded systems",
"default": false
},
"areasMarketingMarketing": {
"title": "Marketing",
"type": "boolean",
"description": "Marketing",
"default": false,
"groupCaption": "Marketing",
"groupDescription": "Select desired areas of marketing."
},
"areasMarketingSales": {
"title": "Sales",
"type": "boolean",
"description": "Sales",
"default": false
},
"areasMarketingCreativity": {
"title": "Creativity",
"type": "boolean",
"description": "Creativity",
"default": false
},
"areasAnalyticsBI": {
"title": "Business Intelligence",
"type": "boolean",
"description": "BI / Business Analytics",
"default": false,
"groupCaption": "Analytics",
"groupDescription": "Select desired areas of analytics."
},
"areasAnalyticsDataAnalysis": {
"title": "Data Analysis",
"type": "boolean",
"description": "Data analysis",
"default": false
},
"areasAnalyticsITAnalysis": {
"title": "IT Analysis",
"type": "boolean",
"description": "IT analysis",
"default": false
},
"areasAnalyticsWebAnalysis": {
"title": "Web Analysis",
"type": "boolean",
"description": "Web analysis",
"default": false
},
"areasAnalyticsMarketingAnalysis": {
"title": "Marketing Analysis",
"type": "boolean",
"description": "Marketing analysis",
"default": false
},
"areasManagementAccount": {
"title": "Account Management",
"type": "boolean",
"description": "Account management",
"default": false,
"groupCaption": "Management",
"groupDescription": "Select desired areas of management."
},
"areasManagementBusinessDevelopment": {
"title": "Business Development",
"type": "boolean",
"description": "Business development",
"default": false
},
"areasManagementManagement": {
"title": "Management",
"type": "boolean",
"description": "Management",
"default": false
},
"areasManagementMarketing": {
"title": "Marketing",
"type": "boolean",
"description": "Marketing",
"default": false
},
"areasManagementSales": {
"title": "Sales",
"type": "boolean",
"description": "Sales",
"default": false
},
"areasManagementScrumMaster": {
"title": "Scrum Master",
"type": "boolean",
"description": "Scrum master",
"default": false
},
"areasManagementOffice": {
"title": "Office Management",
"type": "boolean",
"description": "Office management",
"default": false
},
"areasManagementProductOwner": {
"title": "Product Owner",
"type": "boolean",
"description": "Product owner",
"default": false
},
"areasManagementProjectManagement": {
"title": "Project Management",
"type": "boolean",
"description": "Project management",
"default": false
},
"areasManagementOperations": {
"title": "Operations",
"type": "boolean",
"description": "Operations",
"default": false
},
"areasManagementTopManagement": {
"title": "Top Management",
"type": "boolean",
"description": "Top management (C-level)",
"default": false
},
"areasManagementTeamLead": {
"title": "Team Lead",
"type": "boolean",
"description": "Team lead",
"default": false
},
"areasManagementTech": {
"title": "Tech Management",
"type": "boolean",
"description": "Tech management",
"default": false
},
"areaAdministrationAdministration": {
"title": "Administration",
"type": "boolean",
"description": "Administration",
"default": false,
"groupCaption": "Administration",
"groupDescription": "Select desired areas of administration."
},
"areaAdministrationHR": {
"title": "HR",
"type": "boolean",
"description": "HR",
"default": false
},
"areaAdministrationLecturer": {
"title": "Lecturer",
"type": "boolean",
"description": "Lecturer",
"default": false
},
"areaAdministrationRest": {
"title": "Rest",
"type": "boolean",
"description": "Rest",
"default": false
},
"salaryDefined": {
"sectionCaption": "Salary",
"sectionDescription": "Select desired salary.",
"title": "Salary defined",
"type": "boolean",
"description": "Show only jobs with defined salary.",
"default": false
},
"salaryCurrency": {
"title": "Salary Currency",
"type": "string",
"description": "Currency of the salary",
"enum": ["CZK", "EUR"],
"enumTitles": ["CZK", "EUR"],
"prefill": "CZK",
"default": "CZK"
},
"salaryType": {
"title": "Salary Type",
"type": "string",
"description": "Type of the salary",
"enumTitles": ["Hourly", "Monthly"],
"enum": ["hourly", "monthly"],
"prefill": "hourly",
"default": "hourly"
},
"salaryMin": {
"title": "Minimal Salary",
"description": "Minimal salary",
"type": "integer",
"editor": "number",
"prefill": 0,
"default": 0,
"minimum": 0
}
}
}

.vscode/launch.json

{
"version": "0.2.0",
"configurations": [
{
"type": "node",
"request": "launch",
"name": "Launch via apify",
"runtimeExecutable": "apify",
"runtimeArgs": [
"run",
"--purge"
],
"skipFiles": [
"<node_internals>/**"
],
}
]
}

src/config.ts

1import { Config } from './types/config.js';
2
// Site origin; the offers API endpoint below is derived from it.
const ORIGIN_URL = 'https://www.startupjobs.cz';

/**
 * Static settings shared by the request builder, the parsers and the entry point.
 */
export const config: Config = {
    // Page size used by the entry point to decide whether another page is needed.
    adsPerPage: 20,
    // Identifiers sent as `location[]` query values.
    // NOTE(review): these look like Google Place IDs — confirm.
    cityIds: {
        prague: 'ChIJi3lwCZyTC0cRkEAWZg-vAAQ',
        brno: 'ChIJEVE_wDqUEkcRsLEUZg-vAAQ',
        ostrava: 'ChIJneckMWpYEUcRRJGen_tzGYk',
        czechia: 'ChIJQ4Ld14-UC0cRb1jb03UcZvg',
        sanFrancisco: 'ChIJIQBpAG2ahYAR_6128GcTUEo',
        newYork: 'ChIJOwg_06VPwokRYv534QaPC8g',
        london: 'ChIJdd4hrwug2EcRmSrV3Vo6llI',
    },
    originUrl: ORIGIN_URL,
    fetchUrl: `${ORIGIN_URL}/api/offers`,
};

src/main.ts

1import { Actor } from 'apify';
2import { HttpCrawler } from 'crawlee';
3
4import { config } from './config.js';
5import { generateRequest } from './helpers/helper.js';
6import { ResponseParser } from './parsers/responseParser.js';
7import { Body } from './types/body.js';
8import { Input } from './types/input.js';
9
/**
 * Actor entry point.
 *
 * Reads the actor input, then crawls the offers API page by page starting at
 * page 1. Every ad parsed from a response is pushed to the default dataset, and
 * the next page is enqueued while the response reports one and fewer than
 * `input.numAds` ads have been covered by the pages requested so far.
 *
 * Throws when no input is provided.
 */
await Actor.main(async () => {
    const input = await Actor.getInput<Input>();

    if (!input) throw new Error('Input is missing');

    const proxyConfiguration = await Actor.createProxyConfiguration();

    const crawler = new HttpCrawler({
        proxyConfiguration,
        requestHandler: async ({ body }) => {
            const json: Body = JSON.parse(body.toString());
            const responseParser = new ResponseParser(json);

            // Await each push inside the handler. `forEach(async ...)` drops the
            // returned promises, so the handler could finish before the data was
            // stored and a failed push would become an unhandled rejection
            // instead of failing (and retrying) the request.
            for (const ad of responseParser.ads) {
                await Actor.pushData(ad);
            }

            // Pages hold `config.adsPerPage` ads each; keep going until the
            // requested amount is covered or the API reports no further page.
            if (responseParser.hasNextPage && responseParser.currentPage * config.adsPerPage < input.numAds) {
                await crawler.addRequests([generateRequest(input, responseParser.currentPage + 1)]);
            }
        },
    });

    await crawler.run([generateRequest(input, 1)]);
});

test/adParser.test.ts

1import { AdParser } from '../src/parsers/adParser.js';
2import { ResultSetEntity } from '../src/types/body.js';
3
describe('AdParser', () => {
    // Origin the parser is expected to prepend to the relative offer URL.
    const originUrl = 'https://www.startupjobs.cz';

    // Mirrors how comma-separated API fields are expected to be exposed.
    const splitList = (csv: string): string[] => csv.split(',').map((part) => part.trim());

    let resultEntity: ResultSetEntity;
    let adParser: AdParser;

    beforeEach(() => {
        resultEntity = {
            id: 1,
            name: 'Test Name',
            description: 'Test Description',
            mainAreaName: 'Test Role',
            areaNames: ['Area1', 'Area2'],
            url: '/test-url',
            company: 'Test Company',
            companyType: 'Tech',
            isStartup: true,
            imageUrl: 'http://test.com/logo.png',
            companyAreas: ['Area1', 'Area2'],
            locations: 'Location1, Location2',
            shifts: 'Shift1, Shift2',
            seniorities: ['Junior', 'Mid', 'Senior'],
            benefits: [0, 1],
            collaborations: 'Collaboration1, Collaboration2',
            isHot: true,
            isRemote: false,
            isTop: true,
            salary: { max: 1000, min: 500, measure: 'monthly', currency: 'USD' },
        };

        adParser = new AdParser(resultEntity);
    });

    // One row per getter: how to derive the expected value from the fixture and
    // whether the comparison is `toBe` (strict) or `toEqual` (structural).
    type GetterCase = {
        getter: keyof AdParser;
        expected: (entity: ResultSetEntity) => unknown;
        strict: boolean;
    };

    const cases: GetterCase[] = [
        { getter: 'name', expected: (entity) => entity.name, strict: true },
        { getter: 'description', expected: (entity) => entity.description, strict: true },
        { getter: 'roleName', expected: (entity) => entity.mainAreaName, strict: true },
        { getter: 'roleAreas', expected: (entity) => entity.areaNames, strict: true },
        { getter: 'url', expected: (entity) => originUrl + entity.url, strict: true },
        { getter: 'company', expected: (entity) => entity.company, strict: true },
        { getter: 'companyType', expected: (entity) => entity.companyType, strict: true },
        { getter: 'companyIsSturtup', expected: (entity) => entity.isStartup, strict: true },
        { getter: 'companyLogo', expected: (entity) => entity.imageUrl, strict: true },
        { getter: 'companyAreas', expected: (entity) => entity.companyAreas, strict: true },
        { getter: 'locations', expected: (entity) => splitList(entity.locations), strict: false },
        { getter: 'shifts', expected: (entity) => splitList(entity.shifts), strict: false },
        { getter: 'seniorities', expected: (entity) => entity.seniorities, strict: true },
        { getter: 'benefits', expected: (entity) => entity.benefits, strict: true },
        { getter: 'collaborationTypes', expected: (entity) => splitList(entity.collaborations), strict: false },
        { getter: 'isHot', expected: (entity) => entity.isHot, strict: true },
        { getter: 'isRemote', expected: (entity) => entity.isRemote, strict: true },
        { getter: 'isTop', expected: (entity) => entity.isTop, strict: true },
        { getter: 'salary', expected: (entity) => entity.salary, strict: true },
    ];

    cases.forEach(({ getter, expected, strict }) => {
        test(`should return ${getter}`, () => {
            const assertion = expect(adParser[getter]);
            const wanted = expected(resultEntity);

            if (strict) {
                assertion.toBe(wanted);
            } else {
                assertion.toEqual(wanted);
            }
        });
    });
});

src/builders/adBuilder.ts

1import { AdParser } from '../parsers/adParser.js';
2import { Ad } from '../types/ad.js';
3
/**
 * Fluent builder that assembles an `Ad` by copying selected values from an
 * `AdParser`. Each `add*` method writes one field and returns the builder so
 * calls can be chained; `build()` hands back the accumulated object.
 */
export class AdBuilder {
    adParser: AdParser;
    ad: Ad = {} as Ad;

    /**
     * @param adParser Parser whose getters supply the values for the ad.
     */
    constructor(adParser: AdParser) {
        this.adParser = adParser;
    }

    /** Copies the parser's `name` into the ad. */
    addName(): this {
        const { name } = this.adParser;
        this.ad.name = name;
        return this;
    }

    /** Copies the parser's `description` into the ad. */
    addDescription(): this {
        const { description } = this.adParser;
        this.ad.description = description;
        return this;
    }

    /** Stores the parser's `roleName` as the ad's `mainArea`. */
    addMainArea(): this {
        const { roleName } = this.adParser;
        this.ad.mainArea = roleName;
        return this;
    }

    /** Copies the parser's `url` into the ad. */
    addUrl(): this {
        const { url } = this.adParser;
        this.ad.url = url;
        return this;
    }

    /** Copies the parser's `company` into the ad. */
    addCompany(): this {
        const { company } = this.adParser;
        this.ad.company = company;
        return this;
    }

    /** Copies the parser's `companyType` into the ad. */
    addCompanyType(): this {
        const { companyType } = this.adParser;
        this.ad.companyType = companyType;
        return this;
    }

    /** Copies the parser's `companyIsSturtup` flag into the ad (spelling is part of the existing interface). */
    addCompanyIsSturtup(): this {
        const { companyIsSturtup } = this.adParser;
        this.ad.companyIsSturtup = companyIsSturtup;
        return this;
    }

    /** Copies the parser's `companyLogo` into the ad. */
    addCompanyLogo(): this {
        const { companyLogo } = this.adParser;
        this.ad.companyLogo = companyLogo;
        return this;
    }

    /** Copies the parser's `companyAreas` into the ad. */
    addCompanyAreas(): this {
        const { companyAreas } = this.adParser;
        this.ad.companyAreas = companyAreas;
        return this;
    }

    /** Copies the parser's `locations` into the ad. */
    addLocations(): this {
        const { locations } = this.adParser;
        this.ad.locations = locations;
        return this;
    }

    /** Copies the parser's `shifts` into the ad. */
    addShifts(): this {
        const { shifts } = this.adParser;
        this.ad.shifts = shifts;
        return this;
    }

    /** Stores the parser's `roleAreas` as the ad's `areas`. */
    addAreas(): this {
        const { roleAreas } = this.adParser;
        this.ad.areas = roleAreas;
        return this;
    }

    /** Copies the parser's `seniorities` into the ad. */
    addSeniorities(): this {
        const { seniorities } = this.adParser;
        this.ad.seniorities = seniorities;
        return this;
    }

    /** Copies the parser's `benefits` into the ad. */
    addBenefits(): this {
        const { benefits } = this.adParser;
        this.ad.benefits = benefits;
        return this;
    }

    /** Copies the parser's `collaborationTypes` into the ad. */
    addCollaborationTypes(): this {
        const { collaborationTypes } = this.adParser;
        this.ad.collaborationTypes = collaborationTypes;
        return this;
    }

    /** Copies the parser's `isHot` flag into the ad. */
    addIsHot(): this {
        const { isHot } = this.adParser;
        this.ad.isHot = isHot;
        return this;
    }

    /** Copies the parser's `isRemote` flag into the ad. */
    addIsRemote(): this {
        const { isRemote } = this.adParser;
        this.ad.isRemote = isRemote;
        return this;
    }

    /** Copies the parser's `isTop` flag into the ad. */
    addIsTop(): this {
        const { isTop } = this.adParser;
        this.ad.isTop = isTop;
        return this;
    }

    /** Copies the parser's `salary` into the ad. */
    addSalary(): this {
        const { salary } = this.adParser;
        this.ad.salary = salary;
        return this;
    }

    /**
     * @returns The ad object populated by the `add*` calls made so far.
     */
    build(): Ad {
        return this.ad;
    }
}

src/builders/requestBuilder.ts

1import { RequestOptions } from 'crawlee';
2
3import { config } from '../config.js';
4import { Input } from '../types/input.js';
5
6export class RequestBuilder {
7 page: number;
8 input: Input;
9 salary: string;
10 collaboration: string;
11 shift: string;
12 area: string;
13
14 constructor(input: Input, page?: number) {
15 this.page = page ?? 0;
16 this.input = input;
17
18 this.collaboration = '';
19 this.shift = '';
20 this.area = '';
21 this.salary = '';
22 }
23
24 addCollaboration(): RequestBuilder {
25 if (this.input.typeOfCooperationFreelance) this.collaboration += `&collaboration[]=freelance`;
26 if (this.input.typeOfCooperationInternship) this.collaboration += `&collaboration[]=internship`;
27 if (this.input.typeOfCooperationContract) this.collaboration += `&collaboration[]=pracovni-smlouva`;
28 if (this.input.workplaceOnsite) this.collaboration += `&collaboration[]=onsite`;
29 if (this.input.workplaceRemote) this.collaboration += `&collaboration[]=remote`;
30 if (this.input.workplaceHybrid) this.collaboration += `&collaboration[]=hybridni`;
31
32 return this;
33 }
34
35 addShift(): RequestBuilder {
36 if (this.input.typeOfEmploymentFullTime) this.shift += `&shift[]=full-time`;
37 if (this.input.typeOfEmploymentPartTime) this.shift += `&shift[]=part-time`;
38
39 return this;
40 }
41
42 addSeniority(): RequestBuilder {
43 if (this.input.seniorityJunior) this.area += `&seniority[]=junior`;
44 if (this.input.seniorityMedior) this.area += `&seniority[]=medior`;
45 if (this.input.senioritySenior) this.area += `&seniority[]=senior`;
46
47 return this;
48 }
49
50 addLocation(): RequestBuilder {
51 if (this.input.locationPrague) this.area += `&location[]=${config.cityIds.prague}`;
52 if (this.input.locationBrno) this.area += `&location[]=${config.cityIds.brno}`;
53 if (this.input.locationOstrava) this.area += `&location[]=${config.cityIds.ostrava}`;
54 if (this.input.locationCzechia) this.area += `&location[]=${config.cityIds.czechia}`;
55 if (this.input.locationSanFrancisco) this.area += `&location[]=${config.cityIds.sanFrancisco}`;
56 if (this.input.locationNewYork) this.area += `&location[]=${config.cityIds.newYork}`;
57 if (this.input.locationLondon) this.area += `&location[]=${config.cityIds.london}`;
58
59 return this;
60 }
61
62 addArea(): RequestBuilder {
63 if (this.input.areasDevelopmentBackEnd) this.area += `&area[]=vyvoj/back-end`;
64 if (this.input.areasDevelopmentFrontEnd) this.area += `&area[]=vyvoj/front-end-koder`;
65 if (this.input.areasDevelopmentDatabase) this.area += `&area[]=vyvoj/databaze`;
66 if (this.input.areasDevelopmentDevOps) this.area += `&area[]=vyvoj/devops`;
67 if (this.input.areasDevelopmentTesting) this.area += `&area[]=vyvoj/testovani`;
68 if (this.input.areasDevelopmentMobile) this.area += `&area[]=vyvoj/mobilni-vyvoj`;
69 if (this.input.areasDevelopmentHardware) this.area += `&area[]=vyvoj/hardware-firmware`;
70 if (this.input.areasDevelopmentSecurity) this.area += `&area[]=vyvoj/it-security`;
71 if (this.input.areasDevelopmentSoftwareArchitect) this.area += `&area[]=vyvoj/software-architekt`;
72 if (this.input.areasDevelopmentItConsulting) this.area += `&area[]=vyvoj/it-konzultant`;
73 if (this.input.areasDevelopmentProjectManagement) this.area += `&area[]=vyvoj/project-manager`;
74 if (this.input.areasDevelopmentProductOwner) this.area += `&area[]=vyvoj/product-manager-a-product-owner`;
75 if (this.input.areasDevelopmentScrumMaster) this.area += `&area[]=vyvoj/scrum-master`;
76 if (this.input.areasDevelopmentDesign) this.area += `&area[]=vyvoj/design`;
77 if (this.input.areasDevelopment3dPrinting) this.area += `&area[]=vyvoj/3d-tisk`;
78 if (this.input.areasDevelopmentGameDev) this.area += `&area[]=vyvoj/herni-vyvoj`;
79 if (this.input.areasDevelopmentAI) this.area += `&area[]=vyvoj/umela-inteligence`;
80 if (this.input.areasDevelopmentMachineLearning) this.area += `&area[]=vyvoj/machine-learning`;
81 if (this.input.areasDevelopmentVR) this.area += `&area[]=vyvoj/virtualni-rozsirena-realita`;
82 if (this.input.areasDevelopmentIoT) this.area += `&area[]=vyvoj/internet-veci`;
83 if (this.input.areasDevelopmentBlockchain) this.area += `&area[]=vyvoj/blockchain`;
84 if (this.input.areasDevelopmentNoCode) this.area += `&area[]=vyvoj/no-code`;
85 if (this.input.areasDevelopmentTechWriter) this.area += `&area[]=vyvoj/tech-writer`;
86 if (this.input.areasDevelopmentEmbeddedSystems) this.area += `&area[]=vyvoj/embedded-systems`;
87
88 if (this.input.areasMarketingMarketing) this.area += `&area[]=marketing-a-obchod/marketing`;
89 if (this.input.areasMarketingSales) this.area += `&area[]=marketing-a-obchod/obchod`;
90 if (this.input.areasMarketingCreativity) this.area += `&area[]=marketing-a-obchod/kreativa`;
91
92 if (this.input.areasAnalyticsBI) this.area += `&area[]=analytika/bi-business-analytik`;
93 if (this.input.areasAnalyticsDataAnalysis) this.area += `&area[]=analytika/data-analytik`;
94 if (this.input.areasAnalyticsITAnalysis) this.area += `&area[]=analytika/it-analytik`;
95 if (this.input.areasAnalyticsWebAnalysis) this.area += `&area[]=analytika/webovy-analytik`;
96 if (this.input.areasAnalyticsMarketingAnalysis) this.area += `&area[]=analytika/marketingovy-analytik`;
97
98 if (this.input.areasManagementAccount) this.area += `&area[]=management/account-manager`;
99 if (this.input.areasManagementBusinessDevelopment) this.area += `&area[]=management/business-development`;
100 if (this.input.areasManagementManagement) this.area += `&area[]=management/manager`;
101 if (this.input.areasManagementMarketing) this.area += `&area[]=management/marketing-manager`;
102 if (this.input.areasManagementSales) this.area += `&area[]=management/sales-manager`;
103 if (this.input.areasManagementScrumMaster) this.area += `&area[]=management/scrum-master`;
104 if (this.input.areasManagementOffice) this.area += `&area[]=management/office-manager`;
105 if (this.input.areasManagementProductOwner) this.area += `&area[]=management/product-manager-a-product-owner`;
106 if (this.input.areasManagementProjectManagement) this.area += `&area[]=management/project-manager`;
107 if (this.input.areasManagementOperations) this.area += `&area[]=management/provozni-manazer`;
108 if (this.input.areasManagementTeamLead) this.area += `&area[]=management/team-leader`;
109 if (this.input.areasManagementTopManagement) this.area += `&area[]=management/top-management-c-level`;
110 if (this.input.areasManagementTech) this.area += `&area[]=management/technicky-manazer`;
111
112 if (this.input.areaAdministrationAdministration) this.area += `&area[]=administrativa-a-jine/administrativa`;
113 if (this.input.areaAdministrationHR) this.area += `&area[]=administrativa-a-jine/hr`;
114 if (this.input.areaAdministrationLecturer) this.area += `&area[]=administrativa-a-jine/lektor`;
115 if (this.input.areaAdministrationRest) this.area += `&area[]=administrativa-a-jine/ostatni`;
116
117 return this;
118 }
119
/**
 * Appends the salary filters to the query-string fragment kept in `this.salary`.
 *
 * Adds `&salary[]=1` when `input.salaryDefined` is set, then always adds a
 * `salaryRange[]` parameter whose value is a JSON object built from
 * `input.salaryMin` (`value`), `input.salaryCurrency` (`currency`) and
 * `input.salaryType` (`measure`).
 *
 * @returns This builder, so calls can be chained.
 */
addSalary(): RequestBuilder {
    if (this.input.salaryDefined) this.salary += `&salary[]=1`;

    const salaryRange = JSON.stringify({
        value: this.input.salaryMin,
        currency: this.input.salaryCurrency,
        measure: this.input.salaryType,
    });

    // The JSON text contains quotes, braces, colons and commas, and the
    // currency / measure strings come straight from user input. Percent-encode
    // the value so characters such as `&`, `#` or `+` cannot cut the parameter
    // short or inject additional query parameters.
    this.salary += `&salaryRange[]=${encodeURIComponent(salaryRange)}`;

    return this;
}
133
/**
 * Assembles the request options from the fragments collected so far.
 *
 * The URL is `config.fetchUrl`, the `page` parameter, and then the salary,
 * collaboration, shift and area fragments, in that order.
 *
 * NOTE(review): no seniority or location fragment is concatenated here, even
 * though `generateRequest` calls `addSeniority()` and `addLocation()`.
 * Confirm that those methods write into one of the fragments used below.
 *
 * @returns GET request options carrying the form content-type and origin headers.
 */
build(): RequestOptions {
    const filters = `${this.salary}${this.collaboration}${this.shift}${this.area}`;
    const url = `${config.fetchUrl}?page=${this.page}${filters}`;
    const headers = {
        'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
        origin: config.originUrl,
    };

    return { url, method: 'GET', headers };
}
144}

src/helpers/helper.ts

1import { RequestOptions } from 'crawlee';
2
3import { RequestBuilder } from '../builders/requestBuilder.js';
4import { Input } from '../types/input.js';
5
/**
 * Builds the request options for one page of results.
 *
 * Runs every filter step of `RequestBuilder` in a fixed order and returns
 * whatever `build()` produces.
 *
 * @param input Actor input the builder reads its filters from.
 * @param page Page number handed to the builder.
 * @returns Request options for the given page.
 */
export function generateRequest(input: Input, page: number): RequestOptions {
    return new RequestBuilder(input, page)
        .addShift()
        .addSeniority()
        .addLocation()
        .addSalary()
        .addCollaboration()
        .addArea()
        .build();
}

src/parsers/adParser.ts

1import { config } from '../config.js';
2import { ResultSetEntity } from '../types/body.js';
3
/**
 * Read-only accessor layer over one raw `ResultSetEntity`.
 *
 * Most getters return the underlying field unchanged. `url` is prefixed with
 * `config.originUrl`, and the comma-separated string fields (`locations`,
 * `shifts`, `collaborations`) are exposed as string arrays.
 */
export class AdParser {
    setEntity: ResultSetEntity;

    constructor(setEntity: ResultSetEntity) {
        this.setEntity = setEntity;
    }

    /**
     * Splits a comma-separated string into trimmed, non-empty items.
     *
     * `''.split(',')` yields `['']`, so blank items are filtered out to avoid
     * emitting arrays that contain an empty string. A missing value is
     * treated as an empty list instead of throwing.
     *
     * @param value Comma-separated list, possibly empty or missing.
     * @returns The trimmed items, without blanks.
     */
    private static splitList(value?: string | null): string[] {
        return (value ?? '')
            .split(',')
            .map((item) => item.trim())
            .filter((item) => item.length > 0);
    }

    get name() {
        return this.setEntity.name;
    }

    get description() {
        return this.setEntity.description;
    }

    /** Value of the entity's `mainAreaName` field. */
    get roleName() {
        return this.setEntity.mainAreaName;
    }

    /** Value of the entity's `areaNames` field. */
    get roleAreas() {
        return this.setEntity.areaNames;
    }

    /** Entity `url` prefixed with `config.originUrl`. */
    get url() {
        return config.originUrl + this.setEntity.url;
    }

    get company() {
        return this.setEntity.company;
    }

    get companyType() {
        return this.setEntity.companyType;
    }

    /**
     * Value of the entity's `isStartup` field.
     * The getter name keeps its historical spelling because other code refers to it.
     */
    get companyIsSturtup() {
        return this.setEntity.isStartup;
    }

    /** Value of the entity's `imageUrl` field. */
    get companyLogo() {
        return this.setEntity.imageUrl;
    }

    get companyAreas() {
        return this.setEntity.companyAreas;
    }

    /** Entity `locations` split on commas. */
    get locations(): string[] {
        return AdParser.splitList(this.setEntity.locations);
    }

    /** Entity `shifts` split on commas. */
    get shifts(): string[] {
        return AdParser.splitList(this.setEntity.shifts);
    }

    get seniorities() {
        return this.setEntity.seniorities;
    }

    get benefits() {
        return this.setEntity.benefits;
    }

    /** Entity `collaborations` split on commas. */
    get collaborationTypes(): string[] {
        return AdParser.splitList(this.setEntity.collaborations);
    }

    get isHot() {
        return this.setEntity.isHot;
    }

    get isRemote() {
        return this.setEntity.isRemote;
    }

    get isTop() {
        return this.setEntity.isTop;
    }

    get salary() {
        return this.setEntity.salary;
    }
}

src/parsers/responseParser.ts

1import { AdParser } from './adParser.js';
2import { AdBuilder } from '../builders/adBuilder.js';
3import { Ad } from '../types/ad.js';
4import { Body } from '../types/body.js';
5
/**
 * Wraps one response body and exposes its ads and paging state.
 */
export class ResponseParser {
    body: Body;

    constructor(body: Body) {
        this.body = body;
    }

    /**
     * Converts every entry of `body.resultSet` into an `Ad`, running the full
     * `AdBuilder` chain over an `AdParser` created for that entry.
     */
    get ads(): Ad[] {
        return this.body.resultSet.map((entity) => {
            const parser = new AdParser(entity);

            return new AdBuilder(parser)
                .addName()
                .addDescription()
                .addMainArea()
                .addUrl()
                .addCompany()
                .addCompanyType()
                .addCompanyIsSturtup()
                .addCompanyLogo()
                .addCompanyAreas()
                .addLocations()
                .addShifts()
                .addAreas()
                .addSeniorities()
                .addBenefits()
                .addCollaborationTypes()
                .addIsHot()
                .addIsRemote()
                .addIsTop()
                .addSalary()
                .build();
        });
    }

    /** True while the current page is below `paginator.max`. */
    get hasNextPage(): boolean {
        const page = this.currentPage;
        const lastPage = this.body.paginator.max;

        return page < lastPage;
    }

    /** Value of `paginator.current`. */
    get currentPage(): number {
        const { paginator } = this.body;

        return paginator.current;
    }
}

src/types/ad.d.ts

1import { Salary } from './body.js';
2
/**
 * One ad as returned by `ResponseParser.ads` (the result of `AdBuilder.build()`).
 *
 * NOTE(review): the field-to-source mapping lives in `AdBuilder`. The notes
 * below assume each field is filled from the matching `AdParser` getter;
 * confirm against the builder.
 */
export type Ad = {
    name: string;
    description: string;
    mainArea: string;
    /** Presumably `config.originUrl` followed by the path from the response. */
    url: string;
    company: string;
    companyType: string;
    /** The spelling is part of the emitted field name; renaming it would change the output. */
    companyIsSturtup: boolean;
    companyLogo: string;
    companyAreas: string[];
    /** Presumably the response's comma-separated `locations` string, split into items. */
    locations: string[];
    /** Presumably the response's comma-separated `shifts` string, split into items. */
    shifts: string[];
    areas: string[];
    seniorities: string[];
    benefits: number[];
    /** Presumably the response's comma-separated `collaborations` string, split into items. */
    collaborationTypes: string[];
    isHot: boolean;
    isRemote: boolean;
    isTop: boolean;
    salary: Salary;
};

src/types/body.d.ts

/**
 * Top-level shape of the response body handed to `ResponseParser`.
 */
export interface Body {
    /** Raw entries; `ResponseParser.ads` converts each one into an `Ad`. */
    resultSet: ResultSetEntity[];
    resultCount: number;
    /** Paging state read by `ResponseParser.currentPage` and `hasNextPage`. */
    paginator: Paginator;
    permanentUrlForResultSet: PermanentUrlForResultSet;
    seo: Seo;
}
8
/**
 * One raw entry of `Body.resultSet`, wrapped by `AdParser`.
 */
export interface ResultSetEntity {
    id: number;
    name: string;
    description: string;
    /** `AdParser.url` prefixes this value with `config.originUrl`. */
    url: string;
    company: string;
    companyType: string;
    mainAreaName: string;
    imageUrl: string;
    /** Comma-separated list; `AdParser.locations` splits it. */
    locations: string;
    /** Comma-separated list; `AdParser.shifts` splits it. */
    shifts: string;
    areaSlugs?: string[];
    areaNames: string[];
    seniorities: string[];
    benefits: number[];
    /** Comma-separated list; `AdParser.collaborationTypes` splits it. */
    collaborations: string;
    isHot: boolean;
    isRemote: boolean;
    isTop: boolean;
    companyAreas: string[];
    isStartup: boolean;
    salary: Salary;
}
32
/**
 * Salary information attached to a `ResultSetEntity` and copied into `Ad.salary`.
 */
export interface Salary {
    max: number;
    min: number;
    /** NOTE(review): presumably the pay period, mirroring `measure` in the salary filter; confirm. */
    measure: string;
    currency: string;
}
39
/**
 * Paging state of a response.
 * `ResponseParser.hasNextPage` is true while `current` is below `max`.
 */
export interface Paginator {
    current: number;
    max: number;
}
44
/**
 * Pair of strings keyed `cs` and `en`, found under `Body.permanentUrlForResultSet`.
 * NOTE(review): presumably one permanent URL per language; confirm against the API.
 */
export interface PermanentUrlForResultSet {
    cs: string;
    en: string;
}
49
/**
 * SEO block of the response body (`Body.seo`).
 */
export interface Seo {
    title: string;
    heading: string;
    /** Optional and typed as `undefined` only, so no value is ever expected here. */
    customHtml?: undefined;
}

src/types/config.d.ts

/**
 * Static configuration shared by the builders and parsers.
 */
export type Config = {
    adsPerPage: number;
    /** One string identifier per supported location. */
    cityIds: {
        prague: string;
        brno: string;
        ostrava: string;
        czechia: string;
        sanFrancisco: string;
        newYork: string;
        london: string;
    };
    /** Sent as the `origin` header by `RequestBuilder.build()` and prefixed to ad URLs by `AdParser.url`. */
    originUrl: string;
    /** Base of the request URL; `RequestBuilder.build()` appends the query string to it. */
    fetchUrl: string;
};

src/types/input.d.ts

/**
 * Actor input read by `RequestBuilder`.
 *
 * Apart from `numAds` and the salary strings and number, every field is a
 * boolean flag.
 */
export type Input = {
    // NOTE(review): presumably the number of ads to collect; not used in the request builder code shown here.
    numAds: number;

    // Workplace flags.
    workplaceRemote: boolean;
    workplaceHybrid: boolean;
    workplaceOnsite: boolean;

    // Location flags; the names match the keys of `Config.cityIds`.
    locationPrague: boolean;
    locationBrno: boolean;
    locationOstrava: boolean;
    locationCzechia: boolean;
    locationSanFrancisco: boolean;
    locationNewYork: boolean;
    locationLondon: boolean;

    // Seniority flags.
    seniorityJunior: boolean;
    seniorityMedior: boolean;
    senioritySenior: boolean;

    // Employment and cooperation type flags.
    typeOfEmploymentFullTime: boolean;
    typeOfEmploymentPartTime: boolean;
    typeOfCooperationFreelance: boolean;
    typeOfCooperationInternship: boolean;
    typeOfCooperationContract: boolean;

    // Area flags: development.
    areasDevelopmentBackEnd: boolean;
    areasDevelopmentFrontEnd: boolean;
    areasDevelopmentDatabase: boolean;
    areasDevelopmentDevOps: boolean;
    areasDevelopmentTesting: boolean;
    areasDevelopmentMobile: boolean;
    areasDevelopmentHardware: boolean;
    areasDevelopmentSecurity: boolean;
    areasDevelopmentSoftwareArchitect: boolean;
    areasDevelopmentItConsulting: boolean;
    areasDevelopmentProjectManagement: boolean;
    areasDevelopmentProductOwner: boolean;
    areasDevelopmentScrumMaster: boolean;
    areasDevelopmentDesign: boolean;
    areasDevelopment3dPrinting: boolean;
    areasDevelopmentGameDev: boolean;
    areasDevelopmentAI: boolean;
    areasDevelopmentMachineLearning: boolean;
    areasDevelopmentVR: boolean;
    areasDevelopmentIoT: boolean;
    areasDevelopmentBlockchain: boolean;
    areasDevelopmentNoCode: boolean;
    areasDevelopmentTechWriter: boolean;
    areasDevelopmentEmbeddedSystems: boolean;

    // Area flags: marketing.
    areasMarketingMarketing: boolean;
    areasMarketingSales: boolean;
    areasMarketingCreativity: boolean;

    // Area flags: analytics. Each set flag adds one `area[]=analytika/...` parameter.
    areasAnalyticsBI: boolean;
    areasAnalyticsDataAnalysis: boolean;
    areasAnalyticsITAnalysis: boolean;
    areasAnalyticsWebAnalysis: boolean;
    areasAnalyticsMarketingAnalysis: boolean;

    // Area flags: management. Each set flag adds one `area[]=management/...` parameter.
    areasManagementAccount: boolean;
    areasManagementBusinessDevelopment: boolean;
    areasManagementManagement: boolean;
    areasManagementMarketing: boolean;
    areasManagementSales: boolean;
    areasManagementScrumMaster: boolean;
    areasManagementOffice: boolean;
    areasManagementProductOwner: boolean;
    areasManagementProjectManagement: boolean;
    areasManagementOperations: boolean;
    areasManagementTopManagement: boolean;
    areasManagementTeamLead: boolean;
    areasManagementTech: boolean;

    // Area flags: administration and other. Each set flag adds one
    // `area[]=administrativa-a-jine/...` parameter. Note the `area` prefix
    // (not `areas`) on these field names.
    areaAdministrationAdministration: boolean;
    areaAdministrationHR: boolean;
    areaAdministrationLecturer: boolean;
    areaAdministrationRest: boolean;

    // Salary filter. `salaryDefined` adds `salary[]=1`; the other three fields
    // become `currency`, `measure` and `value` of the `salaryRange[]` JSON.
    salaryDefined: boolean;
    salaryCurrency: string;
    salaryType: string;
    salaryMin: number;
};