1import { ApifyClient } from 'apify-client';
2
3
4
// Apify API client used to start the Actor run and read its dataset.
// The token is read from the environment when available so a real secret
// never has to be pasted into source; the original placeholder is kept as
// a fallback for the copy-paste workflow this snippet comes from.
// NOTE(review): never commit a real API token — use the APIFY_TOKEN env var.
const client = new ApifyClient({
  token: process.env.APIFY_TOKEN ?? '<YOUR_API_TOKEN>',
});
8
9
/**
 * Builds the text of one "custom function" input for the Actor.
 *
 * Every one of the 14 function-typed inputs below shares the same ~60-line
 * boilerplate: the `ctx` parameter documentation plus the "SEE BELOW FOR
 * MORE EXAMPLES" section. The original generated snippet repeated that
 * boilerplate verbatim in 14 template literals — and those literals
 * contained unescaped backticks (e.g. around `ctx.io`), which terminate a
 * JS template literal and made the whole file a syntax error. Composing the
 * text from single-quoted line arrays fixes the syntax error and removes
 * the duplication; the strings delivered to the Actor are unchanged in
 * meaning (they are entirely comments).
 *
 * @param {string[]} subjectLines - Doc lines for the hook's positional args
 *   (e.g. the `request` or `entry` line); pass [] for ctx-only hooks.
 * @param {string[]} exampleLines - Commented-out example body, starting with
 *   the `// async (...) => {` opener line.
 * @returns {string} Full template text for the function input.
 */
const buildFnTemplate = (subjectLines, exampleLines) => {
  const header = [
    '/**',
    ' * Inputs:',
    // Hooks without a positional arg had a bare " *" line here.
    ...(subjectLines.length > 0 ? subjectLines : [' *']),
    ' * `ctx.io` - Apify Actor class, see https://docs.apify.com/sdk/js/reference/class/Actor.',
    ' * `ctx.input` - The input object that was passed to this Actor.',
    ' * `ctx.state` - An object you can use to persist state across all your custom functions.',
    " * `ctx.sendRequest` - Fetch remote data. Uses 'got-scraping', same as Apify's `sendRequest`.",
    ' *                     See https://crawlee.dev/docs/guides/got-scraping',
    ' * `ctx.itemCacheKey` - A function you can use to get cacheID for current `entry`.',
    ' *                      It takes the entry itself, and a list of properties to be used for hashing.',
    ' *                      By default, you should pass `input.cachePrimaryKeys` to it.',
    ' *',
    ' */',
  ];
  // Commented-out examples shared by every function input.
  const sharedExamples = [
    '//',
    '// /* ========== SEE BELOW FOR MORE EXAMPLES ========= */',
    '//',
    '// /**',
    '//  * ======= ACCESSING DATASET ========',
    '//  * To save/load/access entries in Dataset.',
    '//  * Docs:',
    '//  * - https://docs.apify.com/platform/storage/dataset',
    '//  * - https://docs.apify.com/sdk/js/docs/guides/result-storage#dataset',
    '//  * - https://docs.apify.com/sdk/js/docs/examples/map-and-reduce',
    '//  */',
    "// // const dataset = await io.openDataset('MyDatasetId');",
    '// // const info = await dataset.getInfo();',
    '// // console.log(info.itemCount);',
    '// // // => 0',
    '//',
    '// /**',
    '//  * ======= ACCESSING REMOTE DATA ========',
    '//  * Use `sendRequest` to get data from the internet:',
    '//  * Docs:',
    '//  * - https://github.com/apify/got-scraping',
    '//  */',
    "// // const catFact = await sendRequest.get('https://cat-fact.herokuapp.com/facts/5887e1d85c873e0011036889').json();",
    '// // console.log(catFact.text);',
    '// // // => "Cats make about 100 different sounds. Dogs make only about 10.",',
    '//',
    '// /**',
    '//  * ======= USING CACHE ========',
    '//  * To save the entry to the KeyValue cache (or retrieve it), you can use',
    "//  * `itemCacheKey` to create the entry's ID for you:",
    '//  */',
    '// // const cacheId = itemCacheKey(item, input.cachePrimaryKeys);',
    "// // const cache = await io.openKeyValueStore('MyStoreId');",
    '// // cache.setValue(cacheId, entry);',
    '// };',
  ];
  return [...header, ...exampleLines, ...sharedExamples].join('\n');
};

// Commented-out function signatures used by the example bodies.
const CTX_SIG = '// async ({ io, input, state, sendRequest, itemCacheKey }) => {';
const REQUEST_SIG = '// async (request, { io, input, state, sendRequest, itemCacheKey }) => {';
const ENTRY_SIG = '// async (entry, { io, input, state, sendRequest, itemCacheKey }) => {';

// Doc lines for hooks that receive a positional argument.
const REQUEST_SUBJECT = [' * `request` - Request holding URL to be scraped.'];
const ENTRY_SUBJECT = [' * `entry` - Scraped entry.'];

// Example for the "before" hooks: fetch data before processing starts.
const beforeExample = (noun) => [
  CTX_SIG,
  `// // Example: Fetch data or run code BEFORE ${noun}.`,
  "// state.categories = await sendRequest.get('https://example.com/my-categories').json();",
];

// Example for the "after" hooks: clean up state after processing finishes.
const afterExample = (noun) => [
  CTX_SIG,
  `// // Example: Fetch data or run code AFTER ${noun}.`,
  '// delete state.categories;',
];

// Actor input. Keys, numeric settings, and the meaning of all template
// strings are unchanged from the generated snippet.
const input = {
  datasetType: 'jobOffers',
  jobOfferFilterMinSalaryPeriod: 'month',
  inputExtendFromFunction: buildFnTemplate([], [
    CTX_SIG,
    '// // Example: Load Actor config from GitHub URL (public)',
    "// const config = await sendRequest.get('https://raw.githubusercontent.com/username/project/main/config.json').json();",
    '//',
    '// // Increase concurrency during off-peak hours',
    "// // NOTE: Imagine we're targetting a small server, that can be slower during the day",
    '// const hours = new Date().getUTCHours();',
    '// const isOffPeak = hours < 6 || hours > 20;',
    '// config.maxConcurrency = isOffPeak ? 8 : 3;',
    '//',
    '// return config;',
  ]),
  startUrlsFromFunction: buildFnTemplate([], [
    CTX_SIG,
    '// // Example: Create and load URLs from a Dataset by combining multiple fields',
    '// const dataset = await io.openDataset(datasetNameOrId);',
    '// const data = await dataset.getData();',
    '// const urls = data.items.map((item) => `https://example.com/u/${item.userId}/list/${item.listId}`);',
    '// return urls;',
  ]),
  requestMaxEntries: 50,
  requestTransform: buildFnTemplate(REQUEST_SUBJECT, [
    REQUEST_SIG,
    '// // Example: Tag requests',
    '// // (maybe because we use RequestQueue that pools multiple scrapers)',
    '// request.userData.tag = "VARIANT_A";',
    // FIX: the generated example returned the undefined name `requestQueue`;
    // a transform must return the (modified) request.
    '// return request;',
  ]),
  requestTransformBefore: buildFnTemplate([], beforeExample('requests are processed')),
  requestTransformAfter: buildFnTemplate([], afterExample('requests are processed')),
  requestFilter: buildFnTemplate(REQUEST_SUBJECT, [
    REQUEST_SIG,
    '// // Example: Filter requests based on their tag',
    '// // (maybe because we use RequestQueue that pools multiple scrapers)',
    '// return request.userData.tag === "VARIANT_A";',
  ]),
  requestFilterBefore: buildFnTemplate([], beforeExample('requests are processed')),
  requestFilterAfter: buildFnTemplate([], afterExample('requests are processed')),
  outputMaxEntries: 50,
  outputTransform: buildFnTemplate(ENTRY_SUBJECT, [
    ENTRY_SIG,
    '// // Example: Add extra custom fields like aggregates',
    '// return {',
    '//   ...entry,',
    '//   imagesCount: entry.images.length,',
    '// };',
  ]),
  outputTransformBefore: buildFnTemplate([], beforeExample('entries are scraped')),
  outputTransformAfter: buildFnTemplate([], afterExample('entries are scraped')),
  outputFilter: buildFnTemplate(ENTRY_SUBJECT, [
    ENTRY_SIG,
    '// // Example: Filter entries based on number of images they have (at least 5)',
    // FIX: the generated example used `> 5`, which contradicts the
    // "at least 5" comment above it.
    '// return entry.images.length >= 5;',
  ]),
  outputFilterBefore: buildFnTemplate([], beforeExample('entries are scraped')),
  outputFilterAfter: buildFnTemplate([], afterExample('entries are scraped')),
  maxRequestRetries: 3,
  maxRequestsPerMinute: 120,
  minConcurrency: 1,
  requestHandlerTimeoutSecs: 180,
  logLevel: 'info',
  errorReportingDatasetId: 'REPORTING',
};
753
754
// Start the Actor run with the input above and wait until it finishes.
const run = await client.actor("jurooravec/profesia-sk-scraper").call(input);

// Fetch the scraped items from the run's default dataset and print each one.
console.log('Results from dataset');
console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);
const datasetClient = client.dataset(run.defaultDatasetId);
const { items } = await datasetClient.listItems();
for (const item of items) {
  console.dir(item);
}
764
765