This Actor is under maintenance.
This Actor is under maintenance and may be unreliable.
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
14
.editorconfig
1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf
10
.eslintrc
1{
2 "extends": "@apify",
3 "root": true,
4 "rules": {
5 "padded-blocks": "off",
6 "indent": [
7 "warn",
8 2
9 ],
10 "eol-last": "off",
11 "max-classes-per-file": "off",
12 "radix": "off",
13 "no-undef": "off",
14 "import/extensions": "off",
15 "max-len": "off",
16 "no-trailing-spaces": "off",
17 "object-shorthand": "off"
18 }
19}
20
.gitignore
1# This file tells Git which files shouldn't be added to source control
2
3.idea
4dist
5node_modules
6apify_storage
7storage
8
9apify_storage
apify.json
1{
2 "name": "kaktus-dobijecka",
3 "version": "0.0",
4 "buildTag": "latest",
5 "env": null,
6 "template": "project_cheerio_crawler_js"
7}
8
Dockerfile
1# Specify the base Docker image. You can read more about
2# the available images at https://sdk.apify.com/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node:16
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY package*.json ./
9
10# Install NPM packages, skip optional and development dependencies to
11# keep the image small. Avoid logging too much and print the dependency
12# tree for debugging
13RUN npm --quiet set progress=false \
14 && npm install --omit=dev --omit=optional \
15 && echo "Installed NPM packages:" \
16 && (npm list --omit=dev --all || true) \
17 && echo "Node.js version:" \
18 && node --version \
19 && echo "NPM version:" \
20 && npm --version \
21 && rm -r ~/.npm
22
23# Next, copy the remaining files and directories with the source code.
24# Since we do this after NPM install, quick build will be really fast
25# for most source file changes.
26COPY . ./
27
28
29# Run the image.
30CMD npm start --silent
31
INPUT_SCHEMA.json
1{
2 "title": "CheerioCrawler Template",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "email": {
7 "title": "E-mail data",
8 "type": "array",
9 "description": "List of recipient objects. Each object may set 'to', 'cc' and 'bcc'; one e-mail is sent per object, so add more objects to send more e-mails.",
10 "prefill": [{"to": "your@email.com"}],
11 "editor": "json"
12 }
13 }
14}
15
package.json
1{
2 "name": "kaktus-dobijecka",
3 "version": "0.0.1",
4 "type": "module",
5 "description": "This is a boilerplate of an Apify actor.",
6 "engines": {
7 "node": ">=16.0.0"
8 },
9 "dependencies": {
10 "apify": "^3.0.0",
11 "crawlee": "^3.0.0"
12 },
13 "devDependencies": {
14 "@apify/eslint-config": "^0.3.1",
15 "chai": "^4.3.6",
16 "eslint": "^8.20.0",
17 "mocha": "^10.1.0"
18 },
19 "scripts": {
20 "start": "node src/main.js",
21 "lint": "eslint ./src --ext .js,.jsx",
22 "lint:fix": "eslint ./src --ext .js,.jsx --fix",
23 "test": "mocha --recursive"
24 },
25 "author": "It's not you it's me",
26 "license": "ISC"
27}
28
src/main.js
1import { Actor } from 'apify';
2import { CheerioCrawler } from 'crawlee';
3import { router } from './routes.js';
4
// Entry point: initialise the Actor, crawl the single start page, then exit.
await Actor.init();

// Every fetched page is handed to the router defined in ./routes.js.
const crawler = new CheerioCrawler({ requestHandler: router });

await crawler.run(['https://www.mujkaktus.cz/chces-pridat']);

await Actor.exit();
16
src/routes.js
1import { Actor } from 'apify';
2import { createCheerioRouter } from 'crawlee';
3import { Utils } from './utils.js';
4
export const router = createCheerioRouter();

/**
 * Default handler: inspects every <h3> on the page, stores each heading that
 * contains a date and time window in the dataset, and - when that date is
 * today - sends one e-mail per recipient object from the Actor input.
 */
router.addDefaultHandler(async ({ $ }) => {
  for (const h3 of $('h3')) {
    const text = $(h3).text();
    const validity = Utils.parseDate(text);

    // Skip headings that do not contain a date and time window.
    if (!validity) {
      continue;
    }

    await Actor.pushData(Utils.getResult(validity, text));

    // NOTE(review): "today" is evaluated in the time zone of the machine
    // running the Actor - confirm this matches the time zone of the page.
    if (!Utils.isSameDay(validity.date, new Date())) {
      continue;
    }

    // Actor.getInput() resolves to null when the run has no input; fall back
    // to an empty object so the destructuring below cannot throw.
    const { email: emailsData } = (await Actor.getInput()) ?? {};

    if (!emailsData) {
      continue;
    }

    for (const emailData of emailsData) {
      await Utils.sendEmail(emailData, validity, text);
    }
  }
});
src/utils.js
1import { Actor } from 'apify';
2
/**
 * Stateless helpers for parsing the promotion window out of a text and for
 * reporting it (dataset row, notification e-mail).
 */
export class Utils {

  /**
   * Finds the first "D. M. YYYY ... HH:MM ... HH:MM" pattern in a text.
   *
   * @param {string} input Text to search.
   * @returns {Validity|null} The parsed date with its "from" and "to" times,
   *   or null when the input is not a string, contains no such pattern, or
   *   the date is not a real calendar date (e.g. 31. 2. 2022).
   */
  static parseDate(input) {
    if (typeof input !== 'string') {
      return null;
    }

    const dateRegexp = /(\d{1,2}\. ?\d{1,2}\. ?\d{4})[^\d]*(\d{1,2}:\d{1,2})[^\d]*(\d{1,2}:\d{1,2})/;

    const match = input.match(dateRegexp);

    if (!match) {
      return null;
    }

    const [, rawDate, from, to] = match;

    // rawDate looks like "12. 10. 2022"; parseInt ignores the leading spaces
    // left over after splitting on the dots.
    const [day, month, year] = rawDate
      .split('.')
      .map((part) => Number.parseInt(part, 10));

    const parsedDate = new Date(year, month - 1, day);

    // The Date constructor silently rolls out-of-range values over
    // (31. 2. becomes 3. 3.), so reject anything that does not round-trip.
    const isRealDate = parsedDate.getFullYear() === year
      && parsedDate.getMonth() === month - 1
      && parsedDate.getDate() === day;

    if (!isRealDate) {
      return null;
    }

    return new Validity(parsedDate, from, to);
  }

  /**
   * Tells whether two dates fall on the same calendar day, compared through
   * the local-time getters of Date.
   *
   * @param {Date} a
   * @param {Date} b
   * @returns {boolean}
   */
  static isSameDay(a, b) {
    return a.getDate() === b.getDate() && a.getMonth() === b.getMonth() && a.getFullYear() === b.getFullYear();
  }

  /**
   * Builds the record that is pushed to the dataset.
   *
   * @param {Validity} validity Parsed promotion window.
   * @param {string} text Original text the window was parsed from.
   * @returns {{Date: string, From: string, To: string, Text: string}}
   *   Date is formatted as YYYY-MM-DD with zero-padded month and day.
   */
  static getResult(validity, text) {
    const { date } = validity;

    const month = String(date.getMonth() + 1).padStart(2, '0');
    const day = String(date.getDate()).padStart(2, '0');

    return {
      Date: `${date.getFullYear()}-${month}-${day}`,
      From: validity.from,
      To: validity.to,
      Text: text,
    };
  }

  /**
   * Sends one notification e-mail by calling the "apify/send-mail" Actor.
   *
   * @param {{to: string, cc?: string, bcc?: string}} emailData Recipients.
   * @param {Validity} validity Parsed promotion window, used in the subject.
   * @param {string} text E-mail body.
   * @returns {Promise<void>}
   */
  static async sendEmail(emailData, validity, text) {
    await Actor.call('apify/send-mail', {
      to: emailData.to,
      cc: emailData.cc,
      bcc: emailData.bcc,
      subject: `Kaktus dobíječka dnes od ${validity.from} do ${validity.to}`,
      text,
    });
  }

}

/**
 * Value object describing when a promotion is valid.
 */
export class Validity {
  /**
   * @param {Date} date Day of the promotion.
   * @param {string} from Start time as it appeared in the text, e.g. "17:00".
   * @param {string} to End time as it appeared in the text, e.g. "20:00".
   */
  constructor(date, from, to) {
    this.date = date;
    this.from = from;
    this.to = to;
  }

}
66
test/test.js
1import { assert } from 'chai';
2
3import { Utils, Validity } from '../src/utils.js';
4
// Tests for Utils.isSameDay.
describe('isSameDay', () => {

  it('should return true for same days', () => {
    // Derive both arguments from one instant: two separate `new Date()`
    // calls could land on either side of midnight and fail spuriously.
    const now = new Date();
    assert.isTrue(Utils.isSameDay(now, new Date(now.getTime())));
    assert.isTrue(Utils.isSameDay(new Date(2022, 9, 23), new Date(2022, 9, 23, 15, 10, 0)));
  });

  it('should return false for different days', () => {
    // Differ by month, by day and by year respectively.
    assert.isFalse(Utils.isSameDay(new Date(2022, 10, 23), new Date(2022, 9, 23)));
    assert.isFalse(Utils.isSameDay(new Date(2022, 9, 22), new Date(2022, 9, 23)));
    assert.isFalse(Utils.isSameDay(new Date(2021, 9, 23), new Date(2022, 9, 23)));
  });

});
17
// Tests for Utils.parseDate.
describe('parseDate', () => {

  // Builds a fresh expected value for each assertion: 12 October 2022, 17:00 to 20:00.
  const expectedValidity = () => new Validity(new Date(2022, 9, 12), '17:00', '20:00');

  it('should parse date from string', () => {
    const parsed = Utils.parseDate('Pokud si dneska 12. 10. 2022 od 17:00 do 20:00 hodin dobiješ alespoň 200 Kč, dáme ti dvojnásob');

    assert.deepEqual(parsed, expectedValidity());
  });

  it('should not parse date from string', () => {
    // A different day in the text must not produce the expected value.
    const otherDay = Utils.parseDate('Pokud si dneska 13. 10. 2022 od 17:00 do 20:00 hodin dobiješ alespoň 200 Kč, dáme ti dvojnásob');
    assert.notDeepEqual(otherDay, expectedValidity());

    // Text without any date yields null.
    const noDate = Utils.parseDate('test');
    assert.isNull(noDate);
  });
});
Developer
Maintained by Community
Actor stats
- 1 users
- 815 runs
- Modified about 1 year ago
Categories