Google Alert Alternative
Try for free
No credit card required
Go to Store
Google Alert Alternative
lukaskrivka/google-alert-alternative
Try for free
No credit card required
Monitor Google for new search results (organic or paid) and get notified when they appear.
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
.editorconfig
1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf
.eslintrc
1{
2 "root": true,
3 "env": {
4 "browser": true,
5 "es2020": true,
6 "node": true
7 },
8 "extends": [
9 "@apify/eslint-config-ts"
10 ],
11 "parserOptions": {
12 "project": "./tsconfig.json",
13 "ecmaVersion": 2020
14 },
15 "ignorePatterns": [
16 "node_modules",
17 "dist",
18 "**/*.d.ts"
19 ]
20}
.gitignore
1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv
package.json
1{
2 "name": "google-search-monitor",
3 "version": "0.0.1",
4 "type": "module",
5 "description": "This is a boilerplate of an Apify actor.",
6 "engines": {
7 "node": ">=18.0.0"
8 },
9 "dependencies": {
10 "apify": "^3.1.10",
11 "crawlee": "^3.5.4"
12 },
13 "devDependencies": {
14 "@apify/eslint-config-ts": "^0.3.0",
15 "@apify/tsconfig": "^0.1.0",
16 "@typescript-eslint/eslint-plugin": "^6.7.2",
17 "@typescript-eslint/parser": "^6.7.2",
18 "eslint": "^8.50.0",
19 "tsx": "^4.6.2",
20 "typescript": "^5.3.3"
21 },
22 "scripts": {
23 "start": "npm run start:dev",
24 "start:prod": "node dist/main.js",
25 "start:dev": "tsx src/main.ts",
26 "build": "tsc",
27 "lint": "eslint ./src --ext .ts",
28 "lint:fix": "eslint ./src --ext .ts --fix",
29 "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
30 },
31 "author": "It's not you it's me",
32 "license": "ISC"
33}
tsconfig.json
1{
2 "extends": "@apify/tsconfig",
3 "compilerOptions": {
4 "module": "NodeNext",
5 "moduleResolution": "NodeNext",
6 "target": "ES2022",
7 "outDir": "dist",
8 "noUnusedLocals": false,
9 "skipLibCheck": true,
10 "lib": ["DOM"]
11 },
12 "include": [
13 "./src/**/*"
14 ]
15}
.actor/Dockerfile
1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node:20 AS builder
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY package*.json ./
9
10# Install all dependencies. Don't audit to speed up the installation.
11RUN npm install --include=dev --audit=false
12
13# Next, copy the source files using the user set
14# in the base image.
15COPY . ./
16
17# Install all dependencies and build the project.
18# Don't audit to speed up the installation.
19RUN npm run build
20
21# Create final image
22FROM apify/actor-node:20
23
24# Copy just package.json and package-lock.json
25# to speed up the build using Docker layer cache.
26COPY package*.json ./
27
28# Install NPM packages, skip optional and development dependencies to
29# keep the image small. Avoid logging too much and print the dependency
30# tree for debugging
31RUN npm --quiet set progress=false \
32 && npm install --omit=dev --omit=optional \
33 && echo "Installed NPM packages:" \
34 && (npm list --omit=dev --all || true) \
35 && echo "Node.js version:" \
36 && node --version \
37 && echo "NPM version:" \
38 && npm --version \
39 && rm -r ~/.npm
40
# Copy built JS files from builder image.
# --from=builder is required: without it COPY reads from the build context,
# where the compiled dist/ folder produced by `npm run build` does not exist.
COPY --from=builder /usr/src/app/dist ./dist
43
44# Next, copy the remaining files and directories with the source code.
45# Since we do this after NPM install, quick build will be really fast
46# for most source file changes.
47COPY . ./
48
49
50# Run the image.
51CMD npm run start:prod --silent
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "google-search-monitor",
4 "title": "Project Cheerio Crawler Typescript",
5 "description": "Crawlee and Cheerio project in typescript.",
6 "version": "0.0",
7 "meta": {
8 "templateId": "ts-crawlee-cheerio"
9 },
10 "input": "./input_schema.json",
11 "dockerfile": "./Dockerfile"
12}
.actor/input_schema.json
1{
2 "title": "CheerioCrawler Template",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "searchTerm": {
7 "title": "Search term",
8 "type": "string",
9 "description": "Use regular search words or enter Google Search URLs. You can also apply [advanced Google search techniques](https://blog.apify.com/how-to-scrape-google-like-a-pro/), such as <code>AI site:twitter.com</code> or <code>javascript OR python</code>.",
10 "prefill": "apify",
11 "editor": "textfield"
12 },
13 "maxResultsToCheck": {
14 "title": "Max results to check per search term",
15 "type": "integer",
16 "description": "Number of organic or paid results to check with a single search term"
17 },
18 "resultType": {
19 "title": "Result type to check",
20 "type": "string",
21 "description": "Choose whether to check organic or paid search results",
22 "enum": ["organic", "paid", "both"],
23 "default": "organic"
24 },
25 "countryCode": {
26 "title": "Country",
27 "type": "string",
28 "description": "Country location from where the search is initiated",
29 "enum": [
30 "",
31 "af",
32 "al",
33 "dz",
34 "as",
35 "ad",
36 "ao",
37 "ai",
38 "aq",
39 "ag",
40 "ar",
41 "am",
42 "aw",
43 "au",
44 "at",
45 "az",
46 "bs",
47 "bh",
48 "bd",
49 "bb",
50 "by",
51 "be",
52 "bz",
53 "bj",
54 "bm",
55 "bt",
56 "bo",
57 "ba",
58 "bw",
59 "bv",
60 "br",
61 "io",
62 "bn",
63 "bg",
64 "bf",
65 "bi",
66 "kh",
67 "cm",
68 "ca",
69 "cv",
70 "ky",
71 "cf",
72 "td",
73 "cl",
74 "cn",
75 "cx",
76 "cc",
77 "co",
78 "km",
79 "cg",
80 "cd",
81 "ck",
82 "cr",
83 "ci",
84 "hr",
85 "cu",
86 "cy",
87 "cz",
88 "dk",
89 "dj",
90 "dm",
91 "do",
92 "ec",
93 "eg",
94 "sv",
95 "gq",
96 "er",
97 "ee",
98 "et",
99 "fk",
100 "fo",
101 "fj",
102 "fi",
103 "fr",
104 "gf",
105 "pf",
106 "tf",
107 "ga",
108 "gm",
109 "ge",
110 "de",
111 "gh",
112 "gi",
113 "gr",
114 "gl",
115 "gd",
116 "gp",
117 "gu",
118 "gt",
119 "gn",
120 "gw",
121 "gy",
122 "ht",
123 "hm",
124 "va",
125 "hn",
126 "hk",
127 "hu",
128 "is",
129 "in",
130 "id",
131 "ir",
132 "iq",
133 "ie",
134 "il",
135 "it",
136 "jm",
137 "jp",
138 "jo",
139 "kz",
140 "ke",
141 "ki",
142 "kp",
143 "kr",
144 "kw",
145 "kg",
146 "la",
147 "lv",
148 "lb",
149 "ls",
150 "lr",
151 "ly",
152 "li",
153 "lt",
154 "lu",
155 "mo",
156 "mk",
157 "mg",
158 "mw",
159 "my",
160 "mv",
161 "ml",
162 "mt",
163 "mh",
164 "mq",
165 "mr",
166 "mu",
167 "yt",
168 "mx",
169 "fm",
170 "md",
171 "mc",
172 "mn",
173 "ms",
174 "ma",
175 "mz",
176 "mm",
177 "na",
178 "nr",
179 "np",
180 "nl",
181 "an",
182 "nc",
183 "nz",
184 "ni",
185 "ne",
186 "ng",
187 "nu",
188 "nf",
189 "mp",
190 "no",
191 "om",
192 "pk",
193 "pw",
194 "ps",
195 "pa",
196 "pg",
197 "py",
198 "pe",
199 "ph",
200 "pn",
201 "pl",
202 "pt",
203 "pr",
204 "qa",
205 "re",
206 "ro",
207 "ru",
208 "rw",
209 "sh",
210 "kn",
211 "lc",
212 "pm",
213 "vc",
214 "ws",
215 "sm",
216 "st",
217 "sa",
218 "sn",
219 "cs",
220 "sc",
221 "sl",
222 "sg",
223 "sk",
224 "si",
225 "sb",
226 "so",
227 "za",
228 "gs",
229 "es",
230 "lk",
231 "sd",
232 "sr",
233 "sj",
234 "sz",
235 "se",
236 "ch",
237 "sy",
238 "tw",
239 "tj",
240 "tz",
241 "th",
242 "tl",
243 "tg",
244 "tk",
245 "to",
246 "tt",
247 "tn",
248 "tr",
249 "tm",
250 "tc",
251 "tv",
252 "ug",
253 "ua",
254 "ae",
255 "gb",
256 "us",
257 "um",
258 "uy",
259 "uz",
260 "vu",
261 "ve",
262 "vn",
263 "vg",
264 "vi",
265 "wf",
266 "eh",
267 "ye",
268 "zm",
269 "zw"
270 ],
271 "enumTitles": [
272 "Default (United States)",
273 "Afghanistan",
274 "Albania",
275 "Algeria",
276 "American Samoa",
277 "Andorra",
278 "Angola",
279 "Anguilla",
280 "Antarctica",
281 "Antigua and Barbuda",
282 "Argentina",
283 "Armenia",
284 "Aruba",
285 "Australia",
286 "Austria",
287 "Azerbaijan",
288 "Bahamas",
289 "Bahrain",
290 "Bangladesh",
291 "Barbados",
292 "Belarus",
293 "Belgium",
294 "Belize",
295 "Benin",
296 "Bermuda",
297 "Bhutan",
298 "Bolivia",
299 "Bosnia and Herzegovina",
300 "Botswana",
301 "Bouvet Island",
302 "Brazil",
303 "British Indian Ocean Territory",
304 "Brunei Darussalam",
305 "Bulgaria",
306 "Burkina Faso",
307 "Burundi",
308 "Cambodia",
309 "Cameroon",
310 "Canada",
311 "Cape Verde",
312 "Cayman Islands",
313 "Central African Republic",
314 "Chad",
315 "Chile",
316 "China",
317 "Christmas Island",
318 "Cocos (Keeling) Islands",
319 "Colombia",
320 "Comoros",
321 "Congo",
322 "Congo, the Democratic Republic of the",
323 "Cook Islands",
324 "Costa Rica",
325 "Cote D'ivoire",
326 "Croatia",
327 "Cuba",
328 "Cyprus",
329 "Czech Republic",
330 "Denmark",
331 "Djibouti",
332 "Dominica",
333 "Dominican Republic",
334 "Ecuador",
335 "Egypt",
336 "El Salvador",
337 "Equatorial Guinea",
338 "Eritrea",
339 "Estonia",
340 "Ethiopia",
341 "Falkland Islands (Malvinas)",
342 "Faroe Islands",
343 "Fiji",
344 "Finland",
345 "France",
346 "French Guiana",
347 "French Polynesia",
348 "French Southern Territories",
349 "Gabon",
350 "Gambia",
351 "Georgia",
352 "Germany",
353 "Ghana",
354 "Gibraltar",
355 "Greece",
356 "Greenland",
357 "Grenada",
358 "Guadeloupe",
359 "Guam",
360 "Guatemala",
361 "Guinea",
362 "Guinea-Bissau",
363 "Guyana",
364 "Haiti",
365 "Heard Island and Mcdonald Islands",
366 "Holy See (Vatican City State)",
367 "Honduras",
368 "Hong Kong",
369 "Hungary",
370 "Iceland",
371 "India",
372 "Indonesia",
373 "Iran, Islamic Republic of",
374 "Iraq",
375 "Ireland",
376 "Israel",
377 "Italy",
378 "Jamaica",
379 "Japan",
380 "Jordan",
381 "Kazakhstan",
382 "Kenya",
383 "Kiribati",
384 "Korea, Democratic People's Republic of",
385 "Korea, Republic of",
386 "Kuwait",
387 "Kyrgyzstan",
388 "Lao People's Democratic Republic",
389 "Latvia",
390 "Lebanon",
391 "Lesotho",
392 "Liberia",
393 "Libyan Arab Jamahiriya",
394 "Liechtenstein",
395 "Lithuania",
396 "Luxembourg",
397 "Macao",
398 "Macedonia, the Former Yugoslav Republic of",
399 "Madagascar",
400 "Malawi",
401 "Malaysia",
402 "Maldives",
403 "Mali",
404 "Malta",
405 "Marshall Islands",
406 "Martinique",
407 "Mauritania",
408 "Mauritius",
409 "Mayotte",
410 "Mexico",
411 "Micronesia, Federated States of",
412 "Moldova, Republic of",
413 "Monaco",
414 "Mongolia",
415 "Montserrat",
416 "Morocco",
417 "Mozambique",
418 "Myanmar",
419 "Namibia",
420 "Nauru",
421 "Nepal",
422 "Netherlands",
423 "Netherlands Antilles",
424 "New Caledonia",
425 "New Zealand",
426 "Nicaragua",
427 "Niger",
428 "Nigeria",
429 "Niue",
430 "Norfolk Island",
431 "Northern Mariana Islands",
432 "Norway",
433 "Oman",
434 "Pakistan",
435 "Palau",
436 "Palestinian Territory, Occupied",
437 "Panama",
438 "Papua New Guinea",
439 "Paraguay",
440 "Peru",
441 "Philippines",
442 "Pitcairn",
443 "Poland",
444 "Portugal",
445 "Puerto Rico",
446 "Qatar",
447 "Reunion",
448 "Romania",
449 "Russian Federation",
450 "Rwanda",
451 "Saint Helena",
452 "Saint Kitts and Nevis",
453 "Saint Lucia",
454 "Saint Pierre and Miquelon",
455 "Saint Vincent and the Grenadines",
456 "Samoa",
457 "San Marino",
458 "Sao Tome and Principe",
459 "Saudi Arabia",
460 "Senegal",
461 "Serbia and Montenegro",
462 "Seychelles",
463 "Sierra Leone",
464 "Singapore",
465 "Slovakia",
466 "Slovenia",
467 "Solomon Islands",
468 "Somalia",
469 "South Africa",
470 "South Georgia and the South Sandwich Islands",
471 "Spain",
472 "Sri Lanka",
473 "Sudan",
474 "Suriname",
475 "Svalbard and Jan Mayen",
476 "Swaziland",
477 "Sweden",
478 "Switzerland",
479 "Syrian Arab Republic",
480 "Taiwan, Province of China",
481 "Tajikistan",
482 "Tanzania, United Republic of",
483 "Thailand",
484 "Timor-Leste",
485 "Togo",
486 "Tokelau",
487 "Tonga",
488 "Trinidad and Tobago",
489 "Tunisia",
490 "Turkey",
491 "Turkmenistan",
492 "Turks and Caicos Islands",
493 "Tuvalu",
494 "Uganda",
495 "Ukraine",
496 "United Arab Emirates",
497 "United Kingdom",
498 "United States",
499 "United States Minor Outlying Islands",
500 "Uruguay",
501 "Uzbekistan",
502 "Vanuatu",
503 "Venezuela",
504 "Viet Nam",
505 "Virgin Islands, British",
506 "Virgin Islands, U.S.",
507 "Wallis and Futuna",
508 "Western Sahara",
509 "Yemen",
510 "Zambia",
511 "Zimbabwe"
512 ]
513 }
514 }
515}
src/main.ts
1import { Actor } from 'apify';
2
3const PERSISTED_DATASET_PREFIX = 'GOOGLE-SEARCH-MONITOR';
4
5// Expand these types if needed
/**
 * A single search hit as read from the scraper output.
 * `url` is the field this script uses to tell whether a hit was already
 * recorded by an earlier run.
 */
interface Result {
    title: string;
    url: string;
    displayedUrl: string;
    description: string;
    position: number;
    type: 'organic' | 'paid';
}
/**
 * One item of the dataset written by the scraper run this script starts.
 * Only the two result lists that this script reads are declared here.
 */
interface GoogleSearchResultItem {
    organicResults: Array<Result>;
    paidResults: Array<Result>;
}
18
/** Actor input; the field names match the properties of `.actor/input_schema.json`. */
interface Input {
    /** Query text handed to the scraper. */
    searchTerm: string;
    /** Max 300 (the value is not clamped anywhere in this file). */
    maxResultsToCheck: number;
    /** Which result lists to check. */
    resultType: 'organic' | 'paid' | 'both';
    /** Country code passed through to the scraper unchanged. */
    countryCode: string;
}
26
// Page size requested from the scraper; also used to derive how many pages to fetch.
const RESULTS_PER_PAGE = 100;

await Actor.init();

// Fail fast with a readable message when the input is missing or has no usable
// search term. The previous non-null assertion let this case surface later as an
// opaque TypeError from `searchTerm.replace`.
const input = await Actor.getInput<Input>();
if (input == null || typeof input.searchTerm !== 'string' || input.searchTerm.trim() === '') {
    throw new Error('Input must contain a non-empty "searchTerm".');
}

// Optional fields fall back to checking up to 300 organic results.
const {
    searchTerm,
    maxResultsToCheck = 300,
    resultType = 'organic',
    countryCode,
} = input;

// Every run of characters outside [a-zA-Z0-9-] collapses to a single dash so the
// search term can be embedded in a dataset name.
// NOTE(review): distinct search terms can sanitize to the same name (e.g. "a b" and
// "a+b") and would then share one history dataset - confirm whether that is acceptable.
const sanitizedSearchTerm = searchTerm.replace(/[^a-zA-Z0-9-]+/g, '-');

// Named dataset that holds the results recorded by earlier runs for this search term.
const alreadyScrapedUrlsDataset = await Actor.openDataset(`${PERSISTED_DATASET_PREFIX}-${sanitizedSearchTerm}-URLS`);

// Only the URLs are needed to recognise results that were seen before.
const { items: previousItems } = await alreadyScrapedUrlsDataset.getData();
const alreadyScrapedUrls = new Set(previousItems.map((item) => item.url));

await Actor.setStatusMessage(`Starting search for ${searchTerm}`);
45
// The scraper run is expected to finish in under a minute, so nothing is persisted
// while waiting for it.
const pagesToFetch = Math.ceil(maxResultsToCheck / RESULTS_PER_PAGE);
const scraperInput = {
    queries: searchTerm,
    resultsPerPage: RESULTS_PER_PAGE,
    maxPagesPerQuery: pagesToFetch,
    countryCode,
};
const run = await Actor.call('apify/google-search-scraper', scraperInput);

// Read the items from the default dataset of that run.
const scraperDataset = Actor.apifyClient.dataset<GoogleSearchResultItem>(run.defaultDatasetId);
const { items: scrapedData } = await scraperDataset.listItems();
55
// Collect the result lists selected by `resultType`.
// The two checks are independent on purpose: the previous `if / else if` chain meant
// that 'both' only ever collected organic results, because the first branch matched
// and the paid branch was skipped.
const includeOrganic = resultType === 'organic' || resultType === 'both';
const includePaid = resultType === 'paid' || resultType === 'both';

// `?? []` tolerates items that lack one of the lists.
// NOTE(review): whether the scraper always emits both arrays is not visible here.
const scrapedResults: Result[] = scrapedData.flatMap((item) => [
    ...(includeOrganic ? (item.organicResults ?? []) : []),
    ...(includePaid ? (item.paidResults ?? []) : []),
]);
66
// Keep only the results whose URL is not in the set loaded from the history dataset.
const unseenResults = scrapedResults.filter(({ url }) => !alreadyScrapedUrls.has(url));

// Output of this run: the previously unseen results only.
await Actor.pushData(unseenResults);
await Actor.setStatusMessage(``);

// Append the same results to the history dataset so later runs treat them as seen.
await alreadyScrapedUrlsDataset.pushData(unseenResults);

// Gracefully exit the Actor process with a summary of what was found.
const exitMessage = `Search for ${searchTerm} finished with ${unseenResults.length} new results out of ${scrapedResults.length} `
    + `scraped now and ${alreadyScrapedUrls.size} already scraped`;
await Actor.exit(exitMessage);
Developer
Maintained by Community
Actor Metrics
2 monthly users
-
1 star
>99% runs succeeded
Created in Jul 2024
Modified 5 months ago
Categories