.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
14
15# dist folder
16dist
17
.gitignore
1storage
2apify_storage
3crawlee_storage
4node_modules
5dist
6tsconfig.tsbuildinfo
7storage/*
8!storage/key_value_stores
9storage/key_value_stores/*
10!storage/key_value_stores/default
11storage/key_value_stores/default/*
12!storage/key_value_stores/default/INPUT.json
13
14# Added by Apify CLI
15.venv
16
package.json
1{
2 "name": "gh-issue-notifier",
3 "version": "0.0.1",
4 "type": "module",
5 "description": "This is an example of an Apify actor.",
6 "engines": {
7 "node": ">=18.0.0"
8 },
9 "dependencies": {
10 "@octokit/openapi-types": "^18.0.0",
11 "apify": "^3.1.4",
12 "axios": "^1.4.0",
13 "cheerio": "^1.0.0-rc.12",
14 "octokit": "^3.1.0"
15 },
16 "devDependencies": {
17 "@apify/tsconfig": "^0.1.0",
18 "@types/node": "^20.5.9",
19 "ts-node": "^10.9.1",
20 "typescript": "^5.0.4"
21 },
22 "scripts": {
23 "start": "npm run start:dev",
24 "start:prod": "node dist/main.js",
25 "start:dev": "node --loader ts-node/esm src/main.ts",
26 "build": "tsc",
27 "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
28 },
29 "author": "It's not you it's me",
30 "license": "ISC"
31}
32
tsconfig.json
1{
2 "extends": "@apify/tsconfig",
3 "compilerOptions": {
4 "module": "ES2022",
5 "target": "ES2022",
6 "outDir": "dist",
7 "noUnusedLocals": false,
8 "lib": ["DOM"]
9 },
10 "ts-node": { "esm": true },
11 "include": ["./src/**/*"]
12}
13
.actor/Dockerfile
1# Specify the base Docker image. You can read more about
2# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node:18 AS builder
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY package*.json ./
9
10# Install all dependencies. Don't audit to speed up the installation.
11RUN npm install --include=dev --audit=false
12
13# Next, copy the source files using the user set
14# in the base image.
15COPY . ./
16
17# Build the project, i.e. compile the TypeScript sources
18# into the dist folder using the "build" script.
19RUN npm run build
20
21# Create final image
22FROM apify/actor-node:18
23
24# Copy just package.json and package-lock.json
25# to speed up the build using Docker layer cache.
26COPY package*.json ./
27
28# Install NPM packages, skip optional and development dependencies to
29# keep the image small. Avoid logging too much and print the dependency
30# tree for debugging
31RUN npm --quiet set progress=false \
32 && npm install --omit=dev --omit=optional \
33 && echo "Installed NPM packages:" \
34 && (npm list --omit=dev --all || true) \
35 && echo "Node.js version:" \
36 && node --version \
37 && echo "NPM version:" \
38 && npm --version \
39 && rm -r ~/.npm
40
41# Copy built JS files from builder image
42COPY --from=builder /usr/src/app/dist ./dist
43
44# Next, copy the remaining files and directories with the source code.
45# Since we do this after NPM install, quick build will be really fast
46# for most source file changes.
47COPY . ./
48
49
50# Run the image.
51CMD npm run start:prod --silent
52
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "gh-issue-notifier",
4 "title": "Scrape single page in TypeScript",
5 "description": "Scrape data from single page with provided URL.",
6 "version": "0.0",
7 "meta": {
8 "templateId": "ts-start"
9 },
10 "input": "./input_schema.json",
11 "dockerfile": "./Dockerfile"
12}
13
.actor/input_schema.json
1{
2 "title": "Scrape data from a web page",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "searchRepos": {
7 "title": "URL or names of repositories",
8 "type": "array",
9 "description": "The URLs or repository names of repositories you want to monitor.",
10 "editor": "stringList",
11 "prefill": []
12 },
13 "searchKeywords": {
14 "title": "Keywords to search for",
15 "type": "array",
16 "description": "The keywords that will be looked for in the titles and bodies of new issues.",
17 "editor": "stringList",
18 "prefill": []
19 },
20 "slackToken": {
21 "title": "Slack token",
22 "type": "string",
23 "description": "Slack API token in a format xoxp-xxxxxxxxx-xxxx.",
24 "editor": "textfield",
25 "isSecret": true
26 },
27 "slackChannel": {
28 "title": "Slack channel",
29 "type": "string",
30 "description": "Channel where the notification with Github issues information will be sent (e.g. #general)",
31 "prefill": "#general",
32 "editor": "textfield"
33 }
34 },
35 "required": ["searchRepos", "searchKeywords"]
36}
37
src/hash.ts
1import crypto from 'crypto'
2
/**
 * Create crypto hash of an object using SHA-256.
 * Top-level array fields are sorted with the default `.sort()` ordering before
 * hashing, so the order of their elements does not influence the digest.
 * The sorting is done on copies - the passed object and its arrays are never modified.
 * @param object to hash
 * @returns hash string
 */
export const createObjectDigest = async (object: any) => {
  // Only objects and arrays have fields to normalise; anything else
  // (null, undefined, primitives) is serialised as it is.
  let normalised = object
  if (object !== null && typeof object === 'object') {
    // Shallow copy keeps the key order, so the serialised form stays the same
    // as it would be for the original object.
    normalised = Array.isArray(object) ? [...object] : { ...object }
    for (const field of Object.keys(normalised)) {
      if (Array.isArray(normalised[field])) {
        // `.sort()` works in place, hence the copy of the array.
        normalised[field] = [...normalised[field]].sort()
      }
    }
  }
  const objectAsMessage = JSON.stringify(normalised)
  return digestMessage(objectAsMessage)
}
18
/**
 * Hash a text message using SHA-256.
 * @param message text to hash, encoded as UTF-8 before hashing
 * @returns the digest as a lowercase hex string
 */
async function digestMessage(message: string) {
  const encodedMessage = new TextEncoder().encode(message)
  const digest = await crypto.subtle.digest('SHA-256', encodedMessage)

  // Every byte becomes exactly two hex characters.
  let hex = ''
  for (const byte of new Uint8Array(digest)) {
    hex += byte.toString(16).padStart(2, '0')
  }
  return hex
}
26
src/main.ts
1// Cheerio - The fast, flexible & elegant library for parsing and manipulating HTML and XML (Read more at https://cheerio.js.org/).
2import * as cheerio from 'cheerio'
3// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/).
4import { Actor, log } from 'apify'
5import { Octokit } from 'octokit'
6import { RestEndpointMethodTypes } from '@octokit/plugin-rest-endpoint-methods'
7import { sendSlackNotification } from './slackMessage.js'
8import { createSlackMessageBlocks } from './slackBlocks.js'
9import { parseRepoNameOrUrl } from './utils.js'
10import { createObjectDigest } from './hash.js'
11
/**
 * Resolves to the element type of an array (or readonly array) type,
 * or to `never` when the given type is not an array.
 */
export type ArrElement<ArrType> = ArrType extends ReadonlyArray<infer Item> ? Item : never

/**
 * Whole response of the octokit `issues.listForRepo` call.
 * The listed issues themselves are available as `Issue['data']`.
 */
export type Issue = RestEndpointMethodTypes['issues']['listForRepo']['response']
17
/** Actor input, its structure is defined in input_schema.json */
interface Input {
  /** URLs or names of the repositories to monitor */
  searchRepos: string[]
  /** Keywords looked for in the titles and bodies of issues */
  searchKeywords: string[]
  /** Slack API token, notifications are sent only when set together with the channel */
  slackToken?: string
  /** Slack channel the notifications are sent to */
  slackChannel?: string
}

/** Name of the key-value store holding the timestamps of the last checks */
const KV_STORE_NAME = 'gh-issues-notifier-state'
26
/**
 * Checks whether or not the issue is relevant.
 * Only open issues are considered. An issue is relevant when at least one of
 * the search keywords is found (case-sensitively) in its title or in the text
 * content of its body.
 * @param issue to check
 * @param searchKeywords keywords to look for
 * @returns true if the issue is relevant
 */
const checkIssueRelevant = (issue: ArrElement<Issue['data']>, searchKeywords: string[]) => {
  if (!issue || issue.state !== 'open') {
    return false
  }

  if (searchKeywords.some((keyword) => issue.title.includes(keyword))) {
    return true
  }

  if (!issue.body) {
    return false
  }

  // The body is loaded into cheerio so that its text content is searched.
  // The text is extracted once and reused for all the keywords.
  const bodyText = cheerio.load(issue.body).text()
  return searchKeywords.some((keyword) => bodyText.includes(keyword))
}
56
/**
 * Discover all issues that are deemed relevant and were not reported yet.
 * Goes through all the provided GH repositories and looks at the issue text.
 * Entries that cannot be parsed into a repository are logged and skipped.
 * Only the first page of issues of each repository is fetched; the page size
 * is raised to 100 so that busy repositories are covered better.
 * @param repoUrls urls of repos to search
 * @param searchKeywords keywords to match in the search
 * @param checkedFilter lambda to filter out issues that were already reported
 * @throws when octokit issues fail to fetch
 * @returns flat list of the relevant issues from all the repos
 */
const findRelevantNewIssues = async (
  repoUrls: string[],
  searchKeywords: string[],
  checkedFilter: (potentialNewIssue: ArrElement<Issue['data']>) => boolean
) => {
  const outputs: Issue['data'] = []
  for (const url of repoUrls) {
    let repoInfo
    try {
      repoInfo = parseRepoNameOrUrl(url)
    } catch (e) {
      // One invalid entry must not stop the remaining repositories from being checked.
      const reason = e instanceof Error ? e.message : String(e)
      log.error(`Skipping repository [${url}]: ${reason}`)
      continue
    }

    const repoIssues = await octokit.rest.issues.listForRepo({
      owner: repoInfo.user,
      repo: repoInfo.repoName,
      per_page: 100,
    })

    const relevantIssues = repoIssues.data
      .filter(checkedFilter)
      .filter((issue) => checkIssueRelevant(issue, searchKeywords))

    outputs.push(...relevantIssues)
  }
  return outputs
}
98
/**
 * Decides whether an issue still needs to be reported, based on the time it was created.
 * @param potentialNewIssue Issue to check
 * @param lastCheckedAt timestamp (in milliseconds) of when the issues were last checked
 * @returns true if the issue was created after the last check
 */
const checkIssueNotReported = (
  potentialNewIssue: ArrElement<Issue['data']>,
  lastCheckedAt: number
) => {
  const createdAt = Date.parse(potentialNewIssue.created_at)
  return createdAt > lastCheckedAt
}
111
/**
 * Runs a single check of the repositories from the input.
 * Finds the relevant issues created since the last check done with the same
 * input, stores the time of the current check, sends a Slack notification for
 * each issue (when both the Slack channel and token are provided) and pushes
 * the issues to the dataset.
 * @param input actor input
 */
const findAndReportIssues = async (input: Input) => {
  const { slackChannel, slackToken } = input
  const reportedIssuesStore = await Actor.openKeyValueStore(KV_STORE_NAME)
  // The time of the last check is stored under a digest of the input,
  // so that each input configuration keeps its own state.
  const actorInstanceHash = await createObjectDigest(input)
  const lastCheckedAt = Number((await reportedIssuesStore.getValue(actorInstanceHash)) || 0)
  const currentCheckedAt = Date.now()

  // Closure filter function with last checked timestamp
  const issueAlreadyCheckedFilter = (potentialNewIssue: ArrElement<Issue['data']>) => {
    return checkIssueNotReported(potentialNewIssue, lastCheckedAt)
  }

  let newIssues: Awaited<ReturnType<typeof findRelevantNewIssues>>
  try {
    newIssues = await findRelevantNewIssues(
      input.searchRepos,
      input.searchKeywords,
      issueAlreadyCheckedFilter
    )
  } catch (e) {
    log.error(
      'Error fetching issues from GitHub. One of your selected repos is likely misspelled or private.',
      { cause: e instanceof Error ? e.message : String(e) }
    )
    return
  }

  log.info(`Saving current actor information under hash [${actorInstanceHash}]`)
  // Awaited so that the state is really written before the actor exits.
  await reportedIssuesStore.setValue(actorInstanceHash, currentCheckedAt)

  if (newIssues.length === 0) {
    log.info('No new issues discovered.')
    return
  }
  log.info(`Discovered ${newIssues.length} new issues.`)

  if (slackChannel && slackToken) {
    log.info(`Sending Slack notification with new issues.`)
    // Wait for all the notifications; a failed one is logged and does not
    // prevent the issues from being saved to the dataset below.
    const results = await Promise.allSettled(
      newIssues.map((newIssue) =>
        sendSlackNotification({
          token: slackToken,
          channel: slackChannel,
          blocks: createSlackMessageBlocks(newIssue),
        })
      )
    )
    const failedCount = results.filter((result) => result.status === 'rejected').length
    if (failedCount > 0) {
      log.error(`Failed to send ${failedCount} of ${results.length} Slack notifications.`)
    }
  }

  // Save the new issues to Dataset - a table-like storage.
  await Actor.pushData(newIssues)
}
159
// The init() call configures the Actor for its environment. It's recommended to start every Actor with an init().
await Actor.init()

// Structure of input is defined in input_schema.json
const input = await Actor.getInput<Input>()
if (!input) throw new Error('Input is missing!')
const { searchRepos, searchKeywords } = input

// Both lists are needed for the check. When any of them is missing, all the
// problems are logged and the run is ended as failed instead of continuing
// with an input that cannot be processed.
const inputProblems: string[] = []
if (!searchRepos || searchRepos.length === 0) {
  inputProblems.push('No repositories provided!')
}
if (!searchKeywords || searchKeywords.length === 0) {
  inputProblems.push('No search keywords provided!')
}
if (inputProblems.length > 0) {
  for (const problem of inputProblems) {
    log.error(problem)
  }
  await Actor.fail(inputProblems.join(' '))
}

const octokit = new Octokit({})

await findAndReportIssues(input)

// Gracefully exit the Actor process. It's recommended to quit all Actors with an exit().
await Actor.exit()
181
src/slackBlocks.ts
1import type { Issue, ArrElement } from './main.js'
2
/**
 * Build the complete list of Slack blocks for a newly discovered issue:
 * the title blocks followed by the blocks describing the issue.
 * https://api.slack.com/block-kit/building
 * @param newIssue issue to make message for, as returned by octokit
 * @returns Slack message Blocks summarising the new issue
 */
export const createSlackMessageBlocks = (newIssue: ArrElement<Issue['data']>) => {
  const titleBlocks: object[] = createTitleBlocks()
  return titleBlocks.concat(createIssueBlocks(newIssue))
}
14
/**
 * Build the heading part of the Slack message.
 * @returns list with a single header block
 */
const createTitleBlocks = () => {
  const headerBlock = {
    type: 'header',
    text: {
      type: 'plain_text',
      text: 'New GitHub issue was discovered! 🔎',
      emoji: true,
    },
  }
  return [headerBlock]
}
31
/**
 * Create Slack blocks for an individual issue.
 * The first section links to the issue, the second one shows when and by whom
 * the issue was created.
 * The links point to the web pages (`html_url`) of the issue and of its author;
 * `url` is used only as a fallback when `html_url` is not available.
 * @param issue issue to describe, as returned by octokit
 * @returns slack blocks
 */
const createIssueBlocks = (issue: ArrElement<Issue['data']>) => {
  // `&`, `<` and `>` are control characters of Slack mrkdwn and have to be
  // escaped, otherwise they could break the link markup.
  const escapeMrkdwn = (text: string) =>
    text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')

  const issueLink = `<${issue.html_url ?? issue.url}|${escapeMrkdwn(issue.title)}>`
  // The author can be missing, in that case no link is rendered.
  const author = issue.user
    ? `<${issue.user.html_url ?? issue.user.url}|${escapeMrkdwn(issue.user.login)}>`
    : 'unknown'

  return [
    {
      type: 'section',
      fields: [
        {
          type: 'mrkdwn',
          text: issueLink,
        },
        // TODO: add a field listing the matched keywords
      ],
    },
    {
      type: 'section',
      fields: [
        {
          type: 'mrkdwn',
          text: `*Created:*\n${issue.created_at}`,
        },
        {
          type: 'mrkdwn',
          text: `*Author:*\n${author}`,
        },
      ],
    },
  ]
}
66
src/slackMessage.ts
1import { ApifyClient } from 'apify'
2
/** ID of the Apify Actor that is called to send the Slack message */
const SLACK_ACTOR_ID = 'katerinahronik/slack-message'
/** Text passed to the Slack Actor along with the message blocks */
const MESSAGE_TEXT = 'New GitHub issue was discovered!'

/** Connection data and content of a single Slack notification */
type SlackIntegration = {
  /** Slack channel the message is sent to */
  channel: string
  /** Slack API token */
  token: string
  /** Slack blocks forming the message */
  blocks: object[]
}
11
/**
 * Send a slack notification containing the provided message data.
 * The message is sent by calling the Apify Actor katerinahronik/slack-message
 * through an Apify client created with the token from the APIFY_TOKEN
 * environment variable.
 * @param slackIntegration connection data for the Slack integration
 * @returns promise of slack actor call
 */
export const sendSlackNotification = async (slackIntegration: SlackIntegration) => {
  const { token, channel, blocks } = slackIntegration
  const client = new ApifyClient({ token: process.env.APIFY_TOKEN })

  return client.actor(SLACK_ACTOR_ID).call({
    token,
    channel,
    text: MESSAGE_TEXT,
    blocks,
  })
}
31
src/utils.ts
/**
 * Parse a repository reference into the name of its owner and the name of the repository.
 * Accepted forms are `owner/repo`, a `github.com/owner/repo` URL and an
 * `api.github.com/repos/owner/repo` URL. Surrounding whitespace, trailing
 * slashes and a `.git` suffix are ignored.
 * @param nameOrUrl repository name or URL
 * @throws Error when the value does not consist of exactly a non-empty owner and a non-empty repository name
 * @returns owner as `user` and repository name as `repoName`
 */
export const parseRepoNameOrUrl = (nameOrUrl: string) => {
  const repo = nameOrUrl
    .trim()
    .replace(/^https?:\/\/(?:www\.)?github\.com\//i, '')
    .replace(/^https?:\/\/api\.github\.com\/repos\//i, '')
    .replace(/\/+$/, '')
    .replace(/\.git$/i, '')
  const splitRepoName = repo.split('/')
  // Exactly two non-empty parts are expected: the owner and the repository.
  if (splitRepoName.length !== 2 || splitRepoName.some((part) => part.length === 0)) {
    throw new Error(`Invalid repo name provided: "${nameOrUrl}"`)
  }
  const [user, repoName] = splitRepoName
  return { user, repoName }
}
10
Developer
Maintained by Community
Actor stats
- 1 users
- 43 runs
- Modified 3 months ago
Categories