Productboard Roadmap Scraper
Try for free
No credit card required
Go to Store
Productboard Roadmap Scraper
zuzana_stetinova/productboard-roadmap-scraper
Try for free
No credit card required
This scraper extracts data from a Productboard roadmap and saves it to a key-value store (under the OUTPUT key) as a map of feature ID to feature. The scraped feature data includes the feature name, description, timeline, and team, plus connected features with their title, description, and timeline.
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
.editorconfig
1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf
.eslintrc
1{
2 "root": true,
3 "env": {
4 "browser": true,
5 "es2020": true,
6 "node": true
7 },
8 "extends": [
9 "@apify/eslint-config-ts"
10 ],
11 "parserOptions": {
12 "project": "./tsconfig.json",
13 "ecmaVersion": 2020
14 },
15 "ignorePatterns": [
16 "node_modules",
17 "dist",
18 "**/*.d.ts"
19 ]
20}
.gitignore
1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv
package.json
1{
2 "name": "productboard-roadmap-scraper",
3 "version": "0.0.1",
4 "type": "module",
5 "description": "Apify actor to scrape data from Productboard roadmap.",
6 "engines": {
7 "node": ">=18.0.0"
8 },
9 "dependencies": {
10 "@apify/log": "^2.5.0",
11 "apify": "^3.1.10",
12 "axios": "^1.6.7",
13 "crawlee": "^3.5.4",
14 "playwright": "1.42.1"
15 },
16 "devDependencies": {
17 "@apify/eslint-config-ts": "^0.3.0",
18 "@apify/tsconfig": "^0.1.0",
19 "@typescript-eslint/eslint-plugin": "^6.7.2",
20 "@typescript-eslint/parser": "^6.7.2",
21 "eslint": "^8.50.0",
22 "tsx": "^4.6.2",
23 "typescript": "^5.3.3"
24 },
25 "scripts": {
26 "start": "npm run start:dev",
27 "start:prod": "node dist/main.js",
28 "start:dev": "tsx src/main.ts",
29 "build": "tsc",
30 "lint": "eslint ./src --ext .ts",
31 "lint:fix": "eslint ./src --ext .ts --fix"
32 },
33 "license": "ISC"
34}
tsconfig.json
1{
2 "extends": "@apify/tsconfig",
3 "compilerOptions": {
4 "module": "NodeNext",
5 "moduleResolution": "NodeNext",
6 "target": "ES2022",
7 "outDir": "dist",
8 "noUnusedLocals": false,
9 "skipLibCheck": true,
10 "lib": ["DOM"]
11 },
12 "include": [
13 "./src/**/*"
14 ]
15}
.actor/Dockerfile
# Build stage: install all dependencies (dev included) and compile the TypeScript sources.
FROM apify/actor-node-playwright-chrome:18 AS builder

COPY package*.json ./

RUN npm install --include=dev --audit=false

COPY . ./

RUN npm run build

# Runtime stage: production dependencies plus the compiled output of the build stage.
FROM apify/actor-node-playwright-chrome:18

COPY package*.json ./

RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Take dist/ from the builder stage. Without --from the source path is resolved
# against the build context, which does not contain the compiled output.
COPY --from=builder /home/myuser/dist ./dist

COPY . ./


CMD ./start_xvfb_and_run_cmd.sh && npm run start:prod --silent
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "productboard-roadmap-scraper",
4 "title": "Productboard roadmap scraper",
5 "description": "Actor is scraping basic data from Productboard roadmap to a key-value store.",
6 "version": "0.0",
7 "input": "./input_schema.json",
8 "dockerfile": "./Dockerfile"
9}
.actor/input_schema.json
1{
2 "title": "Productboard roadmap scraper",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "productboardRoadmapUrl": {
7 "title": "Productboard roadmap URL",
8 "type": "string",
9 "description": "URL of the Productboard roadmap.",
10 "editor": "textfield"
11 },
12 "userEmail": {
13 "title": "Productboard user email",
14 "type": "string",
15 "description": "Email for Productboard account used for access.",
16 "editor": "textfield"
17 },
18 "userPassword": {
19 "title": "Productboard user password",
20 "type": "string",
21 "description": "Password for Productboard account used for access.",
22 "editor": "textfield",
23 "isSecret": true
24 }
25 },
26 "required": ["productboardRoadmapUrl", "userEmail", "userPassword"]
27}
src/constants.ts
/** CSS selectors of the login form controls that the login handler interacts with. */
export const loginSelectors = {
    emailInput: 'input#email',
    passwordInput: 'input#password',
    submitButton: 'button[type="submit"]',
} as const;

/** Name of the key-value store opened by the actor for per-feature records. */
export const STORE_ID = 'result';

/** Key intended for the aggregated result record. */
export const STORE_RESULT_KEY = 'OUTPUT';
9
/**
 * Extracts the roadmap id from a roadmap URL: the text between `/roadmap/`
 * and the first following `-`.
 *
 * @param url Roadmap URL expected to contain a `/roadmap/<id>-...` segment.
 * @returns The non-empty roadmap id.
 * @throws Error when the URL has no `/roadmap/` segment or the id is empty.
 */
const getRoadmapId = (url: string): string => {
    const [, afterMarker] = url.split('/roadmap/');
    const roadmapId = afterMarker?.split('-')[0];
    // An empty id would make the matcher below look for "//initial", so reject it too.
    if (!roadmapId) {
        throw new Error(`Cannot extract roadmap id from URL: ${url}`);
    }
    return roadmapId;
};

/**
 * Tells whether a request URL is the "initial" data request of the given roadmap,
 * i.e. whether it contains `/<roadmapId>/initial`.
 *
 * @param requestUrl URL of the observed request.
 * @param productboardRoadmapUrl Roadmap URL the id is derived from.
 * @returns `true` when the request URL targets the roadmap's initial endpoint.
 * @throws Error when no roadmap id can be derived from `productboardRoadmapUrl`.
 */
export const isRoadmapInitialRequest = ({ requestUrl, productboardRoadmapUrl }:{requestUrl: string, productboardRoadmapUrl: string}) => (
    requestUrl.includes(`/${getRoadmapId(productboardRoadmapUrl)}/initial`)
);
src/handlers.ts
1import { Page, Response } from 'playwright';
2import axios from 'axios';
3import log from '@apify/log';
4import { isArrayOfObjects, isObject } from './utils.js';
5import { FeatureData } from './types.js';
6import { loginSelectors } from './constants.js';
7
/**
 * Fills in the login form on the given page and submits it.
 *
 * Each field is focused with a click and then typed into via the keyboard;
 * finally the submit button is clicked.
 *
 * @param page Page currently showing the login form.
 * @param userEmail Text typed into the e-mail field.
 * @param userPassword Text typed into the password field.
 */
export const handleLogin = async (page: Page, { userEmail, userPassword }: {
    userEmail: string;
    userPassword: string
}) => {
    const { emailInput, passwordInput, submitButton } = loginSelectors;
    const credentialFields: Array<[string, string]> = [
        [emailInput, userEmail],
        [passwordInput, userPassword],
    ];

    // Fields are processed strictly one after another: focus first, then type.
    for (const [selector, text] of credentialFields) {
        await page.click(selector);
        await page.keyboard.type(text);
    }

    await page.click(submitButton);
};
18
/** True when the item's `featureType` is exactly `'feature'`. */
const isFeature = ({ featureType }: Record<string, unknown>) => featureType === 'feature';

/** True when the item's `featureType` is exactly `'subfeature'`. */
const isSubfeature = ({ featureType }: Record<string, unknown>) => featureType === 'subfeature';
21
/** The collections of the roadmap's initial response that the scraper consumes. */
type RequiredResponse = {
    releases: Array<Record<string, unknown>>;
    features: Array<Record<string, unknown>>;
    listColumnItems: Array<Record<string, unknown>>;
    releaseAssignments: Array<Record<string, unknown>>;
    columnValues: Array<Record<string, unknown>>;
};

/**
 * Reads the response body as JSON and validates that every required collection
 * is an array of objects.
 *
 * @param response Response whose JSON body should be parsed.
 * @returns The validated collections, or `null` when reading or validation fails
 *          (the failure is logged, not thrown).
 */
export const parseRequiredResponse = async ({ response } : { response: Response }): Promise<RequiredResponse | null> => {
    // Narrows one collection or throws the field-specific validation error.
    const asObjectList = (candidate: unknown, field: string): Array<Record<string, unknown>> => {
        if (!isArrayOfObjects(candidate)) throw new Error(`Invalid data format of ${field}`);
        return candidate;
    };

    try {
        const payload = await response.json();
        if (!isObject(payload)) {
            throw new Error('Invalid response format');
        }

        // Properties are evaluated top to bottom, so the first invalid field
        // in this order is the one reported.
        return {
            releases: asObjectList(payload.releases, 'releases'),
            features: asObjectList(payload.features, 'features'),
            listColumnItems: asObjectList(payload.listColumnItems, 'listColumnItems'),
            releaseAssignments: asObjectList(payload.releaseAssignments, 'releaseAssignments'),
            columnValues: asObjectList(payload.columnValues, 'columnValues'),
        };
    } catch (err) {
        log.error('Failed to parse response as JSON.', { err });
        return null;
    }
};
49
/**
 * Builds a lookup from release id to release name.
 *
 * @param releases Release records; `id` and `name` are read from each.
 * @returns Object keyed by release id with the release name as value.
 */
const getReleasesMap = (releases: Array<Record<string, unknown>>): Record<string, string> => {
    const idNamePairs = releases.map(({ id, name }) => [id, name] as const);
    return Object.fromEntries(idNamePairs);
};
53
/**
 * Builds a lookup from list-column item id to its label.
 *
 * @param listColumnItems Column item records; `id` and `label` are read from each.
 * @returns Object keyed by item id with the item label as value.
 */
const getTeamsMap = (listColumnItems: Array<Record<string, unknown>>): Record<string, string> => {
    const idLabelPairs = listColumnItems.map(({ id, label }) => [id, label] as const);
    return Object.fromEntries(idLabelPairs);
};
57
/** Features keyed by feature id. */
type FeatureItemsMap = { [p: string]: FeatureData };
/** Subfeatures keyed by subfeature id; `parentId` refers to the owning feature. */
type SubfeatureItemsMap = { [p: string]: { id: string; parentId: string; title: string; timeline: string | null } };

/**
 * Splits raw feature records into two id-keyed maps: one for items whose
 * `featureType` is `'feature'`, one for `'subfeature'` items. Items of any
 * other type are dropped.
 *
 * Features start with an empty timeline and no description, team or subfeatures;
 * subfeatures start without a timeline.
 *
 * @param features Raw feature records; `id`, `name`, `parentId` and `featureType` are read.
 * @returns Both maps.
 */
const getFeatureItemMaps = (features: Array<Record<string, unknown>>): {
    featureItemsMap: FeatureItemsMap;
    subfeatureItemsMap: SubfeatureItemsMap
} => {
    const featureEntries = features
        .filter(isFeature)
        .map((raw): [string, FeatureData] => [
            String(raw.id),
            { title: String(raw.name), description: null, timeline: [], team: null, features: null },
        ]);

    const subfeatureEntries = features
        .filter(isSubfeature)
        .map((raw): [string, SubfeatureItemsMap[string]] => {
            const id = String(raw.id);
            return [id, { id, title: String(raw.name), parentId: String(raw.parentId), timeline: null }];
        });

    return {
        featureItemsMap: Object.fromEntries(featureEntries),
        subfeatureItemsMap: Object.fromEntries(subfeatureEntries),
    };
};
82
/**
 * Sets `team` on the features referenced by `columnValues`, translating each
 * column value to a team label through `teamsMap`. Mutates `featureItemsMap`.
 *
 * Entries are skipped when:
 * - `featureId` or `value` is neither a string nor a number (a warning is logged),
 * - the feature is not present in `featureItemsMap`,
 * - the value is not present in `teamsMap` — previously this overwrote an
 *   already resolved team with `undefined`.
 *
 * @param columnValues Records carrying `featureId` and `value`.
 * @param teamsMap Lookup from column item id to team label.
 * @param featureItemsMap Features keyed by id; updated in place.
 */
const addTeamsToFeatureMaps = (
    { columnValues, teamsMap, featureItemsMap }:{
        columnValues: Array<Record<string, unknown>>,
        teamsMap: Record<string, string>,
        featureItemsMap: FeatureItemsMap
    },
) => {
    // Own-property check: unlike `in`, it does not match inherited keys such as "toString".
    const hasOwnKey = (map: object, key: string | number) => Object.prototype.hasOwnProperty.call(map, key);

    columnValues.forEach(({ value, featureId }) => {
        if ((typeof featureId !== 'number' && typeof featureId !== 'string') || (typeof value !== 'number' && typeof value !== 'string')) {
            log.warning('Invalid feature id or value in columnValues // skipped', { featureId, value });
            return;
        }
        if (!hasOwnKey(featureItemsMap, featureId) || !hasOwnKey(teamsMap, value)) {
            return;
        }
        featureItemsMap[featureId].team = teamsMap[value];
    });
};
100
/**
 * Applies release assignments to features and subfeatures, translating release
 * ids to release names through `releasesMap`. Mutates both item maps.
 *
 * A feature collects every assigned release name in `timeline`; a subfeature
 * keeps only the most recently processed one.
 *
 * Assignments are skipped (with a warning) when an id is neither a string nor a
 * number, or when the release id is unknown to `releasesMap` — previously an
 * unknown release put `undefined` into the timeline.
 *
 * @param releaseAssignments Records carrying `releaseId` and `featureId`.
 * @param releasesMap Lookup from release id to release name.
 * @param featureItemsMap Features keyed by id; updated in place.
 * @param subfeatureItemsMap Subfeatures keyed by id; updated in place.
 */
const addReleasesToFeatureMaps = (
    { releaseAssignments, releasesMap, featureItemsMap, subfeatureItemsMap }:{
        releaseAssignments: Array<Record<string, unknown>>,
        releasesMap: Record<string, string>,
        featureItemsMap: FeatureItemsMap,
        subfeatureItemsMap: SubfeatureItemsMap
    },
) => {
    // Own-property check: unlike `in`, it does not match inherited keys such as "toString".
    const hasOwnKey = (map: object, key: string | number) => Object.prototype.hasOwnProperty.call(map, key);

    releaseAssignments.forEach(({ releaseId, featureId }) => {
        if ((typeof featureId !== 'number' && typeof featureId !== 'string') || (typeof releaseId !== 'number' && typeof releaseId !== 'string')) {
            log.warning('Invalid release or feature id in releaseAssignments // skipped', { releaseId, featureId });
            return;
        }
        if (!hasOwnKey(releasesMap, releaseId)) {
            log.warning('Unknown release id in releaseAssignments // skipped', { releaseId, featureId });
            return;
        }

        const releaseName = releasesMap[releaseId];
        if (hasOwnKey(featureItemsMap, featureId)) {
            featureItemsMap[featureId].timeline = [...featureItemsMap[featureId].timeline, releaseName];
        }
        if (hasOwnKey(subfeatureItemsMap, featureId)) {
            subfeatureItemsMap[featureId].timeline = releaseName;
        }
    });
};
122
/**
 * Attaches every subfeature to its parent feature under `features[<subfeature id>]`
 * as `{ title, description: null, timeline }`. Mutates `featureItemsMap`.
 *
 * A subfeature whose `parentId` is not a key of `featureItemsMap` is skipped with
 * a warning; previously it caused a TypeError that aborted the whole run.
 *
 * @param featureItemsMap Features keyed by id; updated in place.
 * @param subfeatureItemsMap Subfeatures keyed by id; read only.
 */
const addSubfeaturesToFeatureMaps = (
    { featureItemsMap, subfeatureItemsMap }:{
        featureItemsMap: FeatureItemsMap,
        subfeatureItemsMap: SubfeatureItemsMap
    },
) => {
    Object.values(subfeatureItemsMap)
        .forEach(({ id, title, timeline, parentId }) => {
            // Own-property lookup so inherited keys (e.g. "constructor") never count as a parent.
            const parent = Object.prototype.hasOwnProperty.call(featureItemsMap, parentId)
                ? featureItemsMap[parentId]
                : undefined;
            if (!parent) {
                log.warning('Parent feature of subfeature not found // skipped', { id, parentId });
                return;
            }

            parent.features = { ...parent.features, [id]: { title, description: null, timeline } };
        });
};
136
/**
 * Turns the validated initial roadmap response into a map of features keyed by
 * feature id, with team, timeline and subfeatures filled in. Descriptions are
 * left as `null`.
 *
 * @param response Validated collections of the initial response.
 * @returns The assembled feature map.
 */
export const handleInitialRequest = async (response: RequiredResponse): Promise<Record<string, FeatureData> | null> => {
    const releasesMap = getReleasesMap(response.releases);
    const teamsMap = getTeamsMap(response.listColumnItems);
    const { featureItemsMap, subfeatureItemsMap } = getFeatureItemMaps(response.features);

    addTeamsToFeatureMaps({
        columnValues: response.columnValues,
        teamsMap,
        featureItemsMap,
    });
    addReleasesToFeatureMaps({
        releaseAssignments: response.releaseAssignments,
        releasesMap,
        featureItemsMap,
        subfeatureItemsMap,
    });

    // Runs after the release step: subfeature timelines are copied into their
    // parent features here, so they have to be set beforehand.
    addSubfeaturesToFeatureMaps({ featureItemsMap, subfeatureItemsMap });

    return featureItemsMap;
};
151
/** Workspace origin used when the caller does not supply one (the previously hard-coded value). */
const DEFAULT_PRODUCTBOARD_BASE_URL = 'https://apify.productboard.com';

/**
 * Fetches the detail record of one feature (or subfeature) from `<baseUrl>/api/features/<id>`.
 *
 * @param featureId Id of the feature to fetch.
 * @param cookieHeader Value sent as the `Cookie` request header.
 * @param baseUrl Workspace origin; trailing slashes are ignored.
 * @returns The `feature` object of the response body.
 */
const getFeatureDetail = async ({ featureId, cookieHeader, baseUrl = DEFAULT_PRODUCTBOARD_BASE_URL }:{featureId: string, cookieHeader: string, baseUrl?: string}) => {
    const origin = baseUrl.replace(/\/+$/, '');
    const response = await axios.get(`${origin}/api/features/${featureId}`, { headers: { Cookie: cookieHeader } });
    return response.data.feature;
};

/**
 * Creates a handler that enriches one feature from `featureItemsMap` with the
 * description from its detail endpoint, and does the same for each of its subfeatures.
 *
 * @param cookieHeader Value sent as the `Cookie` header of every detail request.
 * @param featureItemsMap Features keyed by id, as assembled from the initial response.
 * @param baseUrl Optional workspace origin for the detail requests; defaults to
 *                the origin that used to be hard-coded, so existing callers are unaffected.
 * @returns Async handler taking `{ featureId }` and resolving to the enriched feature.
 *          The handler throws when `featureId` is not a key of `featureItemsMap`.
 */
export const getHandleDetailRequest = ({ cookieHeader, featureItemsMap, baseUrl = DEFAULT_PRODUCTBOARD_BASE_URL }: { cookieHeader: string, featureItemsMap: Record<string, FeatureData>, baseUrl?: string }) => (
    async ({ featureId }: { featureId: string }): Promise<FeatureData> => {
        // Fail with a clear message, and before any network traffic, on an unknown id.
        if (!Object.prototype.hasOwnProperty.call(featureItemsMap, featureId)) {
            throw new Error(`Unknown feature id: ${featureId}`);
        }
        const feature = featureItemsMap[featureId];
        const featureDetail = await getFeatureDetail({ featureId, cookieHeader, baseUrl });

        const subfeatures = feature.features;
        if (subfeatures === null) {
            return {
                ...feature,
                description: featureDetail.description,
            };
        }

        // Subfeature details are independent of each other, so they are fetched concurrently.
        const subfeaturesWithDescription = Object.fromEntries(
            await Promise.all(
                Object.entries(subfeatures).map(async ([subfeatureId, subfeature]) => {
                    const subfeatureDetail = await getFeatureDetail({ featureId: subfeatureId, cookieHeader, baseUrl });
                    return [subfeatureId, { ...subfeature, description: subfeatureDetail.description }];
                })));

        return {
            ...feature,
            description: featureDetail.description,
            features: subfeaturesWithDescription,
        };
    });
src/main.ts
1import { Actor } from 'apify';
2import { chromium } from 'playwright';
3import { getHandleDetailRequest, parseRequiredResponse, handleInitialRequest, handleLogin } from './handlers.js';
4import { isRoadmapInitialRequest, STORE_ID } from './constants.js';
5import {FeatureData} from "./types.js";
6
await Actor.init();

// actor input
interface Input {
    productboardRoadmapUrl: string;
    userEmail: string;
    userPassword: string
}
const { productboardRoadmapUrl, userEmail, userPassword } = await Actor.getInput<Input>() ?? {};
if (!productboardRoadmapUrl || !userEmail || !userPassword) {
    throw new Error('At least one of the required actor inputs is missing.');
}

// actor store
const keyValueStore = await Actor.openKeyValueStore(STORE_ID);

// launch and login
const browser = await chromium.launch({ headless: true });

// Everything that can fail while the browser is open runs inside try/finally,
// so the browser is closed on failure as well as on success.
try {
    const page = await browser.newPage();
    await page.goto(productboardRoadmapUrl);
    await page.waitForLoadState('load');

    await handleLogin(page, { userEmail, userPassword });

    // The predicate only identifies the roadmap's initial response. Parsing, detail
    // requests and storage happen after the wait has resolved, so they are no longer
    // bound by the wait's timeout.
    const initialResponse = await page.waitForResponse((response) => (
        isRoadmapInitialRequest({ requestUrl: response.request().url(), productboardRoadmapUrl })
    ));

    const requiredResponse = await parseRequiredResponse({ response: initialResponse });
    if (!requiredResponse) {
        throw new Error('The initial roadmap response could not be parsed.');
    }

    const featureItemsMap = await handleInitialRequest(requiredResponse);
    if (!featureItemsMap) {
        throw new Error('No feature data could be built from the initial roadmap response.');
    }

    // Reuse the cookies the browser sent with the initial request for the detail requests.
    const cookieHeader = (await initialResponse.request().allHeaders()).cookie;
    const handleDetailRequest = getHandleDetailRequest({ cookieHeader, featureItemsMap });

    // get and store data of every feature
    await Promise.all(Object.keys(featureItemsMap).map(async (featureId) => {
        const valueToStore = await handleDetailRequest({ featureId });
        await keyValueStore.setValue(featureId, valueToStore);
    }));
} finally {
    await browser.close();
}

// Collect every record of the store into a single map saved under OUTPUT.
const output: Record<string, FeatureData | null> = {};
await keyValueStore.forEachKey(async (key) => {
    output[key] = await keyValueStore.getValue(key);
});

await Actor.setValue('OUTPUT', output);

await Actor.exit();
src/types.ts
/** A subfeature as stored inside its parent feature. */
export type SubfeatureData = {
    title: string;
    /** `null` until a description has been filled in. */
    description: string | null;
    /** A single release name, or `null` when none is assigned. */
    timeline: string | null;
};

/** A feature record as produced by the scraper. */
export type FeatureData = {
    title: string;
    /** `null` until a description has been filled in. */
    description: string | null;
    /** Release names assigned to the feature; empty when there are none. */
    timeline: string[];
    team: string | null;
    /** Subfeatures keyed by subfeature id, or `null` when the feature has none. */
    features: Record<string, SubfeatureData> | null;
};
src/utils.ts
/**
 * Type guard for non-null values whose `typeof` is `'object'`.
 * Note that arrays satisfy this check as well.
 */
export const isObject = (test: unknown): test is Record<string, unknown> => (
    typeof test === 'object' && test !== null
);

/**
 * Type guard for arrays in which every element passes `isObject`.
 * An empty array passes.
 */
export const isArrayOfObjects = (test: unknown): test is Array<Record<string, unknown>> => {
    if (!Array.isArray(test)) return false;
    return test.every((entry) => isObject(entry));
};
Developer
Maintained by Community
Actor Metrics
1 monthly user
-
1 star
Created in Mar 2024
Modified a month ago
Categories