Productboard Roadmap Scraper avatar
Productboard Roadmap Scraper

Pricing

Pay per usage

Go to Store
Productboard Roadmap Scraper

Productboard Roadmap Scraper

Developed by

Zuzana Štětinová

Zuzana Štětinová

Maintained by Community

This scraper extracts data from a Productboard roadmap and saves it to a key-value store (under the OUTPUT key) as a map of feature ID → feature. The scraped data for each feature includes its name, description, timeline, team, and connected features, each with its own title, description, and timeline.

0.0 (0)

Pricing

Pay per usage

1

Total users

1

Monthly users

1

Last modified

5 months ago

.dockerignore

# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"root": true,
"env": {
"browser": true,
"es2020": true,
"node": true
},
"extends": [
"@apify/eslint-config-ts"
],
"parserOptions": {
"project": "./tsconfig.json",
"ecmaVersion": 2020
},
"ignorePatterns": [
"node_modules",
"dist",
"**/*.d.ts"
]
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage
# Added by Apify CLI
.venv

package.json

{
"name": "productboard-roadmap-scraper",
"version": "0.0.1",
"type": "module",
"description": "Apify actor to scrape data from Productboard roadmap.",
"engines": {
"node": ">=18.0.0"
},
"dependencies": {
"@apify/log": "^2.5.0",
"apify": "^3.1.10",
"axios": "^1.6.7",
"crawlee": "^3.5.4",
"playwright": "1.42.1"
},
"devDependencies": {
"@apify/eslint-config-ts": "^0.3.0",
"@apify/tsconfig": "^0.1.0",
"@typescript-eslint/eslint-plugin": "^6.7.2",
"@typescript-eslint/parser": "^6.7.2",
"eslint": "^8.50.0",
"tsx": "^4.6.2",
"typescript": "^5.3.3"
},
"scripts": {
"start": "npm run start:dev",
"start:prod": "node dist/main.js",
"start:dev": "tsx src/main.ts",
"build": "tsc",
"lint": "eslint ./src --ext .ts",
"lint:fix": "eslint ./src --ext .ts --fix"
},
"license": "ISC"
}

tsconfig.json

{
"extends": "@apify/tsconfig",
"compilerOptions": {
"module": "NodeNext",
"moduleResolution": "NodeNext",
"target": "ES2022",
"outDir": "dist",
"noUnusedLocals": false,
"skipLibCheck": true,
"lib": ["DOM"]
},
"include": [
"./src/**/*"
]
}

.actor/Dockerfile

# Two-stage build: the first stage compiles the TypeScript sources, the second
# stage is the runtime image that only installs production dependencies.

# --- Build stage -------------------------------------------------------------
FROM apify/actor-node-playwright-chrome:18 AS builder
# Copy the package manifests before the rest of the sources so that the
# dependency install below is a separate layer from the source copy.
COPY --chown=myuser package*.json ./
# Dev dependencies are required here because the build step needs the compiler.
RUN npm install --include=dev --audit=false
COPY --chown=myuser . ./
# Runs the "build" script from package.json (tsc), which emits into ./dist.
RUN npm run build

# --- Runtime stage -----------------------------------------------------------
FROM apify/actor-node-playwright-chrome:18
COPY --chown=myuser package*.json ./
# Install production dependencies only, print the installed packages and the
# Node.js/NPM versions into the build log, then remove the npm cache directory.
RUN npm --quiet set progress=false \
&& npm install --omit=dev --omit=optional \
&& echo "Installed NPM packages:" \
&& (npm list --omit=dev --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version \
&& rm -r ~/.npm
# Bring in the compiled JavaScript produced by the build stage.
COPY --from=builder --chown=myuser /home/myuser/dist ./dist
# Copy the remaining project files (filtered by .dockerignore) into the image.
COPY --chown=myuser . ./
# start_xvfb_and_run_cmd.sh is not part of this project; presumably it is
# provided by the base image (TODO confirm). Afterwards "start:prod" runs
# `node dist/main.js` as defined in package.json.
CMD ./start_xvfb_and_run_cmd.sh && npm run start:prod --silent

.actor/actor.json

{
"actorSpecification": 1,
"name": "productboard-roadmap-scraper",
"title": "Productboard roadmap scraper",
"description": "Actor is scraping basic data from Productboard roadmap to a key-value store.",
"version": "0.0",
"input": "./input_schema.json",
"dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
"title": "Productboard roadmap scraper",
"type": "object",
"schemaVersion": 1,
"properties": {
"productboardRoadmapUrl": {
"title": "Productboard roadmap URL",
"type": "string",
"description": "URL of the Productboard roadmap.",
"editor": "textfield"
},
"userEmail": {
"title": "Productboard user email",
"type": "string",
"description": "Email for Productboard account used for access.",
"editor": "textfield"
},
"userPassword": {
"title": "Productboard user password",
"type": "string",
"description": "Password for Productboard account used for access.",
"editor": "textfield",
"isSecret": true
}
},
"required": ["productboardRoadmapUrl", "userEmail", "userPassword"]
}

src/constants.ts

/** CSS selectors of the controls on the Productboard login form. */
export const loginSelectors = {
    emailInput: 'input#email',
    passwordInput: 'input#password',
    submitButton: 'button[type="submit"]',
} as const;

/** Name of the named key-value store in which each scraped feature is saved under its id. */
export const STORE_ID = 'result';

/** Key name reserved for the aggregated result record. */
export const STORE_RESULT_KEY = 'OUTPUT';
9
/**
 * Extracts the roadmap id from a roadmap URL of the form `…/roadmap/<id>-<slug>`.
 *
 * The id is everything after `/roadmap/` up to the first `-`, `/`, `?` or `#`,
 * so URLs without a slug or with a trailing path/query still yield the bare id.
 *
 * @param url Roadmap URL supplied by the user.
 * @returns The roadmap id.
 * @throws Error when the URL has no `/roadmap/` segment or the id is empty.
 */
const getRoadmapId = (url: string): string => {
    const afterRoadmap: string | undefined = url.split('/roadmap/')[1];
    const roadmapId = afterRoadmap?.split(/[-/?#]/)[0];
    if (!roadmapId) {
        throw new Error(`Cannot extract roadmap id from URL "${url}": expected ".../roadmap/<id>-<name>".`);
    }
    return roadmapId;
};

/**
 * Tells whether a request URL is the "initial" data request of the given roadmap,
 * i.e. whether it contains `/<roadmapId>/initial`.
 *
 * @param requestUrl URL of the observed request.
 * @param productboardRoadmapUrl Roadmap URL from which the roadmap id is derived.
 * @returns `true` when the request URL targets the roadmap's initial endpoint.
 * @throws Error when no roadmap id can be extracted from `productboardRoadmapUrl`.
 */
export const isRoadmapInitialRequest = ({ requestUrl, productboardRoadmapUrl }:{requestUrl: string, productboardRoadmapUrl: string}) => (
    requestUrl.includes(`/${getRoadmapId(productboardRoadmapUrl)}/initial`)
);

src/handlers.ts

1import { Page, Response } from 'playwright';
2import axios from 'axios';
3import log from '@apify/log';
4import { isArrayOfObjects, isObject } from './utils.js';
5import { FeatureData } from './types.js';
6import { loginSelectors } from './constants.js';
7
/**
 * Fills in the Productboard login form on the given page and submits it.
 *
 * Each input is focused with a click and its value is then typed through the
 * page keyboard; finally the submit button is clicked.
 *
 * @param page Playwright page currently showing the login form.
 * @param credentials Email and password of the Productboard account.
 */
export const handleLogin = async (
    page: Page,
    { userEmail, userPassword }: { userEmail: string; userPassword: string },
) => {
    const { emailInput, passwordInput, submitButton } = loginSelectors;
    const fieldsToFill: Array<[string, string]> = [
        [emailInput, userEmail],
        [passwordInput, userPassword],
    ];

    for (const [selector, text] of fieldsToFill) {
        await page.click(selector);
        await page.keyboard.type(text);
    }

    await page.click(submitButton);
};
18
/** Returns true when the raw item carries `featureType === 'feature'`. */
function isFeature(test: Record<string, unknown>): boolean {
    return test.featureType === 'feature';
}

/** Returns true when the raw item carries `featureType === 'subfeature'`. */
function isSubfeature(test: Record<string, unknown>): boolean {
    return test.featureType === 'subfeature';
}
21
/** The collections of the roadmap "initial" payload that the scraper consumes. */
type RequiredResponse = {
    releases: Array<Record<string, unknown>>;
    features: Array<Record<string, unknown>>;
    listColumnItems: Array<Record<string, unknown>>;
    releaseAssignments: Array<Record<string, unknown>>;
    columnValues: Array<Record<string, unknown>>;
};

/**
 * Validates an already decoded payload and picks the required collections from it.
 *
 * @throws Error when the payload is not an object or any collection is not an array of objects.
 */
const extractRequiredCollections = (payload: unknown): RequiredResponse => {
    if (!isObject(payload)) {
        throw new Error('Invalid response format');
    }

    const { releases, features, listColumnItems, releaseAssignments, columnValues } = payload;

    if (!isArrayOfObjects(releases)) {
        throw new Error('Invalid data format of releases');
    }
    if (!isArrayOfObjects(features)) {
        throw new Error('Invalid data format of features');
    }
    if (!isArrayOfObjects(listColumnItems)) {
        throw new Error('Invalid data format of listColumnItems');
    }
    if (!isArrayOfObjects(releaseAssignments)) {
        throw new Error('Invalid data format of releaseAssignments');
    }
    if (!isArrayOfObjects(columnValues)) {
        throw new Error('Invalid data format of columnValues');
    }

    return { releases, features, listColumnItems, releaseAssignments, columnValues };
};

/**
 * Reads the JSON body of a response and returns the collections needed by the scraper.
 *
 * Any failure (body cannot be read as JSON, unexpected shape) is logged and
 * reported to the caller as `null` instead of being thrown.
 *
 * @param response Playwright response whose body should be parsed.
 * @returns The validated collections, or `null` when parsing or validation failed.
 */
export const parseRequiredResponse = async ({ response } : { response: Response }): Promise<RequiredResponse | null> => {
    try {
        const payload: unknown = await response.json();
        return extractRequiredCollections(payload);
    } catch (err) {
        log.error('Failed to parse response as JSON.', { err });
        return null;
    }
};
49
/**
 * Builds a lookup from release id to release name.
 *
 * @param releases Raw release records; `id` becomes the key and `name` the value.
 */
const getReleasesMap = (releases: Array<Record<string, unknown>>): Record<string, string> => {
    const idNamePairs: Array<[unknown, unknown]> = [];
    for (const { id, name } of releases) {
        idNamePairs.push([id, name]);
    }
    return Object.fromEntries(idNamePairs);
};
53
/**
 * Builds a lookup from list-column item id to its label (used as the team name).
 *
 * @param listColumnItems Raw list-column items; `id` becomes the key and `label` the value.
 */
const getTeamsMap = (listColumnItems: Array<Record<string, unknown>>): Record<string, string> => {
    const idLabelPairs = Array.from(listColumnItems, ({ id, label }) => [id, label]);
    return Object.fromEntries(idLabelPairs);
};
57
/** Features keyed by their id. */
type FeatureItemsMap = Record<string, FeatureData>;
/** Subfeatures keyed by their id; `parentId` points at the owning feature. */
type SubfeatureItemsMap = Record<string, { id: string; parentId: string; title: string; timeline: string | null }>;

/**
 * Splits the raw feature records into a map of features and a map of subfeatures.
 *
 * Records whose `featureType` is neither `feature` nor `subfeature` are ignored.
 * Features start with an empty timeline and no description, team or subfeatures;
 * subfeatures start without a timeline.
 *
 * @param features Raw feature records from the initial roadmap response.
 */
const getFeatureItemMaps = (features: Array<Record<string, unknown>>): {
    featureItemsMap: FeatureItemsMap;
    subfeatureItemsMap: SubfeatureItemsMap
} => {
    const featureItemsMap: FeatureItemsMap = {};
    const subfeatureItemsMap: SubfeatureItemsMap = {};

    for (const item of features) {
        if (isFeature(item)) {
            featureItemsMap[String(item.id)] = {
                title: String(item.name),
                description: null,
                timeline: [],
                team: null,
                features: null,
            };
        } else if (isSubfeature(item)) {
            subfeatureItemsMap[String(item.id)] = {
                id: String(item.id),
                title: String(item.name),
                parentId: String(item.parentId),
                timeline: null,
            };
        }
    }

    return { featureItemsMap, subfeatureItemsMap };
};
82
/**
 * Assigns a team to every feature referenced by the column values.
 *
 * Mutates `featureItemsMap` in place. Entries whose `featureId` or `value` is
 * neither a string nor a number are skipped with a warning; entries that point
 * at an id missing from `featureItemsMap` are ignored.
 *
 * @param columnValues Raw column values linking a feature id to a list-column item id.
 * @param teamsMap Lookup from list-column item id to team label.
 * @param featureItemsMap Features to be updated.
 */
const addTeamsToFeatureMaps = (
    { columnValues, teamsMap, featureItemsMap }:{
        columnValues: Array<Record<string, unknown>>,
        teamsMap: Record<string, string>,
        featureItemsMap: FeatureItemsMap
    },
) => {
    const isKey = (candidate: unknown): candidate is string | number => (
        typeof candidate === 'number' || typeof candidate === 'string'
    );

    for (const { value, featureId } of columnValues) {
        if (!isKey(featureId) || !isKey(value)) {
            log.warning('Invalid feature id or value in columnValues // skipped', { featureId, value });
            continue;
        }
        if (featureId in featureItemsMap) {
            featureItemsMap[featureId].team = teamsMap[value];
        }
    }
};
100
/**
 * Applies release assignments to features and subfeatures.
 *
 * Mutates both maps in place: a feature collects the names of all releases
 * assigned to it, while a subfeature keeps only the most recently processed
 * release name. Assignments whose ids are neither strings nor numbers are
 * skipped with a warning.
 *
 * @param releaseAssignments Raw records linking a feature id to a release id.
 * @param releasesMap Lookup from release id to release name.
 * @param featureItemsMap Features to be updated.
 * @param subfeatureItemsMap Subfeatures to be updated.
 */
const addReleasesToFeatureMaps = (
    { releaseAssignments, releasesMap, featureItemsMap, subfeatureItemsMap }:{
        releaseAssignments: Array<Record<string, unknown>>,
        releasesMap: Record<string, string>,
        featureItemsMap: FeatureItemsMap,
        subfeatureItemsMap: SubfeatureItemsMap
    },
) => {
    const isKey = (candidate: unknown): candidate is string | number => (
        typeof candidate === 'number' || typeof candidate === 'string'
    );

    for (const { releaseId, featureId } of releaseAssignments) {
        if (!isKey(featureId) || !isKey(releaseId)) {
            log.warning('Invalid release or feature id in releaseAssignments // skipped', { releaseId, featureId });
            continue;
        }

        if (featureId in featureItemsMap) {
            const feature = featureItemsMap[featureId];
            feature.timeline = [...feature.timeline, releasesMap[releaseId]];
        }
        if (featureId in subfeatureItemsMap) {
            subfeatureItemsMap[featureId].timeline = releasesMap[releaseId];
        }
    }
};
122
/**
 * Attaches every subfeature to its parent feature.
 *
 * Mutates `featureItemsMap` in place: each subfeature is stored under its id in
 * the parent's `features` record with its title, timeline and an empty
 * description. A subfeature whose parent is not present in `featureItemsMap`
 * is skipped with a warning instead of aborting the run with a TypeError.
 *
 * @param featureItemsMap Features to be updated.
 * @param subfeatureItemsMap Subfeatures to distribute among their parents.
 */
const addSubfeaturesToFeatureMaps = (
    { featureItemsMap, subfeatureItemsMap }:{
        featureItemsMap: FeatureItemsMap,
        subfeatureItemsMap: SubfeatureItemsMap
    },
) => {
    for (const { id, title, timeline, parentId } of Object.values(subfeatureItemsMap)) {
        // The index signature hides that the lookup can miss, hence the explicit annotation.
        const parentFeature: FeatureData | undefined = featureItemsMap[parentId];
        if (!parentFeature) {
            log.warning('Parent feature of subfeature not found // skipped', { id, parentId });
            continue;
        }

        parentFeature.features = { ...parentFeature.features, [id]: { title, description: null, timeline } };
    }
};
136
/**
 * Turns the validated initial roadmap response into a map of features keyed by id.
 *
 * Teams and release timelines are applied first; subfeatures are attached to
 * their parents last so that they already carry their own timeline.
 *
 * @param response Validated collections from the initial roadmap response.
 * @returns Features keyed by id (descriptions are not filled in at this stage).
 */
export const handleInitialRequest = async (response: RequiredResponse): Promise<Record<string, FeatureData> | null> => {
    const { featureItemsMap, subfeatureItemsMap } = getFeatureItemMaps(response.features);

    addTeamsToFeatureMaps({
        columnValues: response.columnValues,
        teamsMap: getTeamsMap(response.listColumnItems),
        featureItemsMap,
    });
    addReleasesToFeatureMaps({
        releaseAssignments: response.releaseAssignments,
        releasesMap: getReleasesMap(response.releases),
        featureItemsMap,
        subfeatureItemsMap,
    });
    addSubfeaturesToFeatureMaps({ featureItemsMap, subfeatureItemsMap });

    return featureItemsMap;
};
151
/** Origin used for feature-detail requests when the caller does not supply one. */
const DEFAULT_PRODUCTBOARD_ORIGIN = 'https://apify.productboard.com';

/**
 * Fetches the detail record of one feature or subfeature from the Productboard API.
 *
 * @param featureId Id of the feature; it is URL-encoded before being put into the path.
 * @param cookieHeader Value sent as the `Cookie` header to authenticate the request.
 * @param productboardOrigin Origin of the Productboard workspace; defaults to the Apify workspace.
 * @returns The `feature` property of the response body.
 */
const getFeatureDetail = async (
    { featureId, cookieHeader, productboardOrigin = DEFAULT_PRODUCTBOARD_ORIGIN }: {
        featureId: string,
        cookieHeader: string,
        productboardOrigin?: string
    },
) => {
    // Tolerate a trailing slash in the configured origin.
    const origin = productboardOrigin.replace(/\/+$/, '');
    const url = `${origin}/api/features/${encodeURIComponent(featureId)}`;
    const { data } = await axios.get(url, { headers: { Cookie: cookieHeader } });
    return data.feature;
};

/**
 * Creates a handler that enriches a feature (and its subfeatures) with descriptions
 * fetched from the feature-detail endpoint.
 *
 * @param cookieHeader Value sent as the `Cookie` header with every detail request.
 * @param featureItemsMap Features keyed by id, as produced from the initial response.
 * @param productboardOrigin Optional origin of the Productboard workspace; when omitted,
 *   the previously hard-coded Apify workspace is used, so existing callers are unaffected.
 * @returns An async function that resolves to the feature with the given id, including
 *   its description and the descriptions of its subfeatures. It throws an Error when
 *   the id is not present in `featureItemsMap`.
 */
export const getHandleDetailRequest = (
    { cookieHeader, featureItemsMap, productboardOrigin }: {
        cookieHeader: string,
        featureItemsMap: Record<string, FeatureData>,
        productboardOrigin?: string
    },
) => (
    async ({ featureId }: { featureId: string }): Promise<FeatureData> => {
        // Fail early and clearly instead of hitting a TypeError after a network round-trip.
        const feature: FeatureData | undefined = featureItemsMap[featureId];
        if (!feature) {
            throw new Error(`Feature "${featureId}" is not present in the feature map.`);
        }

        const featureDetail = await getFeatureDetail({ featureId, cookieHeader, productboardOrigin });

        const subfeatures = feature.features;
        if (subfeatures === null) {
            return {
                ...feature,
                description: featureDetail.description,
            };
        }

        // Subfeature details are independent of each other, so they are fetched concurrently.
        const subfeaturesWithDescription = Object.fromEntries(
            await Promise.all(
                Object.entries(subfeatures).map(async ([subfeatureId, subfeature]) => {
                    const subfeatureDetail = await getFeatureDetail({ featureId: subfeatureId, cookieHeader, productboardOrigin });
                    return [subfeatureId, { ...subfeature, description: subfeatureDetail.description }];
                })));

        return {
            ...feature,
            description: featureDetail.description,
            features: subfeaturesWithDescription,
        };
    });

src/main.ts

1import { Actor } from 'apify';
2import { chromium } from 'playwright';
3import { getHandleDetailRequest, parseRequiredResponse, handleInitialRequest, handleLogin } from './handlers.js';
4import { isRoadmapInitialRequest, STORE_ID } from './constants.js';
5import {FeatureData} from "./types.js";
6
// Entry point of the actor: logs into Productboard, captures the roadmap's
// "initial" data response, enriches every feature with its description and
// stores the result in the key-value stores.
await Actor.init();

// actor input
interface Input {
    productboardRoadmapUrl: string;
    userEmail: string;
    userPassword: string
}
const { productboardRoadmapUrl, userEmail, userPassword } = await Actor.getInput<Input>() ?? {};
if (!productboardRoadmapUrl || !userEmail || !userPassword) {
    throw new Error('At least one of the required actor inputs is missing.');
}

// actor store
const keyValueStore = await Actor.openKeyValueStore(STORE_ID);

// launch and login
const browser = await chromium.launch({ headless: true });

// try/finally guarantees the browser is closed even when scraping fails.
try {
    const page = await browser.newPage();
    await page.goto(productboardRoadmapUrl);
    await page.waitForLoadState('load');

    // Holder for the data parsed inside the response predicate; an object is used
    // so the value assigned in the callback is visible (and correctly typed) afterwards.
    const captured: { featureItemsMap: Record<string, FeatureData> | null } = { featureItemsMap: null };

    // The response listener must be registered BEFORE the login form is submitted,
    // otherwise the initial response may arrive before anyone is waiting for it.
    // The predicate only identifies and parses the response; the slow detail
    // requests run afterwards so they are not bound by the response-wait timeout.
    const [initialResponse] = await Promise.all([
        page.waitForResponse(async (response) => {
            const isInitialRequest = isRoadmapInitialRequest({ requestUrl: response.request().url(), productboardRoadmapUrl });
            if (!isInitialRequest) return false;

            const requiredResponse = await parseRequiredResponse({ response });
            if (!requiredResponse) return false;

            captured.featureItemsMap = await handleInitialRequest(requiredResponse);
            return captured.featureItemsMap !== null;
        }),
        handleLogin(page, { userEmail, userPassword }),
    ]);

    const { featureItemsMap } = captured;
    if (!featureItemsMap) {
        throw new Error('Initial roadmap response was matched but produced no feature data.');
    }

    // Reuse the cookies of the authenticated initial request for the detail requests.
    const cookieHeader = (await initialResponse.request().allHeaders()).cookie;
    const handleDetailRequest = getHandleDetailRequest({ cookieHeader, featureItemsMap });

    // get and store data of every feature
    await Promise.all(Object.keys(featureItemsMap).map(async (featureId) => {
        const valueToStore = await handleDetailRequest({ featureId });
        await keyValueStore.setValue(featureId, valueToStore);
    }));

    // Collect all stored records into a single OUTPUT record in the default store.
    const output: Record<string, FeatureData | null> = {};
    await keyValueStore.forEachKey(async (key) => {
        output[key] = await keyValueStore.getValue(key);
    });

    await Actor.setValue('OUTPUT', output);
} finally {
    await browser.close();
}

await Actor.exit();

src/types.ts

/** A subfeature attached to a roadmap feature. */
export type SubfeatureData = {
    /** Name of the subfeature. */
    title: string;
    /** Description from the feature-detail request; `null` until it has been fetched. */
    description: string | null;
    /** Name of the release assigned to the subfeature, or `null` when none was assigned. */
    timeline: string | null;
};

/** A roadmap feature as stored in the scraper output. */
export type FeatureData = {
    /** Name of the feature. */
    title: string;
    /** Description from the feature-detail request; `null` until it has been fetched. */
    description: string | null;
    /** Names of all releases assigned to the feature. */
    timeline: string[];
    /** Label of the team assigned to the feature, or `null` when none was assigned. */
    team: string | null;
    /** Subfeatures keyed by their id, or `null` when the feature has none. */
    features: Record<string, SubfeatureData> | null;
};

src/utils.ts

/**
 * Type guard for any non-null value whose `typeof` is `'object'`.
 * Note that arrays satisfy this check as well.
 */
export const isObject = (test: unknown): test is Record<string, unknown> => {
    if (test === null) {
        return false;
    }
    return typeof test === 'object';
};

/**
 * Type guard for an array in which every element passes {@link isObject}.
 * An empty array is accepted.
 */
export const isArrayOfObjects = (test: unknown): test is Array<Record<string, unknown>> => {
    if (!Array.isArray(test)) {
        return false;
    }
    const hasNonObject = test.some((element) => !isObject(element));
    return !hasNonObject;
};