Productboard Roadmap Scraper avatar

Productboard Roadmap Scraper

Try for free

No credit card required

Go to Store
Productboard Roadmap Scraper

Productboard Roadmap Scraper

zuzana_stetinova/productboard-roadmap-scraper
Try for free

No credit card required

This scraper extracts data from a Productboard roadmap and saves it to a key-value store under the OUTPUT key as a map of feature ID to feature data. For each feature it collects the name, description, timeline, and team, plus any connected features with their title, description, and timeline.

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "root": true,
3    "env": {
4        "browser": true,
5        "es2020": true,
6        "node": true
7    },
8    "extends": [
9        "@apify/eslint-config-ts"
10    ],
11    "parserOptions": {
12        "project": "./tsconfig.json",
13        "ecmaVersion": 2020
14    },
15    "ignorePatterns": [
16        "node_modules",
17        "dist",
18        "**/*.d.ts"
19    ]
20}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv

package.json

1{
2	"name": "productboard-roadmap-scraper",
3	"version": "0.0.1",
4	"type": "module",
5	"description": "Apify actor to scrape data from Productboard roadmap.",
6	"engines": {
7		"node": ">=18.0.0"
8	},
9	"dependencies": {
10		"@apify/log": "^2.5.0",
11		"apify": "^3.1.10",
12		"axios": "^1.6.7",
13		"crawlee": "^3.5.4",
14		"playwright": "1.42.1"
15	},
16	"devDependencies": {
17		"@apify/eslint-config-ts": "^0.3.0",
18		"@apify/tsconfig": "^0.1.0",
19		"@typescript-eslint/eslint-plugin": "^6.7.2",
20		"@typescript-eslint/parser": "^6.7.2",
21		"eslint": "^8.50.0",
22		"tsx": "^4.6.2",
23		"typescript": "^5.3.3"
24	},
25	"scripts": {
26		"start": "npm run start:dev",
27		"start:prod": "node dist/main.js",
28		"start:dev": "tsx src/main.ts",
29		"build": "tsc",
30		"lint": "eslint ./src --ext .ts",
31		"lint:fix": "eslint ./src --ext .ts --fix"
32	},
33	"license": "ISC"
34}

tsconfig.json

1{
2    "extends": "@apify/tsconfig",
3    "compilerOptions": {
4        "module": "NodeNext",
5        "moduleResolution": "NodeNext",
6        "target": "ES2022",
7        "outDir": "dist",
8        "noUnusedLocals": false,
9        "skipLibCheck": true,
10        "lib": ["DOM"]
11    },
12    "include": [
13        "./src/**/*"
14    ]
15}

.actor/Dockerfile

1FROM apify/actor-node-playwright-chrome:18 AS builder
2
3COPY --chown=myuser package*.json ./
4
5RUN npm install --include=dev --audit=false
6
7COPY --chown=myuser . ./
8
9RUN npm run build
10
11FROM apify/actor-node-playwright-chrome:18
12
13COPY --chown=myuser package*.json ./
14
15RUN npm --quiet set progress=false \
16    && npm install --omit=dev --omit=optional \
17    && echo "Installed NPM packages:" \
18    && (npm list --omit=dev --all || true) \
19    && echo "Node.js version:" \
20    && node --version \
21    && echo "NPM version:" \
22    && npm --version \
23    && rm -r ~/.npm
24
25COPY --from=builder --chown=myuser /home/myuser/dist ./dist
26
27COPY --chown=myuser . ./
28
29
30CMD ./start_xvfb_and_run_cmd.sh && npm run start:prod --silent

.actor/actor.json

1{
2	"actorSpecification": 1,
3	"name": "productboard-roadmap-scraper",
4	"title": "Productboard roadmap scraper",
5	"description": "Actor is scraping basic data from Productboard roadmap to a key-value store.",
6	"version": "0.0",
7	"input": "./input_schema.json",
8	"dockerfile": "./Dockerfile"
9}

.actor/input_schema.json

1{
2    "title": "Productboard roadmap scraper",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "productboardRoadmapUrl": {
7            "title": "Productboard roadmap URL",
8            "type": "string",
9            "description": "URL of the Productboard roadmap.",
10            "editor": "textfield"
11        },
12        "userEmail": {
13            "title": "Productboard user email",
14            "type": "string",
15            "description": "Email for Productboard account used for access.",
16            "editor": "textfield"
17        },
18        "userPassword": {
19            "title": "Productboard user password",
20            "type": "string",
21            "description": "Password for Productboard account used for access.",
22            "editor": "textfield",
23            "isSecret": true
24        }
25    },
26    "required": ["productboardRoadmapUrl", "userEmail", "userPassword"]
27}

src/constants.ts

/** CSS selectors of the login form controls that `handleLogin` interacts with. */
export const loginSelectors = {
    emailInput: 'input#email',
    passwordInput: 'input#password',
    submitButton: 'button[type="submit"]',
} as const;

/** Name of the key-value store opened by `main.ts` for the per-feature records. */
export const STORE_ID = 'result';
/** Record key for the aggregated result (same value as the `'OUTPUT'` key written by `main.ts`). */
export const STORE_RESULT_KEY = 'OUTPUT';

/**
 * Extracts the roadmap ID from a roadmap URL of the form `…/roadmap/<id>-<slug>`.
 *
 * The ID is the text following the first `/roadmap/` segment, cut at the first
 * `-`, `/`, `?` or `#`, so URLs without a slug but with a query string or a
 * trailing path still yield a clean ID.
 *
 * @returns The roadmap ID, or `null` when the URL has no `/roadmap/` segment or the ID is empty.
 */
const getRoadmapId = (url: string): string | null => {
    const [, afterMarker] = url.split('/roadmap/');
    if (afterMarker === undefined) return null;

    const [roadmapId] = afterMarker.split(/[-/?#]/);
    return roadmapId ? roadmapId : null;
};

/**
 * Tells whether a request URL is the "initial" data request of the given roadmap,
 * i.e. whether it contains `/<roadmapId>/initial`.
 *
 * @param requestUrl URL of the observed request.
 * @param productboardRoadmapUrl Roadmap URL the roadmap ID is derived from.
 * @returns `false` (instead of throwing) when no roadmap ID can be derived from `productboardRoadmapUrl`.
 */
export const isRoadmapInitialRequest = (
    { requestUrl, productboardRoadmapUrl }: { requestUrl: string, productboardRoadmapUrl: string },
): boolean => {
    const roadmapId = getRoadmapId(productboardRoadmapUrl);
    return roadmapId !== null && requestUrl.includes(`/${roadmapId}/initial`);
};

src/handlers.ts

1import { Page, Response } from 'playwright';
2import axios from 'axios';
3import log from '@apify/log';
4import { isArrayOfObjects, isObject } from './utils.js';
5import { FeatureData } from './types.js';
6import { loginSelectors } from './constants.js';
7
/**
 * Fills in and submits the login form on the given page.
 *
 * Each credential field is clicked to focus it and then typed into via the
 * keyboard; finally the submit button is clicked.
 *
 * @param page Page currently showing the login form.
 * @param userEmail Email typed into the email input.
 * @param userPassword Password typed into the password input.
 */
export const handleLogin = async (page: Page, { userEmail, userPassword }: {
    userEmail: string;
    userPassword: string
}) => {
    const { emailInput, passwordInput, submitButton } = loginSelectors;
    const credentialFields: ReadonlyArray<readonly [string, string]> = [
        [emailInput, userEmail],
        [passwordInput, userPassword],
    ];

    // Fields are filled strictly one after another: focus first, then type.
    for (const [selector, text] of credentialFields) {
        await page.click(selector);
        await page.keyboard.type(text);
    }

    await page.click(submitButton);
};
18
/** Builds a predicate that matches items whose `featureType` strictly equals `expectedType`. */
const hasFeatureType = (expectedType: string) => (
    (test: Record<string, unknown>): boolean => test.featureType === expectedType
);

/** True for items with `featureType === 'feature'`. */
const isFeature = hasFeatureType('feature');
/** True for items with `featureType === 'subfeature'`. */
const isSubfeature = hasFeatureType('subfeature');
21
/** Collections of the initial roadmap response that the scraper works with. */
type RequiredResponse = {
    releases: Array<Record<string, unknown>>;
    features: Array<Record<string, unknown>>;
    listColumnItems: Array<Record<string, unknown>>;
    releaseAssignments: Array<Record<string, unknown>>;
    columnValues: Array<Record<string, unknown>>;
}

/**
 * Reads the response body as JSON and validates that all required collections
 * are arrays of objects.
 *
 * @param response Response whose JSON body should be parsed.
 * @returns The validated collections, or `null` when reading or validating the
 * body fails (the error is logged, not rethrown).
 */
export const parseRequiredResponse = async ({ response } : { response: Response }): Promise<RequiredResponse | null> => {
    // Throws the per-collection validation error used for every required collection.
    function assertObjectArray(value: unknown, fieldName: string): asserts value is Array<Record<string, unknown>> {
        if (!isArrayOfObjects(value)) throw new Error(`Invalid data format of ${fieldName}`);
    }

    try {
        const payload: unknown = await response.json();
        if (!isObject(payload)) {
            throw new Error('Invalid response format');
        }

        const { releases, features, listColumnItems, releaseAssignments, columnValues } = payload;
        assertObjectArray(releases, 'releases');
        assertObjectArray(features, 'features');
        assertObjectArray(listColumnItems, 'listColumnItems');
        assertObjectArray(releaseAssignments, 'releaseAssignments');
        assertObjectArray(columnValues, 'columnValues');

        return { releases, features, listColumnItems, releaseAssignments, columnValues };
    } catch (err) {
        // Both body-reading failures and validation failures end up here.
        log.error('Failed to parse response as JSON.', { err });
        return null;
    }
};
49
/** Builds an `item.id -> item[valueKey]` lookup object from a list of raw items. */
const buildLookupById = (items: Array<Record<string, unknown>>, valueKey: string): Record<string, string> => {
    const entries = items.map((item) => [item.id, item[valueKey]]);
    return Object.fromEntries(entries);
};

/** Maps each release's `id` to its `name`. */
const getReleasesMap = (releases: Array<Record<string, unknown>>): Record<string, string> => buildLookupById(releases, 'name');

/** Maps each list column item's `id` to its `label`. */
const getTeamsMap = (listColumnItems: Array<Record<string, unknown>>): Record<string, string> => buildLookupById(listColumnItems, 'label');
57
/** Feature records keyed by stringified feature id. */
type FeatureItemsMap = Record<string, FeatureData>;
/** Subfeature records keyed by stringified subfeature id. */
type SubfeatureItemsMap = Record<string, { id: string; parentId: string; title: string; timeline: string | null }>;

/**
 * Splits the raw feature list into two id-keyed maps: one for items of type
 * `feature` and one for items of type `subfeature`. Items of any other type
 * are ignored.
 *
 * Features start with an empty timeline and `null` description/team/features;
 * subfeatures start with a `null` timeline.
 */
const getFeatureItemMaps = (features: Array<Record<string, unknown>>): {
    featureItemsMap: FeatureItemsMap;
    subfeatureItemsMap: SubfeatureItemsMap
} => {
    const featureEntries: Array<[string, FeatureData]> = [];
    const subfeatureEntries: Array<[string, SubfeatureItemsMap[string]]> = [];

    for (const item of features) {
        if (isFeature(item)) {
            featureEntries.push([
                String(item.id),
                { title: String(item.name), description: null, timeline: [], team: null, features: null },
            ]);
        } else if (isSubfeature(item)) {
            const id = String(item.id);
            subfeatureEntries.push([
                id,
                { id, title: String(item.name), parentId: String(item.parentId), timeline: null },
            ]);
        }
    }

    return {
        featureItemsMap: Object.fromEntries(featureEntries),
        subfeatureItemsMap: Object.fromEntries(subfeatureEntries),
    };
};
82
/**
 * Assigns team names to features, in place, based on column values.
 *
 * For every column value whose `featureId` is a known feature and whose `value`
 * is a known key of `teamsMap`, the feature's `team` is set to the mapped name.
 * Column values that do not resolve to a team are ignored, so they can never
 * overwrite an already assigned team with `undefined`.
 *
 * @param columnValues Raw column values; entries with a non string/number `featureId` or `value` are skipped with a warning.
 * @param teamsMap Lookup from list column item id to its label.
 * @param featureItemsMap Feature records to update (mutated).
 */
const addTeamsToFeatureMaps = (
    { columnValues, teamsMap, featureItemsMap }:{
        columnValues: Array<Record<string, unknown>>,
        teamsMap: Record<string, string>,
        featureItemsMap: FeatureItemsMap
    },
): void => {
    const isKey = (candidate: unknown): candidate is string | number => (
        typeof candidate === 'number' || typeof candidate === 'string'
    );
    // Own-property lookup: unlike the `in` operator it never matches inherited keys such as "toString".
    const getOwn = <T>(map: Record<string, T>, key: string | number): T | undefined => (
        Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined
    );

    for (const { value, featureId } of columnValues) {
        if (!isKey(featureId) || !isKey(value)) {
            log.warning('Invalid feature id or value in columnValues // skipped', { featureId, value });
            continue;
        }

        const feature = getOwn(featureItemsMap, featureId);
        const team = getOwn(teamsMap, value);
        // Skip values that belong to an unknown feature or that are not a known team item.
        if (feature === undefined || team === undefined) continue;

        feature.team = team;
    }
};
100
/**
 * Applies release assignments to features and subfeatures, in place.
 *
 * For a known feature the release name is appended to its `timeline`; for a
 * known subfeature the `timeline` is set to the release name. Assignments that
 * reference a release missing from `releasesMap` are ignored, so `undefined`
 * never ends up in a timeline.
 *
 * @param releaseAssignments Raw assignments; entries with a non string/number `releaseId` or `featureId` are skipped with a warning.
 * @param releasesMap Lookup from release id to release name.
 * @param featureItemsMap Feature records to update (mutated).
 * @param subfeatureItemsMap Subfeature records to update (mutated).
 */
const addReleasesToFeatureMaps = (
    { releaseAssignments, releasesMap, featureItemsMap, subfeatureItemsMap }:{
        releaseAssignments: Array<Record<string, unknown>>,
        releasesMap: Record<string, string>,
        featureItemsMap: FeatureItemsMap,
        subfeatureItemsMap: SubfeatureItemsMap
    },
): void => {
    const isKey = (candidate: unknown): candidate is string | number => (
        typeof candidate === 'number' || typeof candidate === 'string'
    );
    // Own-property lookup: unlike the `in` operator it never matches inherited keys such as "toString".
    const getOwn = <T>(map: Record<string, T>, key: string | number): T | undefined => (
        Object.prototype.hasOwnProperty.call(map, key) ? map[key] : undefined
    );

    for (const { releaseId, featureId } of releaseAssignments) {
        if (!isKey(featureId) || !isKey(releaseId)) {
            log.warning('Invalid release or feature id in releaseAssignments // skipped', { releaseId, featureId });
            continue;
        }

        const releaseName = getOwn(releasesMap, releaseId);
        // Assignment to a release that is not part of the response: nothing to record.
        if (releaseName === undefined) continue;

        const feature = getOwn(featureItemsMap, featureId);
        if (feature !== undefined) {
            feature.timeline = [...feature.timeline, releaseName];
        }

        const subfeature = getOwn(subfeatureItemsMap, featureId);
        if (subfeature !== undefined) {
            subfeature.timeline = releaseName;
        }
    }
};
122
/**
 * Attaches every subfeature to its parent feature, in place.
 *
 * Each subfeature is stored under its id in the parent's `features` map as
 * `{ title, description: null, timeline }`. Subfeatures whose parent is not in
 * `featureItemsMap` are skipped with a warning instead of raising a TypeError.
 *
 * @param featureItemsMap Feature records to update (mutated).
 * @param subfeatureItemsMap Subfeature records to attach.
 */
const addSubfeaturesToFeatureMaps = (
    { featureItemsMap, subfeatureItemsMap }:{
        featureItemsMap: FeatureItemsMap,
        subfeatureItemsMap: SubfeatureItemsMap
    },
): void => {
    for (const { id, title, timeline, parentId } of Object.values(subfeatureItemsMap)) {
        // Own-property lookup so inherited keys such as "toString" are never treated as a parent.
        const parent = Object.prototype.hasOwnProperty.call(featureItemsMap, parentId)
            ? featureItemsMap[parentId]
            : undefined;

        if (parent === undefined) {
            log.warning('Parent feature of subfeature not found // skipped', { id, parentId });
            continue;
        }

        // Spreading `null` yields an empty object, so the first subfeature needs no special case.
        parent.features = { ...parent.features, [id]: { title, description: null, timeline } };
    }
};
136
/**
 * Builds the id-keyed feature map from the validated initial roadmap response.
 *
 * Features are first split from subfeatures, then enriched with teams and
 * release timelines, and finally the subfeatures are attached to their parents.
 *
 * @param response Validated collections of the initial roadmap response.
 * @returns The enriched feature map.
 */
export const handleInitialRequest = async (response: RequiredResponse): Promise<Record<string, FeatureData> | null> => {
    const { featureItemsMap, subfeatureItemsMap } = getFeatureItemMaps(response.features);

    addTeamsToFeatureMaps({
        columnValues: response.columnValues,
        teamsMap: getTeamsMap(response.listColumnItems),
        featureItemsMap,
    });

    addReleasesToFeatureMaps({
        releaseAssignments: response.releaseAssignments,
        releasesMap: getReleasesMap(response.releases),
        featureItemsMap,
        subfeatureItemsMap,
    });

    // Must run after releases are applied so subfeatures carry their final timeline.
    addSubfeaturesToFeatureMaps({ featureItemsMap, subfeatureItemsMap });

    return featureItemsMap;
};
151
/** Workspace origin used when the caller does not provide one. */
const DEFAULT_PRODUCTBOARD_ORIGIN = 'https://apify.productboard.com';

/**
 * Fetches the detail of one feature from `<origin>/api/features/<featureId>`
 * and returns its description.
 *
 * @param featureId Id of the feature (or subfeature) to fetch; URL-encoded before use.
 * @param cookieHeader Value sent as the `Cookie` request header.
 * @param productboardOrigin Origin of the Productboard workspace; defaults to the Apify workspace.
 * @returns The description, or `null` when the response carries no string description.
 * @throws Whatever `axios.get` throws for failed requests.
 */
const getFeatureDetail = async (
    { featureId, cookieHeader, productboardOrigin = DEFAULT_PRODUCTBOARD_ORIGIN }: {
        featureId: string,
        cookieHeader: string,
        productboardOrigin?: string,
    },
): Promise<{ description: string | null }> => {
    const url = `${productboardOrigin}/api/features/${encodeURIComponent(featureId)}`;
    const { data } = await axios.get(url, { headers: { Cookie: cookieHeader } });

    const description: unknown = data?.feature?.description;
    return { description: typeof description === 'string' ? description : null };
};

/**
 * Creates a handler that completes a feature record with descriptions fetched
 * from the feature detail endpoint.
 *
 * @param cookieHeader Value sent as the `Cookie` header with every detail request.
 * @param featureItemsMap Feature records keyed by feature id.
 * @param productboardOrigin Optional workspace origin; when omitted the default origin is used.
 * @returns An async handler taking `{ featureId }` and resolving to a copy of the
 * feature with its own description and, when it has subfeatures, the
 * descriptions of all of them. The handler rejects with an `Error` when
 * `featureId` is not a key of `featureItemsMap`.
 */
export const getHandleDetailRequest = (
    { cookieHeader, featureItemsMap, productboardOrigin }: {
        cookieHeader: string,
        featureItemsMap: Record<string, FeatureData>,
        productboardOrigin?: string,
    },
) => (
    async ({ featureId }: { featureId: string }): Promise<FeatureData> => {
        type Subfeature = NonNullable<FeatureData['features']>[string];

        const feature = Object.prototype.hasOwnProperty.call(featureItemsMap, featureId)
            ? featureItemsMap[featureId]
            : undefined;
        if (feature === undefined) {
            throw new Error(`Unknown feature id: ${featureId}`);
        }

        const { description } = await getFeatureDetail({ featureId, cookieHeader, productboardOrigin });
        if (feature.features === null) {
            return { ...feature, description };
        }

        // Subfeature details are independent of each other, so they are fetched concurrently.
        const subfeatureEntries = await Promise.all(
            Object.entries(feature.features).map(async ([subfeatureId, subfeature]): Promise<[string, Subfeature]> => {
                const subfeatureDetail = await getFeatureDetail({ featureId: subfeatureId, cookieHeader, productboardOrigin });
                return [subfeatureId, { ...subfeature, description: subfeatureDetail.description }];
            }),
        );

        return {
            ...feature,
            description,
            features: Object.fromEntries(subfeatureEntries),
        };
    });

src/main.ts

1import { Actor } from 'apify';
2import { chromium } from 'playwright';
3import { getHandleDetailRequest, parseRequiredResponse, handleInitialRequest, handleLogin } from './handlers.js';
4import { isRoadmapInitialRequest, STORE_ID } from './constants.js';
5import {FeatureData} from "./types.js";
6
await Actor.init();

// actor input
interface Input {
    productboardRoadmapUrl: string;
    userEmail: string;
    userPassword: string;
}
const { productboardRoadmapUrl, userEmail, userPassword } = await Actor.getInput<Input>() ?? {};
if (!productboardRoadmapUrl || !userEmail || !userPassword) {
    throw new Error('At least one of the required actor inputs is missing.');
}

// Upper bound for logging in and receiving the roadmap's initial data response.
const INITIAL_RESPONSE_TIMEOUT_MS = 60_000;

// actor store
const keyValueStore = await Actor.openKeyValueStore(STORE_ID);

// launch and login
const browser = await chromium.launch({ headless: true });

try {
    const page = await browser.newPage();
    await page.goto(productboardRoadmapUrl);
    await page.waitForLoadState('load');

    // Holder for the data parsed inside the response predicate below.
    type InitialRoadmapData = NonNullable<Awaited<ReturnType<typeof parseRequiredResponse>>>;
    const captured: { roadmapData?: InitialRoadmapData } = {};

    // The listener is registered BEFORE the login is submitted; otherwise the initial
    // response could arrive while the login click is still settling and be missed.
    // The predicate only matches and validates the response — the slow per-feature
    // detail requests run afterwards, outside of the waitForResponse timeout.
    const [initialResponse] = await Promise.all([
        page.waitForResponse(async (response) => {
            const isInitialRequest = isRoadmapInitialRequest({ requestUrl: response.request().url(), productboardRoadmapUrl });
            if (!isInitialRequest) return false;

            const parsed = await parseRequiredResponse({ response });
            if (!parsed) return false; // keep waiting for another matching response

            captured.roadmapData = parsed;
            return true;
        }, { timeout: INITIAL_RESPONSE_TIMEOUT_MS }),
        handleLogin(page, { userEmail, userPassword }),
    ]);

    if (!captured.roadmapData) {
        throw new Error('Initial roadmap response was matched but its data was not captured.');
    }

    const featureItemsMap = await handleInitialRequest(captured.roadmapData);
    if (!featureItemsMap) {
        throw new Error('Failed to build the feature map from the initial roadmap response.');
    }

    // Detail requests reuse the session cookie the browser sent with the initial request.
    const cookieHeader = (await initialResponse.request().allHeaders()).cookie;
    if (!cookieHeader) {
        throw new Error('The initial roadmap request carried no Cookie header; feature details cannot be fetched.');
    }
    const handleDetailRequest = getHandleDetailRequest({ cookieHeader, featureItemsMap });

    // get and store data of every feature
    await Promise.all(Object.keys(featureItemsMap).map(async (featureId) => {
        const valueToStore = await handleDetailRequest({ featureId });
        await keyValueStore.setValue(featureId, valueToStore);
    }));

    // NOTE(review): every key of the named store is read back, so records left in it
    // by earlier runs would also end up in OUTPUT — confirm this is intended.
    const output: Record<string, FeatureData | null> = {};
    await keyValueStore.forEachKey(async (key) => {
        output[key] = await keyValueStore.getValue(key);
    });

    await Actor.setValue('OUTPUT', output);
} finally {
    // Release the browser even when scraping fails.
    await browser.close();
}

await Actor.exit();

src/types.ts

/** Data kept for one subfeature inside its parent feature's `features` map. */
export type SubfeatureData = {
    /** Subfeature title. */
    title: string;
    /** Subfeature description, or `null` when not available. */
    description: string | null;
    /** Timeline label of the subfeature, or `null` when it has none. */
    timeline: string | null;
};

/** Data kept for one feature. */
export type FeatureData = {
    /** Feature title. */
    title: string;
    /** Feature description, or `null` when not available. */
    description: string | null;
    /** Timeline labels of the feature; empty when it has none. */
    timeline: string[];
    /** Team label, or `null` when no team is assigned. */
    team: string | null;
    /** Subfeatures keyed by subfeature id, or `null` when the feature has none. */
    features: Record<string, SubfeatureData> | null;
};

src/utils.ts

/** Type guard: true for any non-null value whose `typeof` is `'object'` (arrays included). */
export const isObject = (test: unknown): test is Record<string, unknown> => {
    if (test === null) return false;
    return typeof test === 'object';
};

/** Type guard: true for arrays in which every element passes `isObject` (an empty array passes). */
export const isArrayOfObjects = (test: unknown): test is Array<Record<string, unknown>> => {
    if (!Array.isArray(test)) return false;
    return test.every((element) => isObject(element));
};
Developer
Maintained by Community

Actor Metrics

  • 1 monthly user

  • 1 star

  • Created in Mar 2024

  • Modified a month ago