Rulemaking Proposals
Deprecated
Pricing
Pay per usage
Go to Store
Rulemaking Proposals
Deprecated
0.0 (0)
Pricing
Pay per usage
1
Total users
1
Monthly users
1
Last modified
a year ago
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git
.editorconfig
root = true
[*]indent_style = spaceindent_size = 4charset = utf-8trim_trailing_whitespace = trueinsert_final_newline = trueend_of_line = lf
.eslintrc
{ "extends": "@apify", "root": true}
.gitignore
# This file tells Git which files shouldn't be added to source control
.DS_Store.ideadistnode_modulesapify_storagestorage
# Added by Apify CLI.venv
crawlee.json
{ "purgeOnStart": false}
Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:20

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging. The npm cache is removed in the same layer so it
# does not end up in the final image.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent
package.json
{ "name": "consumerfinance", "version": "0.0.1", "type": "commonjs", "description": "consumerfinance rules extractor", "dependencies": { "22": "^0.0.0", "apify": "^3.1.10", "crawlee": "^3.5.4", "fs": "^0.0.1-security", "node-fetch": "^3.3.2", "path": "^0.12.7", "pdf-parse": "^1.1.1", "playwright": "^1.43.1" }, "devDependencies": { "@apify/eslint-config": "^0.4.0", "@types/pdf-parse": "^1.1.4", "eslint": "^8.50.0", "@playwright/test": "^1.43.1" }, "scripts": { "start": "node src/linksdata.js && node src/main.js", "lint": "eslint ./src --ext .js,.jsx", "lint:fix": "eslint ./src --ext .js,.jsx --fix", "test": "echo \"Error: oops, the no tests yet, sad!\" && exit 1", "postinstall": "npx crawlee install-playwright-browsers" }, "author": "Moazzam Malek"}
playwright.config.js
1// @ts-check2const { defineConfig, devices } = require('@playwright/test');3
4/**5 * Read environment variables from file.6 * https://github.com/motdotla/dotenv7 */8// require('dotenv').config();9
10/**11 * @see https://playwright.dev/docs/test-configuration12 */13module.exports = defineConfig({14 testDir: './tests',15 /* Run tests in files in parallel */16 fullyParallel: true,17 /* Fail the build on CI if you accidentally left test.only in the source code. */18 forbidOnly: !!process.env.CI,19 /* Retry on CI only */20 retries: process.env.CI ? 2 : 0,21 /* Opt out of parallel tests on CI. */22 workers: process.env.CI ? 1 : undefined,23 /* Reporter to use. See https://playwright.dev/docs/test-reporters */24 reporter: 'html',25 /* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */26 use: {27 /* Base URL to use in actions like `await page.goto('/')`. */28 // baseURL: 'http://127.0.0.1:3000',29
30 /* Collect trace when retrying the failed test. See https://playwright.dev/docs/trace-viewer */31 trace: 'on-first-retry',32 },33
34 /* Configure projects for major browsers */35 projects: [36 {37 name: 'chromium',38 use: { ...devices['Desktop Chrome'] },39 },40
41 {42 name: 'firefox',43 use: { ...devices['Desktop Firefox'] },44 },45
46 {47 name: 'webkit',48 use: { ...devices['Desktop Safari'] },49 },50
51 /* Test against mobile viewports. */52 // {53 // name: 'Mobile Chrome',54 // use: { ...devices['Pixel 5'] },55 // },56 // {57 // name: 'Mobile Safari',58 // use: { ...devices['iPhone 12'] },59 // },60
61 /* Test against branded browsers. */62 // {63 // name: 'Microsoft Edge',64 // use: { ...devices['Desktop Edge'], channel: 'msedge' },65 // },66 // {67 // name: 'Google Chrome',68 // use: { ...devices['Desktop Chrome'], channel: 'chrome' },69 // },70 ],71
72 /* Run your local dev server before starting the tests */73 // webServer: {74 // command: 'npm run start',75 // url: 'http://127.0.0.1:3000',76 // reuseExistingServer: !process.env.CI,77 // },78
79 globalTimeout: 60*1000,80});
start.bat
Download
start.sh
Download
.actor/actor.json
{ "actorSpecification": 1, "name": "Rulemaking-proposals", "title": "Project Playwright Crawler JavaScript", "description": "Crawlee and Playwright project in JavaScript.", "version": "0.0", "meta": { "templateId": "js-crawlee-playwright-chrome" }, "input": "./input_schema.json", "dockerfile": "./Dockerfile"}
.actor/Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:18

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging. The last step deletes the npm cache directory.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent
.actor/input_schema.json
{ "title": "PlaywrightCrawler Template", "type": "object", "schemaVersion": 1, "properties": { "startUrls": { "title": "Start URLs", "type": "array", "description": "URLs to start with.", "editor": "requestListSources", "prefill": [ { "url": "https://apify.com" } ] } }}
src/get.js
1const fs = require('fs').promises;2const path = require('path');3
/**
 * Load the link records stored in `links.json` next to this module.
 *
 * This is best-effort: if the file cannot be read, is not valid JSON, or
 * does not contain a JSON array, the problem is logged and an empty array
 * is returned so callers can always iterate the result.
 *
 * @returns {Promise<Array>} The parsed array from `links.json`, or `[]`.
 */
async function getLinks() {
    const filePath = path.join(__dirname, 'links.json');
    try {
        const data = await fs.readFile(filePath, 'utf-8');
        const links = JSON.parse(data);

        // Callers map over the result, so anything but an array is unusable.
        if (!Array.isArray(links)) {
            console.error('Error reading links.json:', new TypeError('expected a JSON array'));
            return [];
        }
        return links;
    } catch (error) {
        console.error('Error reading links.json:', error);
        return [];
    }
}
16module.exports = { getLinks };
src/links.json
[ { "href": "https://www.regulations.gov/document/NCUA-2024-0026-0001" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0070-0007" }, { "href": "https://www.regulations.gov/document/NCUA-2024-0008-0001" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0142-0001" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0137-0003" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0117-0001" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0023-0001" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0082-0001" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0118-0003" }, { "href": "https://www.regulations.gov/document/NCUA_FRDOC_0001-0347" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0072-0001" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0070-0001" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0061-0001" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0019-0001" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0043-0009" }, { "href": "https://www.regulations.gov/document/NCUA-2023-0045-0001" }, { "href": "https://www.regulations.gov/document/NCUA-2022-0138-0023" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0179/" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0099" }, { "href": "https://www.regulations.gov/document/NCUA_FRDOC_0001-0310" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0185" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0145" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0138" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0132" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0123" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0099" }, { "href": "https://www.regulations.gov/document/NCUA-2022-0008-0007" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0008" }, { "href": 
"https://www.regulations.gov/docket/NCUA-2022-0005" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0016" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0041" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0038" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0072" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0125" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0040" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0100" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0149" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0079" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0036" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0127" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0102" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0072" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0074" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0114" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0107" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0038" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0056" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0036" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0037" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0045" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0042" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0056" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0098" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0033" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0028" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0010" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0030" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0006" }, { "href": 
"https://www.regulations.gov/docket/NCUA-2022-0042" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0080" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0081" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0016" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0043" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0125" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0019" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0021" }, { "href": "https://www.regulations.gov/docket/NCUA-2021-0001" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0114" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0116" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0098" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0033" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0107" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0036" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0026" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0080" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0081" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0074" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0037" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0064" }, { "href": "https://www.regulations.gov/document/NCUA_FRDOC_0001-0143" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0054" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0056" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0043" }, { "href": "https://www.regulations.gov/docket/NCUA-2019-0112" }, { "href": "https://www.regulations.gov/docket/NCUA-2019-0112" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0035" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0044" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0033" }, { "href": 
"https://www.regulations.gov/docket/NCUA-2020-0016" }, { "href": "https://www.regulations.gov/docket/NCUA-2020-0015" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0034" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0025" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0033" }, { "href": "https://www.regulations.gov/docket/NCUA-2019-0112" }, { "href": "https://www.regulations.gov/document/NCUA_FRDOC_0001-0076" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0028" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0026" }, { "href": "https://www.regulations.gov/docket/NCUA-2019-0121" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0027" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0030" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0017" }, { "href": "https://www.regulations.gov/docket/NCUA-2019-0120" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0044" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0030" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0051" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0033" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0025" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0028" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0029" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0041" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0027" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0075" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0032" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0031" }, { "href": "https://www.regulations.gov/docket/NCUA-2022-0030" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0039" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0017" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0050" }, { "href": 
"https://www.regulations.gov/docket/NCUA-2018-0051" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0040" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0041" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0039" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0033" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0034" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0031" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0027" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0030" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0024" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0023" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0017" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0016" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0009" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0006" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0005" }, { "href": "https://www.regulations.gov/docket/NCUA-2018-0003" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0055" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0054" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0049" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0045" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0043" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0044" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0042" }, { "href": "/about/pages/budget-strategic-planning/budget-comments-2018.aspx" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0036" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0038" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0032" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0031" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0030" }, { "href": 
"https://www.regulations.gov/docket/NCUA-2017-0025" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0022" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0024" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0026" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0023" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0017" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0018" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0016" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0007" }, { "href": "https://www.regulations.gov/docket/NCUA-2017-0002" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0089" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0088" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0081" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0085" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0039" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0073" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0075" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0076" }, { "href": "/About/Pages/budget-strategic-planning/budget-comments-2017.aspx" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0044" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0039" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0040" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0033" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0024" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0016" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0013" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0004" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0006" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0003" }, { "href": "https://www.regulations.gov/docket/NCUA-2016-0005" }, { "href": 
"https://www.regulations.gov/docket/NCUA-2015-0060" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0059" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0055" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0049" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0048" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0044" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0042" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0043" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0038" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0037" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0034" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0030" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0031" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0029" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0019" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0018" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0020" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0021" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0015" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0013" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0010" }, { "href": "https://www.regulations.gov/docket/NCUA-2015-0011" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0043" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0037" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0036" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0034" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0031" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0027" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0026" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0028" }, { "href": 
"https://www.regulations.gov/docket/NCUA-2014-0025" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0020" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0016" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0017" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0045" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0008" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0004" }, { "href": "https://www.regulations.gov/docket/NCUA-2014-0007" }, { "href": "/About/Pages/budget-strategic-planning/comments.aspx" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0123" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0124" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0122" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0125" }, { "href": "https://www.regulations.gov/docket/FRS-2013-0386" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0120" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0112" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0113" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0116" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0114" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0111" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0095" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0096" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0060" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0062" }, { "href": "https://www.regulations.gov/docket/FRS-2013-0227" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0037" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0034" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0030" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0029" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0021" }, { "href": 
"https://www.regulations.gov/docket/NCUA-2013-0014" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0013" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0011" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0003" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0005" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0004" }, { "href": "https://www.regulations.gov/docket/NCUA-2013-0006" }, { "href": "https://www.regulations.gov/docket/NCUA-2012-0041" }, { "href": "https://www.regulations.gov/docket/NCUA-2012-0040" }, { "href": "https://www.regulations.gov/docket/NCUA-2012-0038" }, { "href": "https://www.regulations.gov/docket/NCUA-2012-0015" }, { "href": "https://www.regulations.gov/docket/NCUA-2012-0026" }, { "href": "https://www.regulations.gov/docket/NCUA-2012-0016" }, { "href": "https://www.regulations.gov/docket/NCUA-2012-0017" }, { "href": "https://www.regulations.gov/docket/NCUA-2012-0010" }, { "href": "https://www.regulations.gov/docket/NCUA-2012-0005" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0073" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0076" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0067" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0066" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0063" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0055" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0048" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0039" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0041" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0034" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0035" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0032" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0028" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0029" }, { "href": 
"https://www.regulations.gov/docket/NCUA-2011-0016" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0014" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0011" }, { "href": "https://www.regulations.gov/docket/NCUA-2011-0003" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0060" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0062" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0061" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0051" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0048" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0047" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0045" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0040" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0039" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0035" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0031" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0022" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0020" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0028" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0028" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0005" }, { "href": "https://www.regulations.gov/docket/NCUA-2010-0003" }]
src/linksdata.js
1const { chromium } = require("playwright");2const fs = require('fs').promises;3const path = require('path');4
/**
 * Scrape the anchors in the NCUA rulemakings table and save their raw
 * `href` attributes to `links.json` in this module's directory.
 *
 * The output is a JSON array of `{ href }` objects. `href` is whatever the
 * attribute contains (it may be a relative path, or null when absent).
 * Failures are logged rather than thrown, and the browser is always closed.
 *
 * @returns {Promise<void>}
 */
async function scrapeLinks() {
    const browser = await chromium.launch();

    try {
        // Opened inside the try so the browser is closed even if this fails.
        const page = await browser.newPage();
        await page.goto("https://ncua.gov/regulation-supervision/rulemakings-proposals-comment");

        const links = await page.$$eval(".dynamic-table-main tbody tr td a", (elements) => {
            return elements.map((element) => ({
                href: element.getAttribute("href"),
            }));
        });

        // Resolve against this module's directory (not the process cwd) so the
        // file lands where get.js reads it, regardless of where node is started.
        const filePath = path.join(__dirname, 'links.json');

        await fs.writeFile(filePath, JSON.stringify(links, null, 2));
        console.log(`Links saved to ${filePath}`);
    } catch (error) {
        console.error("Error:", error);
    } finally {
        await browser.close();
    }
}
35scrapeLinks();
src/main.js
1const { Actor } = require("apify");2const { PlaywrightCrawler, Dataset } = require("crawlee");3const { getLinks } = require("./get");4const fs = require('fs').promises;5const path = require('path');6
// Text stored for a URL when no paragraphs could be scraped from its page.
const EMPTY_PAGE_MESSAGE = "We're sorry, an error has occurred\n\nA general error occurred while processing your request.\n\nWhat could have caused this?\n\nThe link may be outdated or incorrect.\n\nThe document or docket you are looking for was not found.\n\nWhat can you do?\n\nPlease try typing in the URL again or return to the home page.\n\nIf you believe you should not be getting this error page or the problem persists, please contact the Help Desk";

/**
 * Actor entry point.
 *
 * Loads the link records via getLinks(), keeps the hrefs that are non-blank
 * strings starting with "http", and crawls each one with a PlaywrightCrawler.
 * For every URL one item is pushed to the default dataset:
 *   - `{ url, paragraphs }` with the trimmed text of every <p> element, or a
 *     single-element array holding EMPTY_PAGE_MESSAGE when nothing was found;
 *   - `{ url, errorMessage }` when processing the page threw.
 *
 * Throws if no valid link is available.
 */
Actor.main(async () => {
    const initialArrayOfObjects = await getLinks();
    const links = initialArrayOfObjects.map((obj) => obj.href);

    const validLinks = links.filter((link) => {
        return typeof link === 'string' && link.trim() !== '' && link.startsWith('http');
    });
    console.log(validLinks.length);
    if (validLinks.length === 0) {
        throw new Error("No valid links found");
    }

    const crawler = new PlaywrightCrawler({
        async requestHandler({ request, page, log }) {
            // Declared outside the try block so the catch block can use it.
            const { url } = request;

            try {
                log.info(`Processing ${url}`);

                // No page.goto() here: PlaywrightCrawler has already navigated
                // `page` to the request URL before calling this handler.
                const selectorExists = await page.waitForSelector('//*[@class="px-2"]')
                    .then(() => true)
                    .catch(() => false);

                let paragraphs = [];
                if (selectorExists) {
                    paragraphs = await page.$$eval('p', (elements) =>
                        elements.map((element) => element.innerText.trim())
                    );
                }

                // Apply the fallback before building the dataset item, and keep
                // `paragraphs` an array so every item has the same shape.
                if (paragraphs.length === 0) {
                    paragraphs = [EMPTY_PAGE_MESSAGE];
                }

                await Dataset.pushData({ url, paragraphs });
                log.info(`Data pushed to dataset for URL: ${url}`);
            } catch (error) {
                log.error(`An error occurred while processing URL ${url}: ${error}`);

                // Record the failure in the dataset instead of failing the request.
                const errorData = { url, errorMessage: error.toString() };
                await Dataset.pushData(errorData);

                log.error(`Error data pushed to dataset for URL: ${url}`);
            }
        },
    });
    await crawler.addRequests(validLinks);

    // Run the crawler
    await crawler.run();
});
src/routes.js
1const { Dataset, createPlaywrightRouter, LoggerText } = require("crawlee");2const pdfParse = require("pdf-parse");3const router = createPlaywrightRouter();4const { load } = require("cheerio");5const fs = require("fs");6
/**
 * Default route: logs the page title, then enqueues the heading links found
 * inside `.field-type-text_with_summary`, giving each group the label of the
 * handler that should process it.
 */
router.addDefaultHandler(async ({ request, page, enqueueLinks, log }) => {
    const title = await page.title();
    log.info(`${title}`, { url: request.loadedUrl });

    // href fragment to match -> label attached to the enqueued requests.
    const detailRoutes = [
        { hrefPart: "regulatory-review", label: "detail5" },
        { hrefPart: "regulatory-reform-agenda", label: "detail6" },
        { hrefPart: "interpretive-rulings-policy-statements", label: "detail7" },
    ];

    // Enqueue one group at a time, in the order listed above.
    for (const { hrefPart, label } of detailRoutes) {
        await enqueueLinks({
            selector: `.field-type-text_with_summary h2 a[href*=${hrefPart}]`,
            label,
        });
    }
});
/**
 * "detail4" route: logs the page title and pushes one item for the request.
 *
 * If any error message already recorded on the request mentions
 * "Data item is too large", a small placeholder item is pushed
 * (`{ url, text }`); otherwise only the URL is pushed under `url2`.
 * Errors are logged and not rethrown.
 */
router.addHandler("detail4", async ({ request, page, log }) => {
    try {
        const title = await page.title();
        log.info(`${title}`, { url: request.loadedUrl });

        // errorMessages is a list of messages, so look for the phrase inside
        // each entry rather than comparing whole entries for equality.
        const hitSizeLimit = (request.errorMessages ?? []).some((message) =>
            String(message).includes("Data item is too large")
        );

        if (hitSizeLimit) {
            // No page content is extracted in this handler, so the item holds
            // only the URL and the manual-retrieval note.
            await Dataset.pushData({
                url: request.url,
                text: "Please retrieve manually due to size limitations",
            });
        } else {
            await Dataset.pushData({
                url2: request.url,
            });
        }
    } catch (error) {
        log.error(
            `An unexpected error occurred: ${error.message || error.code}`
        );
    }
});
144
/**
 * Handler for requests labelled "detail5".
 *
 * Reads the page title, the main text block and every link inside
 * ".field-type-text_with_summary", splits the links into PDF and non-PDF
 * lists, and stores the record through Dataset.pushData.
 *
 * @param {object} context - Crawling context passed in by the router.
 * @param {object} context.request - Request being handled.
 * @param {object} context.page - Page opened for the request.
 * @param {object} context.log - Logger used for progress and error messages.
 * @returns {Promise<void>}
 */
router.addHandler("detail5", async ({ request, page, log }) => {
    try {
        const title = await page.title();
        log.info(`${title}`, { url: request.loadedUrl });

        // Runs inside the page, so it may only use what it defines itself.
        const result = await page.evaluate(() => {
            const extracted = {
                Category: "Rules and Regulations",
                Title:
                    document.querySelector(".page-title")?.innerText || "N/A",
                MainParagraphText:
                    document.querySelector(".field-type-text_with_summary")
                        ?.innerText || "N/A",
                Links: [],
                PDFs: [],
            };

            const anchors = document.querySelectorAll(
                ".field-type-text_with_summary a"
            );
            for (const anchor of Array.from(anchors)) {
                const entry = {
                    linkText: anchor.innerText || "N/A",
                    link: anchor.href || "",
                };

                // Skip anchors whose text is purely numeric, mailto links
                // and anchors without a target.
                const isNumericLabel = !Number.isNaN(Number(entry.linkText));
                if (
                    isNumericLabel ||
                    entry.link === "" ||
                    entry.link.includes("mailto")
                ) {
                    continue;
                }

                if (entry.link.endsWith(".pdf")) {
                    extracted.PDFs.push(entry);
                } else {
                    extracted.Links.push(entry);
                }
            }

            return extracted;
        });

        // errorMessages is an array of messages; Array#includes would demand
        // an entry that equals the marker exactly, so look for the marker
        // inside each entry instead.
        const hadOversizeError = request.errorMessages.some((message) =>
            String(message).includes("Data item is too large")
        );

        if (hadOversizeError) {
            const withManualNote = (item) => ({
                ...item,
                text: "Please retrieve manually due to size limitations",
            });

            // The lists come from the extracted result; the handler has no
            // standalone PDFs/Links variables.
            await Dataset.pushData({
                url: request.url,
                ...result,
                PDFs: result.PDFs.map(withManualNote),
                Links: result.Links.map(withManualNote),
            });
        } else {
            await Dataset.pushData({
                url2: request.url,
                ...result,
            });
        }
    } catch (error) {
        log.error(
            `An unexpected error occurred: ${error.message || error.code}`
        );
    }
});
/**
 * Handler for requests labelled "detail6".
 *
 * Reads the page title, the main text block and every link inside
 * ".field-type-text_with_summary", downloads each directly linked PDF,
 * parses it with pdfParse, and stores the record through Dataset.pushData.
 * Non-PDF links are stored as found; they are not followed.
 *
 * @param {object} context - Crawling context passed in by the router.
 * @param {object} context.request - Request being handled.
 * @param {object} context.page - Page opened for the request.
 * @param {object} context.log - Logger used for progress and error messages.
 * @returns {Promise<void>}
 */
router.addHandler("detail6", async ({ request, page, log }) => {
    try {
        const title = await page.title();
        log.info(`${title}`, { url: request.loadedUrl });

        // Runs inside the page, so it may only use what it defines itself.
        const result = await page.evaluate(() => {
            const extracted = {
                Category: "Rules and Regulations",
                Title:
                    document.querySelector(".page-title")?.innerText || "N/A",
                MainParagraphText:
                    document.querySelector(".field-type-text_with_summary")
                        ?.innerText || "N/A",
                Links: [],
                PDFs: [],
            };

            const anchors = document.querySelectorAll(
                ".field-type-text_with_summary a"
            );
            for (const anchor of Array.from(anchors)) {
                const entry = {
                    linkText: anchor.innerText || "N/A",
                    link: anchor.href || "",
                };

                // Skip anchors whose text is purely numeric, mailto links
                // and anchors without a target.
                const isNumericLabel = !Number.isNaN(Number(entry.linkText));
                if (
                    isNumericLabel ||
                    entry.link === "" ||
                    entry.link.includes("mailto")
                ) {
                    continue;
                }

                if (entry.link.endsWith(".pdf")) {
                    extracted.PDFs.push(entry);
                } else {
                    extracted.Links.push(entry);
                }
            }

            return extracted;
        });

        // Each mapper handles its own failure and resolves to an entry with
        // an `error` field, so Promise.all cannot reject here.
        const PDFs = await Promise.all(
            result.PDFs.map(async (pdf) => {
                try {
                    const pdfResponse = await page.request.fetch(pdf.link);

                    // Hand pdfParse the Buffer itself. Its `.buffer` is the
                    // backing ArrayBuffer, which can be a shared pool slab
                    // larger than (and offset from) the actual PDF bytes.
                    const pdfText = await pdfParse(await pdfResponse.body());

                    return { ...pdf, text: pdfText.text };
                } catch (e) {
                    return { ...pdf, error: e.message || e.code || true };
                }
            })
        );

        // errorMessages is an array of messages; Array#includes would demand
        // an entry that equals the marker exactly, so look for the marker
        // inside each entry instead.
        const hadOversizeError = request.errorMessages.some((message) =>
            String(message).includes("Data item is too large")
        );

        if (hadOversizeError) {
            // Replace the bulky text with a placeholder so the record fits.
            const withManualNote = (item) => ({
                ...item,
                text: "Please retrieve manually due to size limitations",
            });

            await Dataset.pushData({
                url: request.url,
                ...result,
                PDFs: PDFs.map(withManualNote),
                // Links live on the extracted result; the handler has no
                // standalone Links variable.
                Links: result.Links.map(withManualNote),
            });
        } else {
            await Dataset.pushData({
                url: request.url,
                ...result,
                PDFs,
            });
        }
    } catch (error) {
        log.error(
            `An unexpected error occurred: ${error.message || error.code}`
        );
    }
});
/**
 * Handler for requests labelled "detail7".
 *
 * Reads the page title, the main text block and every link inside
 * ".field-type-text_with_summary", downloads each directly linked PDF,
 * parses it with pdfParse, and stores the record through Dataset.pushData.
 *
 * @param {object} context - Crawling context passed in by the router.
 * @param {object} context.request - Request being handled.
 * @param {object} context.page - Page opened for the request.
 * @param {object} context.log - Logger used for progress and error messages.
 * @returns {Promise<void>}
 */
router.addHandler("detail7", async ({ request, page, log }) => {
    try {
        const title = await page.title();
        log.info(`${title}`, { url: request.loadedUrl });

        // Runs inside the page, so it may only use what it defines itself.
        const result = await page.evaluate(() => {
            const extracted = {
                Category: "Rules and Regulations",
                Title:
                    document.querySelector(".page-title")?.innerText || "N/A",
                MainParagraphText:
                    document.querySelector(".field-type-text_with_summary")
                        ?.innerText || "N/A",
                Links: [],
                PDFs: [],
            };

            const anchors = document.querySelectorAll(
                ".field-type-text_with_summary a"
            );
            for (const anchor of Array.from(anchors)) {
                const entry = {
                    linkText: anchor.innerText || "N/A",
                    link: anchor.href || "",
                };

                // Skip anchors whose text is purely numeric, mailto links
                // and anchors without a target.
                const isNumericLabel = !Number.isNaN(Number(entry.linkText));
                if (
                    isNumericLabel ||
                    entry.link === "" ||
                    entry.link.includes("mailto")
                ) {
                    continue;
                }

                if (entry.link.endsWith(".pdf")) {
                    extracted.PDFs.push(entry);
                } else {
                    extracted.Links.push(entry);
                }
            }

            return extracted;
        });

        // Each mapper handles its own failure and resolves to an entry with
        // an `error` field, so Promise.all cannot reject here.
        const PDFs = await Promise.all(
            result.PDFs.map(async (pdf) => {
                try {
                    const pdfResponse = await page.request.fetch(pdf.link);

                    // Hand pdfParse the Buffer itself. Its `.buffer` is the
                    // backing ArrayBuffer, which can be a shared pool slab
                    // larger than (and offset from) the actual PDF bytes.
                    const pdfText = await pdfParse(await pdfResponse.body());

                    return { ...pdf, text: pdfText.text };
                } catch (e) {
                    return { ...pdf, error: e.message || e.code || true };
                }
            })
        );

        // errorMessages is an array of messages; Array#includes would demand
        // an entry that equals the marker exactly, so look for the marker
        // inside each entry instead.
        const hadOversizeError = request.errorMessages.some((message) =>
            String(message).includes("Data item is too large")
        );

        if (hadOversizeError) {
            // Replace the bulky text with a placeholder so the record fits.
            const withManualNote = (item) => ({
                ...item,
                text: "Please retrieve manually due to size limitations",
            });

            await Dataset.pushData({
                url: request.url,
                ...result,
                PDFs: PDFs.map(withManualNote),
                // Links live on the extracted result; the handler has no
                // standalone Links variable.
                Links: result.Links.map(withManualNote),
            });
        } else {
            await Dataset.pushData({
                url: request.url,
                ...result,
                PDFs,
            });
        }
    } catch (error) {
        log.error(
            `An unexpected error occurred: ${error.message || error.code}`
        );
    }
});
// Export the router, with its registered handlers, as this module's only member.
module.exports = { router };