Federal Credit Union Act avatar
Federal Credit Union Act

Pricing

Pay per usage

Go to Store
Federal Credit Union Act

Federal Credit Union Act

Developed by

Yash Agarwal

Maintained by Community

0.0 (0)

Pricing

Pay per usage

1

Monthly users

1

Runs succeeded

>99%

Last modified

a year ago

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

.gitignore

# This file tells Git which files shouldn't be added to source control

.idea
dist
node_modules
apify_storage
crawlee_storage
storage

# Added by Apify CLI
.venv

Dockerfile

# Base image for the Actor. The list of available images is documented at
# https://crawlee.dev/docs/guides/docker-images; any other Docker Hub image
# may be substituted here.
FROM apify/actor-node-playwright-chrome:20

# Bring in only the package manifests first, so the dependency layer below
# can be served from the Docker layer cache when sources change.
COPY --chown=myuser package*.json ./

# Install production NPM packages only (development and optional
# dependencies are omitted to keep the image small), keep npm quiet, and
# print the dependency tree plus tool versions to help with debugging.
RUN npm --quiet set progress=false && \
    npm install --omit=dev --omit=optional && \
    echo "Installed NPM packages:" && \
    (npm list --omit=dev --all || true) && \
    echo "Node.js version:" && \
    node --version && \
    echo "NPM version:" && \
    npm --version

# Copy the rest of the project. Doing this after the NPM install keeps
# rebuilds fast for most source-only changes.
COPY --chown=myuser . ./

# Start the Actor. The XVFB start script is only needed for headful
# browsers and can be removed for a small performance gain otherwise.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent

package.json

{
    "name": "my-crawler",
    "version": "0.0.1",
    "type": "module",
    "description": "This is an example of a Crawlee project.",
    "dependencies": {
        "apify": "^3.2.0",
        "crawlee": "^3.0.0",
        "playwright": "*"
    },
    "scripts": {
        "start": "node src/main.js",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1",
        "postinstall": "npx crawlee install-playwright-browsers"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}

start.bat

Download

start.sh

Download

.actor/actor.json

{
	"actorSpecification": 1,
	"name": "Federal-credit-union-act",
	"version": "0.0",
	"buildTag": "latest",
	"environmentVariables": {}
}

src/main.js

import { Actor } from "apify";
import { PlaywrightCrawler, Dataset } from "crawlee";

// Actor entry point. Loads the start URL passed to `crawler.run` below in a
// Playwright-driven browser, extracts the chapter heading, the document
// viewer text and the links inside the document viewer, and stores them as a
// single dataset item.
Actor.main(async () => {
  const crawler = new PlaywrightCrawler({
    // Upper bound on processed requests. The handler below enqueues no
    // further links, so in practice only the start URL is visited.
    maxRequestsPerCrawl: 100,

    /**
     * Scrapes one loaded page and pushes the extracted record to the dataset.
     *
     * @param {object} context - Crawling context supplied by the crawler.
     * @param {object} context.request - The request being processed.
     * @param {object} context.page - The Playwright page for the request.
     * @param {object} context.log - Logger for this request.
     * @returns {Promise<void>}
     */
    async requestHandler({ request, page, log }) {
      log.info(`Processing ${request.url}`);

      const title = await page.title();
      log.info(`${title}`, { url: request.loadedUrl });

      // The callback passed to page.evaluate is executed inside the page, so
      // it must be self-contained: every helper it needs is declared within.
      const result = await page.evaluate(() => {
        const FALLBACK_TEXT = "N/A";

        // A link counts as a PDF when its path ends in ".pdf". The query
        // string and fragment are stripped and the comparison ignores case,
        // so "file.PDF" and "file.pdf?download=1#page=2" are both recognised.
        const isPdfLink = (href) =>
          href.split(/[?#]/)[0].toLowerCase().endsWith(".pdf");

        const data = {
          Category: "Rules and Regulations",
          Title:
            document.querySelector(".chapter-head")?.innerText ||
            FALLBACK_TEXT,
          MainParagraphText:
            document.querySelector(".documentViewer")?.innerText ||
            FALLBACK_TEXT,
          Links: [],
          PDFs: [],
        };

        for (const anchor of document.querySelectorAll(".documentViewer a")) {
          const entry = {
            linkText: anchor.innerText || FALLBACK_TEXT,
            link: anchor.href || "",
          };

          // Anchors whose text parses as a number are skipped
          // (presumably footnote/section markers - TODO confirm).
          const hasNumericText = !Number.isNaN(Number(entry.linkText));
          // Match the URL scheme only; a substring test would also discard
          // ordinary links that merely contain "mailto" somewhere in the URL.
          const isMailto = entry.link.startsWith("mailto:");

          if (hasNumericText || isMailto || entry.link === "") {
            continue;
          }

          if (isPdfLink(entry.link)) {
            data.PDFs.push(entry);
          } else {
            data.Links.push(entry);
          }
        }

        return data;
      });

      // Save the data to the dataset
      await Dataset.pushData({
        url: request.url,
        ...result,
      });
    },
  });

  // Run the crawler with the initial URL
  await crawler.run([
    "https://uscode.house.gov/view.xhtml?path=/prelim@title12/chapter14&edition=prelim",
  ]);
});

src/routes.js

import { createPlaywrightRouter } from 'crawlee';

export const router = createPlaywrightRouter();

/**
 * Default route: enqueues the links matching the glob below and labels the
 * resulting requests 'detail', so they are dispatched to `handleDetail`.
 *
 * @param {object} context - Crawling context supplied by the router.
 * @returns {Promise<void>}
 */
async function handleDefault(context) {
    const { enqueueLinks, log } = context;
    const linkOptions = {
        globs: ['https://crawlee.dev/**'],
        label: 'detail',
    };

    log.info(`enqueueing new URLs`);
    await enqueueLinks(linkOptions);
}

/**
 * 'detail' route: logs the page title and stores it with the loaded URL.
 *
 * @param {object} context - Crawling context supplied by the router.
 * @returns {Promise<void>}
 */
async function handleDetail(context) {
    const { request, page, log, pushData } = context;
    const title = await page.title();
    const url = request.loadedUrl;

    log.info(`${title}`, { url });

    await pushData({ url, title });
}

router.addDefaultHandler(handleDefault);
router.addHandler('detail', handleDetail);

Pricing

Pricing model

Pay per usage

This Actor is billed per platform usage: the Actor itself is free to use, and you pay only for the Apify platform usage it consumes.