YouTube Suggested Videos

Under maintenance

Developed by Arron Taylor
Maintained by Community
Rating: 0.0 (0)
Pricing: Pay per usage
Total users: 2
Monthly users: 2
Runs succeeded: >99%
Last modified: 4 days ago

.actor/Dockerfile

# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-puppeteer-chrome:20
# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY --chown=myuser package*.json ./
# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much, and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
&& npm install --omit=dev --omit=optional \
&& echo "Installed NPM packages:" \
&& (npm list --omit=dev --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version \
&& rm -r ~/.npm
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick builds will be really fast
# for most source file changes.
COPY --chown=myuser . ./
# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "suggested-videos",
    "title": "YouTube Suggested Videos",
    "description": "Scrapes suggested video links from a YouTube watch page with Crawlee and Puppeteer.",
    "version": "0.0",
    "meta": {
        "templateId": "js-crawlee-puppeteer-chrome"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
    "title": "YouTube Suggested Videos Input",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "YouTube watch-page URLs to start with.",
            "editor": "requestListSources",
            "prefill": [
                {
                    "url": "https://www.youtube.com/watch?v=lE4UXdJSJM4"
                }
            ]
        },
        "maxResults": {
            "title": "Max results",
            "type": "integer",
            "description": "Maximum number of suggested video links to store.",
            "editor": "number",
            "default": 10
        },
        "proxyConfig": {
            "title": "Proxy configuration",
            "type": "object",
            "description": "Proxy settings forwarded to Actor.createProxyConfiguration().",
            "editor": "proxy"
        }
    }
}
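
For reference, an actor call could pass an input object shaped like the sketch below. It mirrors the fields that src/main.js destructures (startUrls, maxResults, proxyConfig); the useApifyProxy flag is the standard shape consumed by Actor.createProxyConfiguration().

{
    "startUrls": [
        { "url": "https://www.youtube.com/watch?v=lE4UXdJSJM4" }
    ],
    "maxResults": 10,
    "proxyConfig": { "useApifyProxy": true }
}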

src/main.js

import { Actor } from 'apify';
import { PuppeteerCrawler, Dataset } from 'crawlee';

await Actor.init();

// Read the actor input, falling back to defaults for a bare run.
const {
    startUrls = [{ url: 'https://www.youtube.com/watch?v=lE4UXdJSJM4' }],
    maxResults = 10,
    proxyConfig = null, // Proxy configuration passed in the input
} = await Actor.getInput() ?? {};

// Build the proxy configuration from the input if provided,
// otherwise fall back to the default Apify proxy settings.
const proxyConfiguration = proxyConfig
    ? await Actor.createProxyConfiguration(proxyConfig)
    : await Actor.createProxyConfiguration();

const crawler = new PuppeteerCrawler({
    proxyConfiguration,
    maxRequestsPerCrawl: 1,
    launchContext: {
        useChrome: true,
        launchOptions: {
            headless: true, // Run in headless mode
        },
    },
    requestHandler: async ({ page, request, log }) => {
        // PuppeteerCrawler has already navigated to request.url by the
        // time this handler runs, so no explicit page.goto() is needed.

        // Wait for the related-videos links to appear in the DOM.
        await page.waitForSelector("a[href*='watch']");

        const results = await page.evaluate(() => {
            const links = Array.from(document.querySelectorAll("a[href*='watch']"))
                .map((a) => a.href)
                .filter((href) => href);

            return links.map((url) => ({ url }));
        });

        // Honor the maxResults input instead of pushing every link found.
        const items = results.slice(0, maxResults);

        log.info(`Scraped ${items.length} suggested video links from ${request.url}.`);
        await Dataset.pushData(items);
    },
});

await crawler.run(startUrls);
await Actor.exit();
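
The "a[href*='watch']" selector matches every watch link on the page, so the same video ID can show up several times under different tracking parameters. A standalone sketch of one way to canonicalize and deduplicate the scraped links before pushing them (the sample URLs are illustrative only):

// Reduce each watch link to its "v" video ID so near-duplicates collapse.
const links = [
    'https://www.youtube.com/watch?v=lE4UXdJSJM4&pp=ygUF',
    'https://www.youtube.com/watch?v=lE4UXdJSJM4',
    '/watch?v=dQw4w9WgXcQ',
];

const canonicalize = (href) => {
    // Relative hrefs resolve against the YouTube origin.
    const { searchParams } = new URL(href, 'https://www.youtube.com');
    const id = searchParams.get('v');
    return id ? `https://www.youtube.com/watch?v=${id}` : null;
};

const unique = [...new Set(links.map(canonicalize).filter(Boolean))];
console.log(unique); // two canonical watch URLs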

src/routes.js

import { Dataset, createPuppeteerRouter } from 'crawlee';

export const router = createPuppeteerRouter();

router.addDefaultHandler(async ({ enqueueLinks, log }) => {
    log.info(`enqueueing new URLs`);
    await enqueueLinks({
        globs: ['https://apify.com/*'],
        label: 'detail',
    });
});

router.addHandler('detail', async ({ request, page, log }) => {
    const title = await page.title();
    log.info(`${title}`, { url: request.loadedUrl });

    await Dataset.pushData({
        url: request.loadedUrl,
        title,
    });
});
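
Note that src/routes.js ships with the template but is never imported by src/main.js, which defines its own inline requestHandler. If the actor were extended to crawl several labeled page types, the router could be wired in directly; a sketch, relying on the fact that a Crawlee router is itself a valid request handler:

// Hypothetical wiring: delegate request handling to the template router.
import { PuppeteerCrawler } from 'crawlee';
import { router } from './routes.js';

const crawler = new PuppeteerCrawler({
    requestHandler: router, // dispatches on each request's label ('detail', etc.)
});

await crawler.run(['https://apify.com']);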

.dockerignore

# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
    "extends": "@apify",
    "root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage

package.json

{
    "name": "crawlee-puppeteer-javascript",
    "version": "0.0.1",
    "type": "module",
    "description": "This is an example of an Apify actor.",
    "dependencies": {
        "apify": "^3.2.6",
        "crawlee": "^3.11.5",
        "puppeteer": "*"
    },
    "devDependencies": {
        "@apify/eslint-config": "^0.4.0",
        "eslint": "^8.50.0"
    },
    "scripts": {
        "start": "node src/main.js",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}