import { Actor } from 'apify';
import { PuppeteerCrawler, Dataset } from 'crawlee';

await Actor.init();

// Actor input with sensible defaults (see the actor's input schema).
const {
    startUrls = [{ url: 'https://www.youtube.com/watch?v=lE4UXdJSJM4' }],
    maxResults = 10,
    proxyConfig = null,
} = await Actor.getInput() ?? {};

// Use caller-supplied proxy settings when provided, platform defaults otherwise.
const proxyConfiguration = proxyConfig
    ? await Actor.createProxyConfiguration(proxyConfig)
    : await Actor.createProxyConfiguration();

const crawler = new PuppeteerCrawler({
    proxyConfiguration,
    maxRequestsPerCrawl: 1,
    launchContext: {
        useChrome: true,
        launchOptions: {
            headless: true,
        },
    },
    requestHandler: async ({ page, request, log }) => {
        log.info(`Scraping ${request.url}`);

        await page.waitForSelector("a[href*='watch']");

        const results = await page.evaluate(() => {
            // Collect watch-page links; dedupe because YouTube renders the
            // same video link in several DOM locations (thumbnail + title).
            const hrefs = Array.from(document.querySelectorAll("a[href*='watch']"))
                .map((a) => a.href)
                .filter(Boolean);

            return [...new Set(hrefs)].map((url) => ({ url }));
        });

        // Honor the maxResults input (previously destructured but never applied).
        const limited = results.slice(0, maxResults);

        log.info(`Scraped ${limited.length} results.`);
        await Dataset.pushData(limited);
    },
});

await crawler.run(startUrls);
await Actor.exit();