1
2
3
4
5
6
7
8import { Actor } from 'apify';
9
10import { PlaywrightCrawler, RequestQueue } from 'crawlee';
11
12
// Initialise the Actor runtime before reading input or opening storages.
await Actor.init();

// Actor input object; expected to carry the start URL under `target`.
const input = await Actor.getInput();
console.log(111, JSON.stringify(input));

// Fail fast with a readable message when the input is missing or has no
// usable `target`. Previously a null input crashed with a bare TypeError on
// `input.target`, and a missing target enqueued an empty URL.
const targetUrl = typeof input?.target === 'string' ? input.target.trim() : '';
if (targetUrl === '') {
    throw new Error('Actor input must contain a non-empty string "target" URL.');
}

// Queue holding the single start request that the crawler will process.
const requestQueue = await RequestQueue.open();
await requestQueue.addRequest({ url: targetUrl });
console.log(222);
20
// Exact URL of the GraphQL endpoint whose response body is stored.
const THREADS_GRAPHQL_URL = 'https://www.threads.net/api/graphql';

const proxyConfiguration = await Actor.createProxyConfiguration();

const crawler = new PlaywrightCrawler({
    proxyConfiguration,
    requestQueue,
    /**
     * Logs the page title, then listens for network responses on the page.
     * The first response from the Threads GraphQL endpoint that carries a
     * `data.data` payload is pushed to the default dataset, after which the
     * Actor exits.
     *
     * NOTE(review): the listener is attached inside the request handler, so
     * responses received before this point are not observed. If the wanted
     * GraphQL call can fire during initial navigation, consider registering
     * the listener in `preNavigationHooks` instead — confirm against the
     * target page's behaviour.
     */
    async requestHandler({ request, page, log }) {
        log.info(`Processing ${request.url}...`);
        const title = await page.title();
        log.info(`${title}`, { url: request.loadedUrl });

        page.on('response', async (response) => {
            const responseUrl = response.url();
            console.log(responseUrl);
            if (responseUrl !== THREADS_GRAPHQL_URL) {
                return;
            }

            // Event listeners are not awaited by the emitter, so any rejection
            // here would otherwise surface as an unhandled promise rejection.
            try {
                const data = await response.json();
                // Log the captured body (the original call passed no argument
                // to JSON.stringify and therefore logged `undefined`).
                log.info(JSON.stringify(data));

                const payload = data?.data?.data;
                if (payload == null) {
                    log.warning('GraphQL response has no data.data payload, skipping', { url: responseUrl });
                    return;
                }

                await Actor.pushData(payload);
                console.log('success, waiting to exit');
                await Actor.exit();
            } catch (error) {
                log.error('Failed to process GraphQL response', {
                    url: responseUrl,
                    message: error instanceof Error ? error.message : String(error),
                });
            }
        });

        // Keep the page open for 6 s so that responses arriving after the
        // listener was attached can still be captured.
        await page.waitForTimeout(6000);
    },
});

await crawler.run();

// Reached only when no matching response triggered the early exit above.
await Actor.exit();