const Apify = require('apify');

/**
 * Visits the Twitter mobile home page once in a throw-away browser and
 * returns the cookies that were set during that visit.
 *
 * JavaScript is disabled for this visit.
 * NOTE(review): presumably this makes Twitter serve the plain-HTML variant
 * whose markup (`table.tweet`, `.w-button-more`) the crawler relies on — confirm.
 *
 * @param {object} proxyConfiguration - Result of `Apify.createProxyConfiguration()`.
 * @param {string} userAgent - User-Agent header used for the visit.
 * @returns {Promise<object[]>} Cookies as returned by `page.cookies()`.
 */
async function fetchInitialCookies(proxyConfiguration, userAgent) {
    const browser = await Apify.launchPuppeteer({
        proxyUrl: proxyConfiguration.newUrl(),
        userAgent,
    });

    // `finally` guarantees the browser is closed even when navigation fails.
    try {
        const page = await browser.newPage();
        await page.setJavaScriptEnabled(false);
        await page.goto('https://mobile.twitter.com/home', {
            waitUntil: 'networkidle0',
            timeout: 30000,
        });
        await Apify.utils.sleep(1000);
        return await page.cookies();
    } finally {
        await browser.close();
    }
}

/**
 * Runs inside the browser (passed to `page.$$eval`), so it must not reference
 * anything from the Node.js scope.
 *
 * @param {Element[]} tweetEls - Elements matched by `table.tweet`.
 * @returns {{username: (string|null), tweet: (string|null)}[]} One record per
 *     element; a field is `null` when its sub-element is missing, so a single
 *     malformed row does not fail the whole page.
 */
const extractTweets = (tweetEls) => tweetEls.map((tweetEl) => {
    const usernameEl = tweetEl.querySelector('.username');
    const textEl = tweetEl.querySelector('.tweet-text');

    return {
        username: usernameEl ? usernameEl.innerText : null,
        tweet: textEl ? textEl.innerText : null,
    };
});

/**
 * Actor entry point: crawls the Twitter mobile search results for one hashtag,
 * follows the "more" link page by page and pushes every tweet found
 * (username + text, optionally extended by `extendOutputFunction`) to the
 * default dataset.
 *
 * Input fields read: `hashtag`, `extendOutputFunction`, `proxyConfig`.
 */
Apify.main(async () => {
    const userAgent = 'Mozilla/5.0 (Linux; U; Android 3.2; nl-nl; GT-P6800 Build/HTJ85B) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13';

    // getInput() may resolve to null when no input was provided.
    const input = (await Apify.getInput()) || {};
    const { hashtag, extendOutputFunction, proxyConfig } = input;

    // Accept "#tag" as well as "tag"; an unescaped '#' would start a URL fragment.
    const normalizedHashtag = typeof hashtag === 'string' ? hashtag.trim().replace(/^#/, '') : '';
    if (normalizedHashtag === '') {
        throw new Error('Input field "hashtag" is required and must be a non-empty string.');
    }

    const requestList = await Apify.openRequestList('hashtags', [
        { url: `https://mobile.twitter.com/search?q=%23${encodeURIComponent(normalizedHashtag)}` },
    ]);
    const requestQueue = await Apify.openRequestQueue();
    const proxyConfiguration = await Apify.createProxyConfiguration({
        ...proxyConfig,
    });

    const cookies = await fetchInitialCookies(proxyConfiguration, userAgent);

    const crawler = new Apify.PuppeteerCrawler({
        requestList,
        requestQueue,
        useSessionPool: true,
        proxyConfiguration,
        launchPuppeteerOptions: {
            userAgent,
            stealth: true,
        },

        gotoFunction: async ({ page, request, session, puppeteerPool }) => {
            if (cookies.length > 0) {
                await page.setCookie(...cookies);
            }

            try {
                // `await` is required here: without it a rejected navigation
                // bypasses the catch block and nothing gets retired.
                return await page.goto(request.url, {
                    waitUntil: 'networkidle0',
                });
            } catch (e) {
                session.retire();
                await puppeteerPool.retire(page.browser());

                throw e;
            }
        },

        handlePageFunction: async ({ page, request }) => {
            console.log(`Processing ${request.url}...`);

            // jQuery is handed to the user-supplied extendOutputFunction below.
            await Apify.utils.puppeteer.injectJQuery(page);

            // $$eval does not throw on zero matches, so a missing "more" link is
            // handled as a normal case and real enqueue errors are not swallowed.
            const nextHref = await page.$$eval(
                '.w-button-more a',
                (links) => (links.length > 0 ? links[0].href : null),
            );
            if (nextHref) {
                await requestQueue.addRequest({ url: nextHref });
            } else {
                console.log(`Url ${request.url} is the last page!`);
            }

            const data = await page.$$eval('table.tweet', extractTweets);

            // NOTE(security): extendOutputFunction comes from actor input and is
            // eval'd inside the page context. Only run this actor with trusted input.
            // The result is awaited so async user functions work, and only plain
            // objects are merged (null, arrays and primitives are ignored).
            const userFnData = extendOutputFunction
                ? await page.evaluate(async (fnSource) => {
                    const result = await eval(fnSource)(window.jQuery);
                    const isPlainObject = result !== null
                        && typeof result === 'object'
                        && !Array.isArray(result);
                    return isPlainObject ? result : {};
                }, extendOutputFunction)
                : {};

            await Apify.pushData(data.map((res) => ({ ...res, ...userFnData })));
        },

        handleFailedRequestFunction: async ({ request }) => {
            console.log(`Request ${request.url} failed 4 times`);
        },
    });

    await crawler.run();
});