1
2
3
4
5
6
7
8
9
10import { Octokit } from "@octokit/rest";
11import Apify from "apify";
12import { sleepUntil } from "./_utils/common";
13const { log } = Apify.utils;
14
/**
 * Actor entry point.
 *
 * Reads `{ queries, token, debug }` from the actor input, enqueues one request
 * per search query and runs a BasicCrawler whose handler pages through
 * `GET /search/repositories` with Octokit, pushing the picked repository
 * fields to the default dataset.
 *
 * When a search reports more matches than were collected, the handler
 * enqueues a follow-up request for the same query narrowed with a
 * `stars:<N` qualifier, where N is derived from the last collected
 * repository, so the remaining matches are fetched in further windows.
 */
Apify.main(async () => {
  const input = await Apify.getInput();
  const {
    queries = [`meteor`],
    token,
    debug = false,
  } = input ?? {};
  if (debug) log.setLevel(log.LEVELS.DEBUG);

  if (!token) {
    log.warning(
      `No token provided, will use anonymous access, which is severely limited and may cause rate limit issues.`
    );
  }

  // The crawler only needs a queue of "work items"; the URL is a placeholder
  // because the actual HTTP calls are made through Octokit, not the crawler.
  const requestQueue = await Apify.openRequestQueue();
  for (const query of queries) {
    await requestQueue.addRequest({
      url: `https://dummy.com`,
      uniqueKey: query,
      userData: { query },
    });
  }

  const octokit = new Octokit({ auth: token });

  // Last rate-limit reset time seen in a response; shared across requests so
  // the rate-limit handler below knows how long to wait.
  let rateLimitReset;

  const crawler = new Apify.BasicCrawler({
    handleRequestTimeoutSecs: 60 * 2,
    maxRequestRetries: 0,
    requestQueue,
    handleRequestFunction: async ({ request }) => {
      const results = [];
      const { filter, query } = request.userData;
      log.info(
        `Processing query "${query}", specifying filter "${filter || ``}"`
      );

      let totalCount;
      let wasRateLimited = false;
      const q = filter ? `${query} stars:<${filter}` : query;

      try {
        await octokit.paginate(
          `GET /search/repositories`,
          // Sorting by stars (descending) is what makes the `stars:<N`
          // continuation below valid: the last collected item is then the
          // one with the fewest stars in this window.
          { q, per_page: 100, sort: `stars`, order: `desc` },
          (response) => {
            totalCount = response.data[`total_count`];
            results.push(...response.data.map(pickRepo));

            const rateLimitLimit = response.headers[`x-ratelimit-limit`];
            const rateLimitUsed = response.headers[`x-ratelimit-used`];
            const resetEpochSecs = Number.parseInt(
              response.headers[`x-ratelimit-reset`],
              10
            );
            // Only trust a parseable header; an Invalid Date would make
            // toISOString() throw and abort the whole pagination.
            if (Number.isFinite(resetEpochSecs)) {
              rateLimitReset = new Date(resetEpochSecs * 1000);
            }
            log.debug(
              `Scraped: ${
                results.length
              } | Rate limit: ${rateLimitUsed}/${rateLimitLimit} (resets ${
                rateLimitReset ? rateLimitReset.toISOString() : `unknown`
              })`
            );
            // Results are collected in `results`; return an empty page so
            // paginate() does not accumulate `undefined` entries.
            return [];
          }
        );
      } catch (err) {
        if (!err?.message?.includes(`rate limit exceeded`)) {
          throw err;
        }
        wasRateLimited = true;
        // Wait a little past the advertised reset; fall back to ~1 minute
        // when no response has told us the reset time yet.
        const resetPlusSlightDelay = rateLimitReset
          ? new Date(rateLimitReset.getTime() + 5000)
          : new Date(Date.now() + 65 * 1000);
        log.warning(
          `Rate limit exceeded, will retry at ${resetPlusSlightDelay.toISOString()}`
        );
        await sleepUntil(resetPlusSlightDelay);
      }

      if (wasRateLimited && results.length === 0) {
        // Nothing was fetched before the limit hit, so there is no window to
        // continue from: re-enqueue the same work item. A fresh uniqueKey is
        // required because the queue de-duplicates on uniqueKey.
        await requestQueue.addRequest({
          url: request.url,
          userData: { ...request.userData },
          uniqueKey: `${request.uniqueKey}|retry-${Date.now()}`,
        });
        return;
      }

      if (totalCount > results.length && results.length > 0) {
        log.info(
          `Total count ${totalCount} is higher than current count ${results.length}, continuing...`
        );
        const lastResult = results[results.length - 1];
        // `stars:<(last + 1)` re-includes repos tied with the last one;
        // `filter - 1` guarantees the window still shrinks when every
        // result in this window has the same star count.
        const nextFilter = Math.min(
          (filter || Infinity) - 1,
          lastResult.stars + 1
        );
        if (nextFilter > 0) {
          await requestQueue.addRequest({
            url: request.url,
            userData: { query, filter: nextFilter },
            uniqueKey: `${query}|${nextFilter}`,
          });
        }
      }

      await Apify.pushData(results);
    },
  });

  await crawler.run();
  log.info(`That's all folks!`);
});
124
/**
 * Projects a repository object from the GitHub API onto the flat record
 * stored in the dataset.
 *
 * @param {object} repo - Repository item as returned by the search endpoint.
 * @returns {object} Record with `owner` (the owner's login, or `null` when
 *   the item has no owner), `name`, `url`, `fork`, `description`,
 *   `created_at`, `updated_at`, `pushed_at`, `homepage`, `size`, `stars`,
 *   `open_issues`, `forks`, `language`, `archived` and `disabled`.
 */
function pickRepo(repo) {
  const {
    owner,
    name,
    html_url: url,
    fork,
    description,
    created_at,
    updated_at,
    pushed_at,
    homepage,
    size,
    stargazers_count: stars,
    open_issues_count: open_issues,
    forks_count: forks,
    language,
    archived,
    disabled,
  } = repo;

  return {
    // The owner can be null in API responses; don't let one such item
    // throw and discard the whole page of results.
    owner: owner?.login ?? null,
    name,
    url,
    fork,
    description,
    created_at,
    updated_at,
    pushed_at,
    homepage,
    size,
    stars,
    open_issues,
    forks,
    language,
    archived,
    disabled,
  };
}