1
2
3
4
5
6
7
8import { Actor, log } from "apify";
9
10
11
/**
 * Options accepted as Actor input. They select which repositories are
 * analyzed and which optional sections are fetched for each of them.
 */
interface Input {
  /** GitHub repository URLs to analyze; the run aborts when the list is empty. */
  repoUrls: string[];
  /** Fetch the repository README. */
  includeReadme: boolean;
  /** Fetch the repository file listing. */
  includeFileTree: boolean;
  /** Maximum number of path segments a file-tree entry may have to be kept. */
  fileTreeDepth: number;
  /** Look for a dependency manifest (package.json, requirements.txt or go.mod). */
  includeDependencies: boolean;
  /** Fetch open issues. */
  includeIssues: boolean;
  /** Upper bound on the number of issues returned. */
  issueLimit: number;
  /** Fetch the contributor list. */
  includeContributors: boolean;
  /** Fetch the per-language breakdown. */
  includeLanguages: boolean;
  /** Fetch releases. */
  includeReleases: boolean;
  /** Upper bound on the number of releases returned. */
  releaseLimit: number;
  /** Optional GitHub token; sent as a Bearer `Authorization` header when set. */
  githubToken?: string;
  /**
   * Shape of each dataset item: `full` pushes the whole record, `compact` a
   * reduced subset, and `ai-optimized` the whole record plus a generated
   * `aiSummary`.
   */
  outputFormat: "full" | "compact" | "ai-optimized";
}
27
/**
 * One analyzed repository. The required fields are copied from the GitHub
 * repository record; the optional ones are filled in only when the matching
 * section was requested and could be fetched.
 */
interface RepoData {
  /** URL exactly as supplied in the input. */
  repoUrl: string;
  name: string;
  /** `owner/name` form. */
  fullName: string;
  description: string | null;
  stars: number;
  forks: number;
  /** Populated from the API's `subscribers_count` field. */
  watchers: number;
  openIssuesCount: number;
  /** Primary language reported by the API. */
  language: string | null;
  /** SPDX identifier of the license, or null when none is reported. */
  license: string | null;
  topics: string[];
  createdAt: string;
  updatedAt: string;
  pushedAt: string;
  defaultBranch: string;
  isArchived: boolean;
  isFork: boolean;
  /** Repository size as reported by the API. */
  size: number;
  homepage: string | null;
  /** Decoded README text. */
  readme?: string;
  /** Flat, depth-limited listing of repository entries. */
  fileTree?: FileNode[];
  /** Dependencies parsed from the first manifest found. */
  dependencies?: DependencyInfo;
  issues?: IssueData[];
  contributors?: ContributorData[];
  /** Language name mapped to the figure returned by the languages endpoint. */
  languages?: Record<string, number>;
  releases?: ReleaseData[];
  /** Generated text summary; set only for the `ai-optimized` output format. */
  aiSummary?: string;
}
57
/** A single entry of the repository file listing. */
interface FileNode {
  /** Path relative to the repository root, using `/` separators. */
  path: string;
  /** `dir` for tree entries, `file` for everything else. */
  type: "file" | "dir";
  /** Size when the API reports one. */
  size?: number;
  /**
   * Nested entries.
   * NOTE(review): nothing in this file populates it; the tree is produced as a
   * flat list.
   */
  children?: FileNode[];
}
64
/** Dependencies extracted from a repository's manifest file. */
interface DependencyInfo {
  /** Ecosystem of the manifest that was found: `npm`, `pip` or `go`. */
  packageManager: string;
  /** Package name mapped to its version string. */
  dependencies: Record<string, string>;
  /** Development dependencies; only read from package.json. */
  devDependencies?: Record<string, string>;
  /** `engines` field; only read from package.json. */
  engines?: Record<string, string>;
}
71
/** Summary of one open issue. */
interface IssueData {
  number: number;
  title: string;
  state: string;
  /** Label names attached to the issue. */
  labels: string[];
  createdAt: string;
  updatedAt: string;
  /** Number of comments reported by the API. */
  commentsCount: number;
  /** Issue text, cut to the first 500 characters. */
  body?: string;
}
82
/** One entry of the repository's contributor list. */
interface ContributorData {
  /** GitHub user name. */
  login: string;
  /** Contribution count reported by the API. */
  contributions: number;
  avatarUrl: string;
  /** Link to the contributor's GitHub profile page. */
  profileUrl: string;
}
89
/** Summary of one repository release. */
interface ReleaseData {
  tagName: string;
  name: string | null;
  publishedAt: string;
  isPrerelease: boolean;
  /** Release notes, cut to the first 300 characters. */
  body?: string;
}
97
98
99
/** Base URL that every GitHub REST API path in this file is appended to. */
const GITHUB_API = "https://api.github.com";
101
/**
 * Sends a GET request to the GitHub REST API and returns the parsed JSON body.
 *
 * Rate limiting: a 403 or 429 response that carries a positive `retry-after`
 * header or `x-ratelimit-remaining: 0` is retried after the advertised delay.
 * Retries are capped so a misbehaving header cannot loop forever; once the
 * cap is reached the response is reported like any other failure.
 *
 * @param path - API path including the leading slash, e.g. `/repos/o/r`.
 * @param token - Optional token, sent as `Authorization: Bearer <token>`.
 * @param maxRetries - Maximum number of rate-limit retries for this request.
 * @returns The decoded JSON payload; its shape depends on the endpoint.
 * @throws Error `Not found: <path>` on 404, or `GitHub API <status>: <body>`
 *   (body cut to 200 characters) on any other unsuccessful response.
 */
async function githubFetch(
  path: string,
  token?: string,
  maxRetries = 3,
): Promise<any> {
  const headers: Record<string, string> = {
    Accept: "application/vnd.github.v3+json",
    "User-Agent": "apify-github-repo-analyzer/1.0",
  };
  if (token) {
    headers.Authorization = `Bearer ${token}`;
  }

  for (let attempt = 0; ; attempt += 1) {
    const res = await fetch(`${GITHUB_API}${path}`, { headers });

    const maybeThrottled = res.status === 403 || res.status === 429;
    if (maybeThrottled && attempt < maxRetries) {
      // `retry-after` is sent for secondary limits; an HTTP-date or missing
      // value converts to NaN/0 and is simply ignored.
      const retryAfterSec = Number(res.headers.get("retry-after"));
      const quotaExhausted = res.headers.get("x-ratelimit-remaining") === "0";

      if (retryAfterSec > 0 || quotaExhausted) {
        const resetAtMs = Number(res.headers.get("x-ratelimit-reset")) * 1000;
        const untilResetMs = Number.isFinite(resetAtMs)
          ? resetAtMs - Date.now()
          : 0;
        // Never wait less than a second, even if the reset is already past.
        const waitMs =
          retryAfterSec > 0
            ? retryAfterSec * 1000
            : Math.max(untilResetMs, 1000);
        log.warning(`Rate limited. Waiting ${Math.ceil(waitMs / 1000)}s...`);
        await new Promise((r) => setTimeout(r, waitMs));
        continue;
      }
    }

    if (res.status === 404) {
      throw new Error(`Not found: ${path}`);
    }

    if (!res.ok) {
      const body = await res.text();
      throw new Error(`GitHub API ${res.status}: ${body.slice(0, 200)}`);
    }

    return res.json();
  }
}
135
/**
 * Extracts the owner and repository name from a GitHub repository reference.
 *
 * Accepts web URLs (`https://github.com/owner/repo`, with or without `www.`,
 * a trailing path, query or fragment), bare `github.com/owner/repo`, and SSH
 * remotes (`git@github.com:owner/repo.git`). A trailing `.git` is removed.
 * Hosts that merely end in "github.com" (e.g. `notgithub.com`) are rejected,
 * and neither segment may contain `/`, `?`, `#` or whitespace because both
 * are later interpolated into API paths.
 *
 * @param url - Repository URL or SSH remote.
 * @returns The `owner` and `repo` path segments.
 * @throws Error `Invalid GitHub URL: <url>` when no owner/repo pair is found.
 */
function parseRepoUrl(url: string): { owner: string; repo: string } {
  const match = url.match(
    /(?:^|[/@.])github\.com[/:]([^/\s?#:]+)\/([^/\s?#]+)/i,
  );
  const owner = match?.[1];
  const repo = match?.[2]?.replace(/\.git$/, "");
  // `repo` is empty when the segment was literally ".git".
  if (!owner || !repo) throw new Error(`Invalid GitHub URL: ${url}`);
  return { owner, repo };
}
143
144
145
/**
 * Loads the repository record for `owner/repo` from the GitHub API.
 *
 * @param owner - Account or organization that owns the repository.
 * @param repo - Repository name.
 * @param token - Optional GitHub token forwarded to the request helper.
 * @returns The raw JSON object returned by the `/repos/{owner}/{repo}` endpoint.
 */
async function fetchRepoInfo(
  owner: string,
  repo: string,
  token?: string,
): Promise<any> {
  const endpoint = `/repos/${owner}/${repo}`;
  const record = await githubFetch(endpoint, token);
  return record;
}
153
/**
 * Fetches the repository README as text.
 *
 * Uses the inline base64 payload when the API provides one and otherwise
 * falls back to the file's `download_url`. This is best-effort: any failure
 * (no README, network error, unsuccessful download) yields `null` instead of
 * an exception.
 *
 * @param owner - Account or organization that owns the repository.
 * @param repo - Repository name.
 * @param token - Optional GitHub token forwarded to the request helper.
 * @returns The README text, or `null` when it is unavailable.
 */
async function fetchReadme(
  owner: string,
  repo: string,
  token?: string,
): Promise<string | null> {
  try {
    const data = await githubFetch(
      `/repos/${owner}/${repo}/readme`,
      token,
    );

    if (data.content && data.encoding === "base64") {
      return Buffer.from(data.content, "base64").toString("utf-8");
    }

    if (data.download_url) {
      const res = await fetch(data.download_url);
      // An error page must not be mistaken for the README itself.
      if (!res.ok) return null;
      // Awaited so that a failure while reading the body is caught below
      // rather than rejecting the caller's promise.
      return await res.text();
    }
    return null;
  } catch {
    // Deliberately best-effort: a missing README is not an error.
    return null;
  }
}
178
/**
 * Fetches a flat listing of the repository's files and directories.
 *
 * The whole tree of `branch` is requested recursively and then filtered to
 * entries whose path has at most `maxDepth` segments. This is best-effort:
 * any failure yields an empty list.
 *
 * @param owner - Account or organization that owns the repository.
 * @param repo - Repository name.
 * @param branch - Branch (or other tree reference) to list.
 * @param maxDepth - Maximum number of `/`-separated path segments to keep.
 * @param token - Optional GitHub token forwarded to the request helper.
 * @returns Flat list of entries; `children` is never populated here.
 */
async function fetchFileTree(
  owner: string,
  repo: string,
  branch: string,
  maxDepth: number,
  token?: string,
): Promise<FileNode[]> {
  try {
    const data = await githubFetch(
      `/repos/${owner}/${repo}/git/trees/${branch}?recursive=1`,
      token,
    );

    if (!Array.isArray(data?.tree)) return [];

    const nodes: FileNode[] = [];
    for (const item of data.tree) {
      if (typeof item?.path !== "string") continue;
      if (item.path.split("/").length > maxDepth) continue;

      nodes.push({
        path: item.path,
        type: item.type === "tree" ? "dir" : "file",
        // Keep a legitimate size of 0 (empty file); only a missing size is
        // reported as undefined.
        size: typeof item.size === "number" ? item.size : undefined,
      });
    }
    return nodes;
  } catch {
    // Deliberately best-effort: the tree is an optional section.
    return [];
  }
}
210
/**
 * Detects the repository's dependency manifest and parses it.
 *
 * Manifests are probed in a fixed order: `package.json` (npm),
 * `requirements.txt` (pip), then `go.mod` (go); the first one that exists
 * and can be read wins. A `package.json` that is not valid JSON is skipped.
 *
 * @param owner - Account or organization that owns the repository.
 * @param repo - Repository name.
 * @param token - Optional GitHub token forwarded to the request helper.
 * @returns The parsed dependency information, or `null` when no manifest
 *   was found.
 */
async function fetchDependencies(
  owner: string,
  repo: string,
  token?: string,
): Promise<DependencyInfo | null> {
  const packageJson = await fetchManifestText(owner, repo, "package.json", token);
  if (packageJson !== null) {
    const npmInfo = parsePackageJson(packageJson);
    if (npmInfo) return npmInfo;
  }

  const requirements = await fetchManifestText(
    owner,
    repo,
    "requirements.txt",
    token,
  );
  if (requirements !== null) {
    return {
      packageManager: "pip",
      dependencies: parseRequirementsTxt(requirements),
    };
  }

  const goMod = await fetchManifestText(owner, repo, "go.mod", token);
  if (goMod !== null) {
    return {
      packageManager: "go",
      dependencies: parseGoMod(goMod),
    };
  }

  return null;
}

/** Downloads and decodes a root-level file; `null` when missing, empty or unreadable. */
async function fetchManifestText(
  owner: string,
  repo: string,
  fileName: string,
  token?: string,
): Promise<string | null> {
  try {
    const data = await githubFetch(
      `/repos/${owner}/${repo}/contents/${fileName}`,
      token,
    );
    if (typeof data?.content !== "string" || data.content === "") return null;
    return Buffer.from(data.content, "base64").toString("utf-8");
  } catch {
    // A missing manifest is expected; the caller moves on to the next one.
    return null;
  }
}

/** Parses package.json text; `null` when it is not a JSON object. */
function parsePackageJson(text: string): DependencyInfo | null {
  let manifest: unknown;
  try {
    manifest = JSON.parse(text);
  } catch {
    return null;
  }
  if (typeof manifest !== "object" || manifest === null) return null;

  const { dependencies, devDependencies, engines } = manifest as Record<
    string,
    any
  >;
  return {
    packageManager: "npm",
    dependencies: dependencies || {},
    devDependencies,
    engines,
  };
}

/**
 * Parses requirements.txt text into `name -> version specifier`.
 * The full specifier (operators and all clauses, e.g. `>=2.0,<3.0`) is kept,
 * `*` stands for "unpinned". Comments, environment markers, extras, option
 * lines (`-r`, `-e`, `--index-url`, ...) and bare URLs are ignored.
 */
function parseRequirementsTxt(text: string): Record<string, string> {
  const deps: Record<string, string> = {};
  for (const rawLine of text.split(/\r?\n/)) {
    // A comment starts at "#" only at line start or after whitespace, so
    // URL fragments such as "#egg=name" are left alone.
    const line = rawLine.replace(/(^|\s)#.*$/, "").trim();
    if (!line || line.startsWith("-")) continue;

    // Everything after ";" is an environment marker, not part of the version.
    const [requirement = ""] = line.split(";");
    const match = requirement
      .trim()
      .match(/^([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?:\[[^\]]*\])?\s*(.*)$/);
    if (!match) continue;

    const [, name, rawSpec = ""] = match;
    const spec = rawSpec.replace(/\s+/g, "");
    if (!name) continue;
    // Anything that is neither a version operator nor a direct reference
    // ("name @ url") is not a requirement line we understand.
    if (spec !== "" && !/^(?:[<>=!~]|@)/.test(spec)) continue;

    deps[name] = spec || "*";
  }
  return deps;
}

/**
 * Parses go.mod text into `module path -> version`, covering every
 * `require ( ... )` block as well as single-line `require` directives.
 */
function parseGoMod(text: string): Record<string, string> {
  const deps: Record<string, string> = {};
  let insideRequireBlock = false;

  for (const rawLine of text.split(/\r?\n/)) {
    // Drop trailing comments such as "// indirect".
    const line = rawLine.replace(/\/\/.*$/, "").trim();
    if (!line) continue;

    if (insideRequireBlock) {
      if (line.startsWith(")")) {
        insideRequireBlock = false;
        continue;
      }
      const [name, version] = line.split(/\s+/);
      if (name && version) deps[name] = version;
      continue;
    }

    if (/^require\s*\($/.test(line)) {
      insideRequireBlock = true;
      continue;
    }

    const single = line.match(/^require\s+(\S+)\s+(\S+)/);
    if (single) {
      const [, name, version] = single;
      if (name && version) deps[name] = version;
    }
  }
  return deps;
}
289
/**
 * Fetches up to `limit` open issues, most recently updated first.
 *
 * The issues endpoint also returns pull requests, which are skipped. Because
 * of that a single page may hold fewer real issues than requested, so further
 * pages are read (up to a fixed cap) until `limit` issues are collected or
 * the results run out. This is best-effort: on a request failure the issues
 * gathered so far are returned.
 *
 * @param owner - Account or organization that owns the repository.
 * @param repo - Repository name.
 * @param limit - Maximum number of issues to return; values <= 0 yield `[]`.
 * @param token - Optional GitHub token forwarded to the request helper.
 * @returns Issue summaries with the body cut to 500 characters.
 */
async function fetchIssues(
  owner: string,
  repo: string,
  limit: number,
  token?: string,
): Promise<IssueData[]> {
  const issues: IssueData[] = [];
  if (!(limit > 0)) return issues;

  // The API serves at most 100 items per page.
  const perPage = Math.min(Math.ceil(limit), 100);
  // Bounds the number of requests for repositories dominated by pull requests.
  const maxPages = 10;

  try {
    for (let page = 1; page <= maxPages; page += 1) {
      const batch = await githubFetch(
        `/repos/${owner}/${repo}/issues?state=open&per_page=${perPage}&sort=updated&page=${page}`,
        token,
      );
      if (!Array.isArray(batch) || batch.length === 0) break;

      for (const item of batch) {
        if (item.pull_request) continue;

        const labels: string[] = (item.labels ?? [])
          // Labels may arrive as plain strings or as objects with a name.
          .map((label: any) => (typeof label === "string" ? label : label?.name))
          .filter((name: unknown) => typeof name === "string");

        issues.push({
          number: item.number,
          title: item.title,
          state: item.state,
          labels,
          createdAt: item.created_at,
          updatedAt: item.updated_at,
          commentsCount: item.comments,
          body: item.body?.slice(0, 500),
        });
        if (issues.length >= limit) return issues;
      }

      // A short page means there is nothing further to read.
      if (batch.length < perPage) break;
    }
  } catch {
    // Deliberately best-effort: keep whatever was collected before the failure.
  }
  return issues;
}
317
/**
 * Loads the first 20 entries of the repository's contributor list.
 * Any failure results in an empty list instead of an exception.
 *
 * @param owner - Account or organization that owns the repository.
 * @param repo - Repository name.
 * @param token - Optional GitHub token forwarded to the request helper.
 * @returns Contributor summaries, or `[]` on error.
 */
async function fetchContributors(
  owner: string,
  repo: string,
  token?: string,
): Promise<ContributorData[]> {
  try {
    const endpoint = `/repos/${owner}/${repo}/contributors?per_page=20`;
    const rows = await githubFetch(endpoint, token);
    const contributors: ContributorData[] = rows.map((row: any) => {
      const { login, contributions, avatar_url, html_url } = row;
      return {
        login,
        contributions,
        avatarUrl: avatar_url,
        profileUrl: html_url,
      };
    });
    return contributors;
  } catch {
    return [];
  }
}
338
/**
 * Loads the repository's language breakdown.
 * Any failure results in an empty object instead of an exception.
 *
 * @param owner - Account or organization that owns the repository.
 * @param repo - Repository name.
 * @param token - Optional GitHub token forwarded to the request helper.
 * @returns The payload of the languages endpoint, or `{}` on error.
 */
async function fetchLanguages(
  owner: string,
  repo: string,
  token?: string,
): Promise<Record<string, number>> {
  let breakdown: Record<string, number>;
  try {
    const endpoint = `/repos/${owner}/${repo}/languages`;
    breakdown = await githubFetch(endpoint, token);
  } catch {
    breakdown = {};
  }
  return breakdown;
}
353
/**
 * Loads up to `limit` releases of the repository.
 * Any failure results in an empty list instead of an exception.
 *
 * @param owner - Account or organization that owns the repository.
 * @param repo - Repository name.
 * @param limit - Page size requested from the releases endpoint.
 * @param token - Optional GitHub token forwarded to the request helper.
 * @returns Release summaries with notes cut to 300 characters, or `[]` on error.
 */
async function fetchReleases(
  owner: string,
  repo: string,
  limit: number,
  token?: string,
): Promise<ReleaseData[]> {
  try {
    const endpoint = `/repos/${owner}/${repo}/releases?per_page=${limit}`;
    const rows = await githubFetch(endpoint, token);
    const releases: ReleaseData[] = rows.map((row: any) => {
      const { tag_name, name, published_at, prerelease, body } = row;
      return {
        tagName: tag_name,
        name,
        publishedAt: published_at,
        isPrerelease: prerelease,
        body: body?.slice(0, 300),
      };
    });
    return releases;
  } catch {
    return [];
  }
}
376
377
378
/**
 * Builds a compact Markdown-style text summary of a repository.
 *
 * Sections, in order: title, description, stats line, topics, dependencies
 * (first 10), file structure (first 20 directories, then first 15 "key"
 * files no deeper than two path segments), and open issues (first 5).
 * Sections without data are left out.
 *
 * @param data - Repository record to summarize.
 * @returns The summary text, with lines joined by `\n`.
 */
function generateAiSummary(data: RepoData): string {
  const MAX_DEPENDENCIES = 10;
  const MAX_DIRECTORIES = 20;
  const MAX_KEY_FILES = 15;
  const MAX_ISSUES = 5;
  const KEY_FILE_PATTERN = /\.(ts|js|py|go|rs|java|md|toml|yaml|yml|json)$/i;

  const parts: string[] = [`# ${data.fullName}`];
  if (data.description) parts.push(`\n${data.description}`);
  parts.push(
    `\n## Stats: ${data.stars}⭐ ${data.forks}🍴 ${data.language || "N/A"} | License: ${data.license || "N/A"}`,
  );

  if (data.topics.length > 0) {
    parts.push(`Topics: ${data.topics.join(", ")}`);
  }

  if (data.dependencies) {
    const entries = Object.entries(data.dependencies.dependencies);
    parts.push(
      `\n## Dependencies (${data.dependencies.packageManager}): ${entries.length} packages`,
    );
    for (const [name, version] of entries.slice(0, MAX_DEPENDENCIES)) {
      parts.push(`- ${name}: ${version}`);
    }
  }

  if (data.fileTree && data.fileTree.length > 0) {
    parts.push(`\n## File Structure (${data.fileTree.length} items)`);

    const dirs = data.fileTree
      .filter((node) => node.type === "dir")
      .slice(0, MAX_DIRECTORIES);
    for (const dir of dirs) {
      parts.push(`📁 ${dir.path}/`);
    }

    const keyFiles = data.fileTree
      .filter(
        (node) =>
          node.type === "file" &&
          KEY_FILE_PATTERN.test(node.path) &&
          !node.path.includes("node_modules") &&
          node.path.split("/").length <= 2,
      )
      .slice(0, MAX_KEY_FILES);
    for (const file of keyFiles) {
      parts.push(`📄 ${file.path}`);
    }
  }

  if (data.issues && data.issues.length > 0) {
    const shown = data.issues.slice(0, MAX_ISSUES);
    // Report how many are actually listed, not how many were fetched.
    parts.push(
      `\n## Open Issues (${shown.length} of ${data.issues.length} shown)`,
    );
    for (const issue of shown) {
      // Skip the bracket suffix entirely for unlabeled issues.
      const labels =
        issue.labels.length > 0 ? ` [${issue.labels.join(", ")}]` : "";
      parts.push(`- #${issue.number}: ${issue.title}${labels}`);
    }
  }

  return parts.join("\n");
}
434
/**
 * Projects a repository record onto the reduced field set used by the
 * `compact` output format.
 *
 * Only identity, headline stats, topics, `updatedAt`, the default branch,
 * runtime dependencies (without dev dependencies or engines) and the language
 * breakdown are kept.
 *
 * @param data - Full repository record.
 * @returns A new object holding the compact subset.
 */
function toCompact(data: RepoData): Partial<RepoData> {
  const source = data.dependencies;

  // Strip everything but the package manager and runtime dependencies.
  let slimDependencies: RepoData["dependencies"];
  if (source) {
    slimDependencies = {
      packageManager: source.packageManager,
      dependencies: source.dependencies,
    };
  } else {
    slimDependencies = undefined;
  }

  const {
    repoUrl,
    name,
    fullName,
    description,
    stars,
    forks,
    language,
    license,
    topics,
    updatedAt,
    defaultBranch,
    languages,
  } = data;

  const compact: Partial<RepoData> = {
    repoUrl,
    name,
    fullName,
    description,
    stars,
    forks,
    language,
    license,
    topics,
    updatedAt,
    defaultBranch,
    dependencies: slimDependencies,
    languages,
  };
  return compact;
}
457
458
459
// Entry point: reads the Actor input, analyzes each repository in turn and
// pushes one dataset item per repository (or an error item when it fails).
await Actor.init();

// getInput() may yield null and any single field may be missing, so the input
// is treated as partial and every option gets a default below.
const input: Partial<Input> = (await Actor.getInput<Input>()) ?? {};

const {
  includeReadme = true,
  includeFileTree = true,
  fileTreeDepth = 3,
  includeDependencies = true,
  includeIssues = false,
  issueLimit = 20,
  includeContributors = false,
  includeLanguages = true,
  includeReleases = false,
  releaseLimit = 5,
  githubToken,
  outputFormat = "ai-optimized",
} = input;

// Input is untyped at runtime. Accept a single URL string as well as a list,
// so a string is never iterated character by character.
const rawRepoUrls: unknown = input.repoUrls;
const repoUrls: string[] =
  typeof rawRepoUrls === "string"
    ? [rawRepoUrls]
    : Array.isArray(rawRepoUrls)
      ? rawRepoUrls
      : [];

if (repoUrls.length === 0) {
  const message = "repoUrls is required and must contain at least one URL";
  // Marks the run as failed with a readable status message; normally this
  // also terminates the process.
  await Actor.fail(message);
  // Fallback in case the process was not terminated above.
  throw new Error(message);
}

// toCompact() keeps only dependencies and languages out of the optional
// sections, so in compact mode the other sections are not fetched at all:
// the output is identical and the API quota is spared.
const keepsAllSections = outputFormat !== "compact";

log.info(`Analyzing ${repoUrls.length} repositories...`);

for (const url of repoUrls) {
  try {
    const { owner, repo } = parseRepoUrl(url);
    log.info(`Processing: ${owner}/${repo}`);

    const info = await fetchRepoInfo(owner, repo, githubToken);

    const repoData: RepoData = {
      repoUrl: url,
      name: info.name,
      fullName: info.full_name,
      description: info.description,
      stars: info.stargazers_count,
      forks: info.forks_count,
      watchers: info.subscribers_count,
      openIssuesCount: info.open_issues_count,
      language: info.language,
      license: info.license?.spdx_id || null,
      topics: info.topics || [],
      createdAt: info.created_at,
      updatedAt: info.updated_at,
      pushedAt: info.pushed_at,
      defaultBranch: info.default_branch,
      isArchived: info.archived,
      isFork: info.fork,
      size: info.size,
      homepage: info.homepage,
    };

    // The optional sections are independent of each other, so they are
    // fetched concurrently; each task stores its result on repoData.
    const tasks: Promise<void>[] = [];

    if (includeReadme && keepsAllSections) {
      tasks.push(
        fetchReadme(owner, repo, githubToken).then((readme) => {
          repoData.readme = readme || undefined;
        }),
      );
    }

    if (includeFileTree && keepsAllSections) {
      tasks.push(
        fetchFileTree(
          owner,
          repo,
          info.default_branch,
          fileTreeDepth,
          githubToken,
        ).then((tree) => {
          repoData.fileTree = tree;
        }),
      );
    }

    if (includeDependencies) {
      tasks.push(
        fetchDependencies(owner, repo, githubToken).then((deps) => {
          repoData.dependencies = deps || undefined;
        }),
      );
    }

    if (includeIssues && keepsAllSections) {
      tasks.push(
        fetchIssues(owner, repo, issueLimit, githubToken).then((issues) => {
          repoData.issues = issues;
        }),
      );
    }

    if (includeContributors && keepsAllSections) {
      tasks.push(
        fetchContributors(owner, repo, githubToken).then((contributors) => {
          repoData.contributors = contributors;
        }),
      );
    }

    if (includeLanguages) {
      tasks.push(
        fetchLanguages(owner, repo, githubToken).then((langs) => {
          repoData.languages = langs;
        }),
      );
    }

    if (includeReleases && keepsAllSections) {
      tasks.push(
        fetchReleases(owner, repo, releaseLimit, githubToken).then(
          (releases) => {
            repoData.releases = releases;
          },
        ),
      );
    }

    await Promise.all(tasks);

    if (outputFormat === "ai-optimized") {
      repoData.aiSummary = generateAiSummary(repoData);
    }

    const output =
      outputFormat === "compact" ? toCompact(repoData) : repoData;
    await Actor.pushData(output);

    log.info(
      `✅ ${owner}/${repo}: ${repoData.stars}⭐, ${repoData.forks}🍴, ${repoData.language}`,
    );
  } catch (err) {
    // One failing repository must not abort the rest of the batch; record
    // the failure as its own dataset item instead.
    log.error(`❌ Failed to analyze ${url}: ${err}`);
    await Actor.pushData({
      repoUrl: url,
      error: err instanceof Error ? err.message : String(err),
    });
  }
}

log.info("Analysis complete.");
await Actor.exit();