1import { Actor } from 'apify';
2
// Start the Apify Actor runtime before reading input or writing to storage.
await Actor.init();

// Fall back to an empty object when the Actor was started without any input.
const input = (await Actor.getInput()) ?? {};

// Run options. `??` is used instead of destructuring defaults because
// destructuring defaults only apply to `undefined`: an explicit `null` in the
// input JSON (e.g. `"handles": null`) would otherwise reach `for...of` below
// and abort the whole run.
const handles = input.handles ?? [];
const searchQueries = input.searchQueries ?? [];
const maxPostsPerSource = input.maxPostsPerSource ?? 100;
// NOTE(review): includeLikes is read here but is not referenced by any other
// code visible in this file.
const includeLikes = input.includeLikes ?? false;
const includeReposts = input.includeReposts ?? false;
const blueskyHandle = input.blueskyHandle ?? '';
const blueskyPassword = input.blueskyPassword ?? '';

// Unauthenticated AppView host used for profile and feed reads.
const BLUESKY_API = 'https://public.api.bsky.app';
// Host used for session creation and for authenticated search requests.
const BLUESKY_AUTH_API = 'https://bsky.social';
// Access token set by authenticate(); stays null when running anonymously.
let authToken = null;
21
/**
 * Fetches `url` and resolves with the parsed JSON body.
 *
 * @param {string} url - Absolute URL to request.
 * @param {boolean} [useAuth=false] - When true and a session token is held in
 *   the module-level `authToken`, send it as a Bearer token.
 * @returns {Promise<any>} Parsed JSON response body.
 * @throws {Error} `HTTP <status>: <url>` when the response is not OK.
 */
async function fetchJson(url, useAuth = false) {
  const sendToken = useAuth && authToken;
  const headers = sendToken ? { Authorization: `Bearer ${authToken}` } : {};

  const response = await fetch(url, { headers });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`HTTP ${response.status}: ${url}`);
}
31
/**
 * Creates a Bluesky session from the `blueskyHandle` / `blueskyPassword`
 * input values and stores the returned access token in `authToken`.
 *
 * Never throws: every failure is logged and reported through the return value.
 *
 * @returns {Promise<boolean>} true when a session was created; false when
 *   credentials are missing, the server answered with a non-OK status, or the
 *   request itself failed.
 */
async function authenticate() {
  const hasCredentials = Boolean(blueskyHandle) && Boolean(blueskyPassword);
  if (!hasCredentials) {
    return false;
  }

  try {
    const endpoint = `${BLUESKY_AUTH_API}/xrpc/com.atproto.server.createSession`;
    const requestInit = {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        identifier: blueskyHandle,
        password: blueskyPassword,
      }),
    };

    const response = await fetch(endpoint, requestInit);
    if (response.ok) {
      const session = await response.json();
      authToken = session.accessJwt;
      console.log(`Authenticated as ${session.handle}`);
      return true;
    }

    console.error(`Authentication failed: ${response.status}`);
    return false;
  } catch (error) {
    // Network failures and malformed JSON both end up here.
    console.error(`Authentication error: ${error.message}`);
    return false;
  }
}
56
/**
 * Reduces the accepted user spellings of an account to a bare identifier:
 * strips surrounding whitespace, an optional `bsky.app/` or
 * `https://bsky.app/profile/` prefix, a leading `@`, and any trailing
 * path, query string or fragment.
 */
function normalizeHandle(handle) {
  return String(handle ?? '')
    .trim()
    .replace(/^(?:https?:\/\/)?(?:www\.)?bsky\.app\/(?:profile\/)?/i, '')
    .replace(/^@/, '')
    .replace(/[/?#].*$/, '');
}

/**
 * Resolves a Bluesky handle (or profile URL) to its DID.
 *
 * @param {string} handle - Handle such as `alice.bsky.social`, optionally
 *   prefixed with `@`, `bsky.app/` or a full `https://bsky.app/profile/` URL.
 *   A value that is already a DID (`did:...`) is returned without a lookup.
 * @returns {Promise<string>} The DID reported by the resolveHandle endpoint.
 * @throws {Error} When the handle is empty after normalization, or when
 *   fetchJson rejects (non-OK HTTP status).
 */
async function resolveHandle(handle) {
  const cleanHandle = normalizeHandle(handle);
  if (cleanHandle === '') {
    throw new Error(`Invalid handle: "${handle}"`);
  }
  // Profile URLs may carry a DID instead of a handle; nothing to resolve then.
  if (cleanHandle.startsWith('did:')) {
    return cleanHandle;
  }
  const data = await fetchJson(
    `${BLUESKY_API}/xrpc/com.atproto.identity.resolveHandle?handle=${encodeURIComponent(cleanHandle)}`,
  );
  return data.did;
}
62
/**
 * Loads an actor's profile and returns only the fields this scraper stores.
 *
 * @param {string} did - Actor identifier placed in the `actor` query parameter.
 * @returns {Promise<object>} Object with did, handle, displayName,
 *   description, avatar, banner, followersCount, followsCount, postsCount and
 *   createdAt copied from the API response (missing fields are `undefined`).
 * @throws {Error} When fetchJson rejects (non-OK HTTP status).
 */
async function getProfile(did) {
  // Order matters: it defines the key order of the returned record.
  const keptFields = [
    'did',
    'handle',
    'displayName',
    'description',
    'avatar',
    'banner',
    'followersCount',
    'followsCount',
    'postsCount',
    'createdAt',
  ];

  const data = await fetchJson(`${BLUESKY_API}/xrpc/app.bsky.actor.getProfile?actor=${did}`);
  return Object.fromEntries(keptFields.map((field) => [field, data[field]]));
}
78
/**
 * Fetches one page of an actor's feed.
 *
 * @param {string} did - Actor identifier for the `actor` query parameter.
 * @param {number} [limit=50] - Requested page size; values above 100 are
 *   capped at 100.
 * @param {?string} [cursor=null] - Pagination cursor from the previous page;
 *   omitted from the request when falsy.
 * @returns {Promise<object>} Parsed response; callers read `feed` and `cursor`.
 * @throws {Error} When fetchJson rejects (non-OK HTTP status).
 */
async function getAuthorFeed(did, limit = 50, cursor = null) {
  const pageSize = Math.min(limit, 100);
  let url = `${BLUESKY_API}/xrpc/app.bsky.feed.getAuthorFeed?actor=${encodeURIComponent(did)}&limit=${pageSize}`;
  // Cursors are opaque strings: percent-encode so characters such as `+`, `&`
  // or `=` cannot corrupt the query string.
  if (cursor) url += `&cursor=${encodeURIComponent(cursor)}`;
  return fetchJson(url);
}
84
/**
 * Fetches one page of post search results.
 *
 * Uses the authenticated host when a session token is held in `authToken`,
 * otherwise the public host; the request is always made with `useAuth` set.
 *
 * @param {string} query - Search text (percent-encoded into `q`).
 * @param {number} [limit=50] - Requested page size; values above 100 are
 *   capped at 100.
 * @param {?string} [cursor=null] - Pagination cursor from the previous page;
 *   omitted from the request when falsy.
 * @returns {Promise<object>} Parsed response; callers read `posts` and `cursor`.
 * @throws {Error} When fetchJson rejects (non-OK HTTP status).
 */
async function searchPosts(query, limit = 50, cursor = null) {
  const baseUrl = authToken ? BLUESKY_AUTH_API : BLUESKY_API;
  const pageSize = Math.min(limit, 100);
  let url = `${baseUrl}/xrpc/app.bsky.feed.searchPosts?q=${encodeURIComponent(query)}&limit=${pageSize}`;
  // Encode the cursor like the query so reserved characters survive transport.
  if (cursor) url += `&cursor=${encodeURIComponent(cursor)}`;
  return fetchJson(url, true);
}
92
/**
 * Flattens a feed item or search result into the record stored in the dataset.
 *
 * Accepts either a feed item (`{ post: {...} }`) or a bare post view, which is
 * what search results provide.
 *
 * @param {object} feedItem - Feed item or post view.
 * @returns {object} Plain object with identifiers, author summary, text,
 *   engagement counters (0 when absent), media flags, external link details,
 *   labels, languages, hashtags, mentioned DIDs and reply information.
 */
function extractPostData(feedItem) {
  const post = feedItem.post || feedItem;
  const record = post.record || {};
  const text = record.text || '';

  // A quote post with attachments wraps the real media in `media`; look
  // through the wrapper so images/video/links on such posts are detected too.
  const embed = record.embed;
  const media = embed?.$type === 'app.bsky.embed.recordWithMedia' ? embed.media : embed;

  // Collect the DID of every mention feature. A facet can carry several
  // features, and the mention is not guaranteed to be the first one.
  const facets = Array.isArray(record.facets) ? record.facets : [];
  const mentions = facets.flatMap((facet) => (facet?.features || [])
    .filter((feature) => feature?.$type === 'app.bsky.richtext.facet#mention')
    .map((feature) => feature.did));

  return {
    uri: post.uri,
    cid: post.cid,
    author: {
      did: post.author?.did,
      handle: post.author?.handle,
      displayName: post.author?.displayName,
    },
    text,
    createdAt: record.createdAt,
    likeCount: post.likeCount || 0,
    repostCount: post.repostCount || 0,
    replyCount: post.replyCount || 0,
    quoteCount: post.quoteCount || 0,
    hasImages: media?.$type === 'app.bsky.embed.images',
    hasVideo: media?.$type === 'app.bsky.embed.video',
    hasLink: media?.$type === 'app.bsky.embed.external',
    externalLink: media?.external?.uri || null,
    externalTitle: media?.external?.title || null,
    labels: post.labels?.map((label) => label.val) || [],
    languages: record.langs || [],
    // Unicode-aware so tags such as "#café" or "#日本語" are not cut short
    // (`\w` only matches ASCII letters, digits and underscore).
    hashtags: text.match(/#[\p{L}\p{M}\p{N}_]+/gu) || [],
    mentions,
    isReply: !!record.reply,
    parentUri: record.reply?.parent?.uri || null,
    indexedAt: post.indexedAt,
  };
}
125
// `reason.$type` value that marks an author-feed item as a repost.
const REPOST_REASON_TYPE = 'app.bsky.feed.defs#reasonRepost';

/**
 * Extracts one post and pushes it to the dataset, tagged with its source.
 *
 * @param {object} item - Feed item or post view accepted by extractPostData.
 * @param {string} source - Origin label, e.g. `profile:<handle>` or `search:<query>`.
 */
async function pushPost(item, source) {
  await Actor.pushData({
    ...extractPostData(item),
    _type: 'POST',
    source,
    scrapedAt: new Date().toISOString(),
  });
}

// Set when something outside the per-source handlers fails, so the run can be
// reported as failed instead of exiting with a success status.
let fatalError = null;

try {
  if (blueskyHandle && blueskyPassword) {
    await authenticate();
  }

  // --- Profiles: one PROFILE record plus up to maxPostsPerSource posts each.
  for (const handle of handles) {
    console.log(`\nProcessing handle: ${handle}`);
    try {
      const did = await resolveHandle(handle);
      const profile = await getProfile(did);

      await Actor.pushData({
        _type: 'PROFILE',
        ...profile,
        scrapedAt: new Date().toISOString(),
      });

      let collected = 0;
      let cursor = null;

      while (collected < maxPostsPerSource) {
        const remaining = maxPostsPerSource - collected;
        // Reposts are dropped client-side, so a page can yield fewer usable
        // posts than requested; ask for a full page in that case
        // (getAuthorFeed caps the size at 100).
        const pageSize = includeReposts ? remaining : 100;
        const feed = await getAuthorFeed(did, pageSize, cursor);
        const items = feed.feed || [];

        if (items.length === 0) break;

        for (const item of items) {
          if (collected >= maxPostsPerSource) break;
          // Honour the includeReposts input: skip reposts unless requested.
          if (!includeReposts && item.reason?.$type === REPOST_REASON_TYPE) continue;
          await pushPost(item, `profile:${handle}`);
          collected++;
        }

        // Stop when there is no next page or the cursor did not advance.
        const nextCursor = feed.cursor;
        if (!nextCursor || nextCursor === cursor) break;
        cursor = nextCursor;
      }

      console.log(`Collected ${collected} posts from ${handle}`);
    } catch (e) {
      // Best effort: one failing handle must not stop the others.
      console.error(`Error processing ${handle}: ${e.message}`);
    }
  }

  // --- Searches: only run when a session token is available.
  if (searchQueries.length > 0 && !authToken) {
    console.log('\nNote: Search requires Bluesky credentials. Provide blueskyHandle and blueskyPassword in input to enable search.');
    console.log('Profile scraping (handles) works without credentials.');
  }

  for (const query of searchQueries) {
    if (!authToken) {
      console.log(`Skipping search "${query}" — authentication required.`);
      continue;
    }
    console.log(`\nSearching: "${query}"`);
    try {
      let collected = 0;
      let cursor = null;

      while (collected < maxPostsPerSource) {
        const remaining = maxPostsPerSource - collected;
        const results = await searchPosts(query, remaining, cursor);
        const posts = results.posts || [];

        if (posts.length === 0) break;

        for (const post of posts) {
          if (collected >= maxPostsPerSource) break;
          await pushPost(post, `search:${query}`);
          collected++;
        }

        // Stop when there is no next page or the cursor did not advance.
        const nextCursor = results.cursor;
        if (!nextCursor || nextCursor === cursor) break;
        cursor = nextCursor;
      }

      console.log(`Collected ${collected} posts for "${query}"`);
    } catch (e) {
      // Best effort: one failing query must not stop the others.
      console.error(`Error searching "${query}": ${e.message}`);
    }
  }
} catch (e) {
  console.error(`Fatal error: ${e.message}`);
  fatalError = e;
}

if (fatalError) {
  // Report the run as failed rather than exiting with a success status.
  await Actor.fail(`Fatal error: ${fatalError.message}`);
} else {
  await Actor.exit();
}