EventBrite
Deprecated — View all Actors
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
EventBrite
sashadickey/eventbrite
Dockerfile
# Dockerfile used to build and run this Actor.
# The base image must set a correct working directory, such as /usr/src/app or /home/user,
# because the COPY instructions below are relative to it.
# The tag is the same version string as the "apify" dependency in package.json (0.21.10).
FROM apify/actor-node-chrome:v0.21.10

# Copy just package.json and package-lock.json first. They are the only files
# that affect "npm install" in the next step, so the install layer stays cached
# when only the source code changes.
COPY package*.json ./

# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much, and print the dependency
# tree plus the Node.js/NPM versions for debugging.
# "npm list" is allowed to fail (|| true) so that it cannot break the build.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy the source code into the container.
# This is the last step so that rebuilds are fast when only the source code changed.
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
1{
2 "name": "apify-project",
3 "version": "0.0.1",
4 "description": "",
5 "author": "It's not you it's me",
6 "license": "ISC",
7 "dependencies": {
8 "bluebird": "latest",
9 "apify": "0.21.10",
10 "mongodb": "latest",
11 "lodash": "latest"
12 },
13 "scripts": {
14 "start": "node main.js"
15 }
16}
main.js
1const Promise = require('bluebird');
2const Apify = require('apify');
3const querystring = require('querystring');
4const { MongoClient } = require('mongodb');
5const _ = require('lodash');
6
/**
 * Stores a scraped event document in a MongoDB collection, keyed by `event.id`.
 *
 * When the document carries an event id, the stored document with the same
 * `event.id` is replaced, or the document is inserted if none exists (upsert).
 * When there is no id, the document is simply inserted, because matching on a
 * null id could overwrite an unrelated record.
 *
 * Failures are logged and swallowed so that one bad record does not abort the
 * crawl (best-effort import).
 *
 * @param {object} collection - MongoDB collection (needs `replaceOne` and `insertOne`).
 * @param {object} object - Document to store; the key is read from `object.event.id`.
 * @returns {Promise<void>}
 */
const importObjectToCollection = async (collection, object) => {
    try {
        const eventId = object && object.event ? object.event.id : undefined;

        if (eventId === undefined || eventId === null) {
            await collection.insertOne(object);
            return;
        }

        // Dotted path on purpose: a nested `{ event: { id } }` filter would demand
        // that the whole `event` sub-document equals `{ id }`, which never matches.
        await collection.replaceOne({ 'event.id': eventId }, object, { upsert: true });
    } catch (err) {
        console.log(`Cannot import object ${JSON.stringify(object)}: ${err.message}`);
    }
};
19
/** Root of the Eventbrite city/category listing URLs. */
const ROOT_URL = 'https://www.eventbrite.com/d';

/** Selector of the JSON-LD script read on listing pages. */
const LISTING_LD_JSON_SELECTOR = 'script[type="application/ld+json"]';

/** Selector of the JSON-LD script read on event detail pages. */
const EVENT_LD_JSON_SELECTOR = 'script[type="text/javascript"] ~ script[type="application/ld+json"]';

/** Retry budgets of the two crawlers (total attempts = retries + 1). */
const CATEGORY_MAX_RETRIES = 5;
const EVENT_MAX_RETRIES = 0;

/**
 * Runs in the browser (passed to `page.$eval`): returns the element text with
 * non-ASCII characters removed and whitespace runs collapsed to one space.
 * Must stay self-contained, because it is executed in the page context.
 */
const readCleanText = element => element.innerText.trim().replace(/[^\x00-\x7F]/g, "").replace(/[\s]{2,}/g, ' ');

/**
 * Runs in the browser (passed to `page.$$eval`): maps the matched elements to
 * their `data-eid` attribute values.
 */
const readEventIds = cards => cards.map(card => card.getAttribute('data-eid'));

/**
 * Extracts the numeric event id from an event URL.
 * URLs ending in "es2" carry the id right before "?aff"; all other URLs are
 * searched for the first run of at least 8 digits.
 *
 * @param {string} url
 * @returns {string|null} The id, or null when the URL does not match.
 */
const extractEventId = (url) => {
    const pattern = url.slice(-3) === 'es2' ? /(\d+)\?aff/ : /(\d{8,})/;
    const match = pattern.exec(url);
    return match ? match[1] : null;
};

/**
 * Returns the listing URL with its `page=N` parameter incremented by one.
 *
 * @param {string} url
 * @returns {string}
 */
const getNextPageUrl = url => url.replace(
    /page=(\d+)/,
    (match, pageNumber) => `page=${parseInt(pageNumber, 10) + 1}`,
);

/**
 * Reads the `sll` ("lat,lng") query parameter of a "view map" link.
 * Only the part after "?" is parsed; parsing the whole href would fuse the URL
 * prefix into the first parameter name.
 *
 * @param {string} href
 * @returns {{latitude: string, longitude: string}|null}
 */
const parseMapCoordinates = (href) => {
    const queryStart = href.indexOf('?');
    const query = querystring.parse(queryStart === -1 ? href : href.slice(queryStart + 1));
    if (!query.sll) return null;
    const [latitude, longitude] = query.sll.split(',');
    return { latitude, longitude };
};

/**
 * Builds the empty event record that is filled in by the event crawler.
 *
 * @returns {object}
 */
const createEmptyEvent = () => ({
    owner: {
        name: null,
        description: null,
        url: null,
    },
    place: {
        name: null,
        location: {
            street: null,
            street2: null,
            city: null,
            country: null,
            region: null,
            postcode: null,
            latitude: null,
            longitude: null,
        },
    },
    event: {
        id: null,
        name: null,
        url: null,
        description: null,
        dates: [{
            start: null,
            end: null,
        }],
        images: [{
            source: null,
            width: null,
            height: null,
        }],
        tickets: {
            url: null,
        },
        timezone: null,
        updated_time: null,
        tags: null,
        suggestions: null,
    },
});

/**
 * Collects the ids of the "related events" cards of an event page.
 * Tries the regular layout first; only when that yields nothing does it wait
 * for (and read) the alternative layout. Any failure is logged and the best
 * result obtained so far is returned, so a missing suggestions section never
 * prevents the rest of the event from being scraped.
 *
 * @param {object} page - Puppeteer page.
 * @returns {Promise<Array<string|null>|null>}
 */
const scrapeSuggestions = async (page) => {
    let suggestions = null;
    try {
        suggestions = await page.$$eval('section.listing-related-events a.poster-card__main', readEventIds);
    } catch (error) {
        console.log('Suggestions Error: '+ error);
    }
    if (suggestions && suggestions[0] != null) return suggestions;

    // Fallback selector for irregularly formatted event pages.
    try {
        await page.waitForSelector('div.related-events a.poster-card__main', { timeout: 10000 });
        suggestions = await page.$$eval('div.related-events a.poster-card__main', readEventIds);
    } catch (error) {
        console.log('Suggestions Error: '+ error);
    }
    return suggestions;
};

/**
 * Actor entry point.
 *
 * 1. Enqueues one listing URL per (city, category) pair from `INPUT.cities`.
 * 2. The category crawler reads the JSON-LD event list of each listing page,
 *    enqueues every event URL and then the next listing page.
 * 3. The event crawler scrapes each event page and stores the record in the
 *    MongoDB "events" collection via `importObjectToCollection`.
 *
 * The MongoDB connection string is read from the `MONGODB_URI` environment
 * variable; credentials must not be committed to source code.
 */
Apify.main(async () => {
    const input = await Apify.getValue('INPUT');
    if (!input || !Array.isArray(input.cities)) {
        throw new Error('INPUT must be an object with a "cities" array.');
    }

    const mongoUri = process.env.MONGODB_URI;
    if (!mongoUri) {
        throw new Error('The MONGODB_URI environment variable must hold the MongoDB connection string.');
    }

    const mongodbConnection = await MongoClient.connect(mongoUri, { useNewUrlParser: true });
    try {
        const collection = await mongodbConnection.db("eventbrite").collection('events');

        const categoryQueue = await Apify.openRequestQueue('EBcategories');
        const eventQueue = await Apify.openRequestQueue('EBevents');

        // Only the first category is enabled; the rest are kept as a reference list.
        const categories = [
            'business--events',
            // 'arts--events',
            // 'hobbies--events',
            // 'music--events',
            // 'health--events',
            // 'spirituality--events',
            // 'family-and-education--events',
            // 'other--events',
            // 'film-and-media--events',
            // 'travel-and-outdoor--events',
            // 'food-and-drink--events',
            // 'science-and-tech--events',
            // 'fashion--events',
            // 'home-and-lifestyle--events',
            // 'sports-and-fitness--events',
            // 'charity-and-causes--events',
            // 'community--events',
            // 'government--events'
        ];

        // Enqueue the start URLs. Each addRequest is awaited so that the queue is
        // populated before the crawler starts and so that failures surface here.
        for (const city of input.cities) {
            for (const category of categories) {
                await categoryQueue.addRequest(new Apify.Request({
                    url: `${ROOT_URL}/${city}/${category}/?crt=regular&page=1&sort=best`,
                }));
            }
        }

        const categoryCrawler = new Apify.PuppeteerCrawler({
            requestQueue: categoryQueue,
            maxConcurrency: 5,
            maxRequestRetries: CATEGORY_MAX_RETRIES,
            launchPuppeteerOptions: {
                useChrome: true
            },

            // Executed for each listing page; `page` is the loaded Puppeteer page.
            // A throw before the try block (missing or unparsable JSON-LD) fails
            // the request, which is then retried up to CATEGORY_MAX_RETRIES times.
            handlePageFunction: async ({ page, request }) => {
                console.log(`Processing ${request.url}...`);
                await page.waitForSelector(LISTING_LD_JSON_SELECTOR, { timeout: 4000 });
                const pageJson = JSON.parse(await page.$eval(LISTING_LD_JSON_SELECTOR, readCleanText));
                try {
                    // Only continue to the next page if we found at least one event.
                    if (Array.isArray(pageJson) && pageJson.length > 0) {
                        const listedEvents = [];

                        for (const eventObject of pageJson) {
                            const url = eventObject && eventObject.url;
                            if (typeof url !== 'string') {
                                console.log(`Skipping listing entry without a url on ${request.url}`);
                                continue;
                            }
                            const eid = extractEventId(url);
                            if (eid === null) {
                                const error = "Could Not get eid of URL: " + url;
                                console.log('---- ------ ----- ---- ' + error);
                            }
                            // Events without an id are still enqueued, as before.
                            listedEvents.push({ url, eid });
                        }

                        listedEvents.forEach((listedEvent) => {
                            console.log(listedEvent.url);
                        });

                        await Promise.map(listedEvents, listedEvent => eventQueue.addRequest(
                            new Apify.Request({ url: listedEvent.url, userData: listedEvent }),
                        ));

                        console.log('--- ---- PROMISE MAPPED')

                        // Enqueue next page.
                        const nextUrl = getNextPageUrl(request.url);
                        console.log('ADDING NEXT URL TO QUEUE')
                        await categoryQueue.addRequest(new Apify.Request({ url: nextUrl }));
                    }
                } catch (err) {
                    console.log("++++++ ++++++ ERR IS: " + err);
                }
            },

            // Executed once a request has exhausted all of its attempts.
            handleFailedRequestFunction: async ({ request }) => {
                console.log(`Request ${request.url} failed after ${CATEGORY_MAX_RETRIES + 1} attempt(s)`);
            },
        });

        await categoryCrawler.run();

        // The returned dataset is not used; the call is kept only in case something
        // relies on the named dataset having been opened.
        // NOTE(review): confirm whether anything needs it, otherwise remove.
        await Apify.openDataset('EBevents');

        const eventCrawler = new Apify.PuppeteerCrawler({
            requestQueue: eventQueue,
            maxConcurrency: 5,
            maxRequestRetries: EVENT_MAX_RETRIES,

            // Executed for each event page; `page` is the loaded Puppeteer page.
            // Scraping errors are logged and the (possibly partial) record is
            // still stored.
            handlePageFunction: async ({ page, request }) => {
                console.log(`Processing ${request.url}...`);

                const event = createEmptyEvent();
                event.event.url = request.url;

                try {
                    const jsonEventData = JSON.parse(await page.$eval(EVENT_LD_JSON_SELECTOR, readCleanText));

                    event.event.suggestions = await scrapeSuggestions(page);
                    console.log('++++++++++++++++++++++++++++' + event.event.suggestions)

                    event.event.id = request.userData.eid;
                    event.event.name = jsonEventData.name;
                    event.event.description = jsonEventData.description;
                    event.event.images[0].source = jsonEventData.image;
                    event.event.dates[0].start = jsonEventData.startDate;
                    event.event.dates[0].end = jsonEventData.endDate;

                    // Missing offers/organizer/location must not prevent the
                    // remaining sections from being filled in.
                    const offers = jsonEventData.offers;
                    const offer = Array.isArray(offers) ? offers[0] : offers;
                    if (offer) {
                        event.event.tickets.url = offer.url;
                        event.event.tickets.lowPrice = offer.lowPrice;
                        event.event.tickets.highPrice = offer.highPrice;
                        event.event.tickets.inventory = offer.inventoryLevel;
                        event.event.tickets.startDate = offer.availabilityStarts;
                        event.event.tickets.endDate = offer.availabilityEnds;
                        event.event.tickets.currency = offer.priceCurrency;
                    }

                    const organizer = jsonEventData.organizer;
                    if (organizer) {
                        event.owner.url = organizer.url;
                        event.owner.name = organizer.name;
                        event.owner.description = organizer.description;
                    }

                    const location = jsonEventData.location;
                    if (location) {
                        event.place.name = location.name;
                        const address = location.address;
                        if (address) {
                            event.place.location.street = address.streetAddress;
                            event.place.location.city = address.addressLocality;
                            event.place.location.region = address.addressRegion;
                            event.place.location.postcode = address.postalCode;
                            event.place.location.country = address.addressCountry;
                        }
                    }

                    // Best effort: coordinates come from the "view map" link, which
                    // not every page has.
                    try {
                        const coordinates = parseMapCoordinates(await page.$eval('a.js-view-map-link', a => a.href));
                        if (coordinates) {
                            event.place.location.latitude = coordinates.latitude;
                            event.place.location.longitude = coordinates.longitude;
                        }
                    } catch (e) {
                        console.log(`No map coordinates for ${request.url}: ${e.message}`);
                    }
                } catch (error) {
                    console.log('ERROR IS : ' + error);
                }

                // Save data.
                await importObjectToCollection(collection, event);
            },

            // Executed once a request has exhausted all of its attempts.
            handleFailedRequestFunction: async ({ request }) => {
                console.log(`Request ${request.url} failed after ${EVENT_MAX_RETRIES + 1} attempt(s)`);
            },
        });

        await eventCrawler.run();
    } finally {
        // Release the MongoDB connection even when a crawler throws.
        await mongodbConnection.close();
    }
});
Developer
Maintained by Community
Categories