
EventBrite
Deprecated
Pricing
Pay per usage
Go to Store

EventBrite
Deprecated
0.0 (0)
Pricing
Pay per usage
0
Total users
3
Monthly users
2
Last modified
3 years ago
Dockerfile
# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-chrome:v0.21.10

# Second, copy just package.json and package-lock.json since it should be
# the only file that affects "npm install" in the next step, to speed up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{ "name": "apify-project", "version": "0.0.1", "description": "", "author": "It's not you it's me", "license": "ISC", "dependencies": { "bluebird": "latest", "apify": "0.21.10", "mongodb": "latest", "lodash": "latest" }, "scripts": { "start": "node main.js" }}
main.js
1const Promise = require('bluebird');2const Apify = require('apify');3const querystring = require('querystring');4const { MongoClient } = require('mongodb');5const _ = require('lodash');6
/**
 * Stores a scraped event document in a MongoDB collection, replacing any
 * document that already carries the same `event.id`.
 *
 * The lookup uses a dot-notation filter (`{ 'event.id': id }`). A nested filter
 * such as `{ event: { id } }` would be an exact embedded-document match and
 * would never match a stored event, because stored events have more fields
 * than just `id`.
 *
 * Failures are logged and swallowed on purpose: one bad document must not
 * abort the whole crawl.
 *
 * @param {object} collection - MongoDB collection exposing `replaceOne` and `insertOne`.
 * @param {object} object - Document to store; expected to contain `event.id`.
 * @returns {Promise<void>} Resolves once the write finished or the failure was logged.
 */
const importObjectToCollection = async (collection, object) => {
    try {
        const eventId = object && object.event ? object.event.id : undefined;

        if (eventId === undefined || eventId === null) {
            // Without an id there is nothing to de-duplicate on. Filtering on a null
            // id would match (and overwrite) some unrelated id-less document.
            await collection.insertOne(object);
            return;
        }

        // A single upsert replaces the find-then-write pair, so concurrent page
        // handlers cannot both decide that the event is missing.
        await collection.replaceOne({ 'event.id': eventId }, object, { upsert: true });
    } catch (err) {
        console.log(`Cannot import object ${JSON.stringify(object)}: ${err.message}`);
    }
};
// Selector of the JSON-LD script that category (listing) pages embed.
const CATEGORY_LD_JSON_SELECTOR = 'script[type="application/ld+json"]';
// Selector of the JSON-LD script read on event detail pages.
const EVENT_LD_JSON_SELECTOR = 'script[type="text/javascript"] ~ script[type="application/ld+json"]';
// Related-event cards on regular event pages, and on irregularly formatted ones.
const SUGGESTIONS_SELECTOR = 'section.listing-related-events a.poster-card__main';
const SUGGESTIONS_FALLBACK_SELECTOR = 'div.related-events a.poster-card__main';

// Retry budgets passed to the crawlers; also used to report the real attempt count.
const CATEGORY_MAX_RETRIES = 5;
const EVENT_MAX_RETRIES = 0;

/**
 * In-page function (passed to page.$eval, so it must stay self-contained):
 * returns the element text with non-ASCII characters removed and whitespace
 * runs collapsed, ready to be handed to JSON.parse.
 */
const readAsciiText = (element) => element.innerText.trim().replace(/[^\x00-\x7F]/g, '').replace(/[\s]{2,}/g, ' ');

/** In-page function (passed to page.$$eval): collects the `data-eid` attribute of each card. */
const readDataEids = (cards) => cards.map((card) => card.getAttribute('data-eid'));

/**
 * Extracts the numeric event id from an event URL.
 * URLs ending in "es2" carry the id right before "?aff"; all other URLs are
 * searched for the first run of at least eight digits.
 *
 * @param {string} url - Event URL taken from the listing JSON-LD.
 * @returns {string|null} The id, or null when the URL does not contain one.
 */
const parseEventId = (url) => {
    const match = url.endsWith('es2') ? /(\d+)\?aff/.exec(url) : /(\d{8,})/.exec(url);
    return match && match[1] ? match[1] : null;
};

/** Builds the empty event document that the event page handler fills in. */
const createEmptyEvent = (url) => ({
    owner: {
        name: null,
        description: null,
        url: null,
    },
    place: {
        name: null,
        location: {
            street: null,
            street2: null,
            city: null,
            country: null,
            region: null,
            postcode: null,
            latitude: null,
            longitude: null,
        },
    },
    event: {
        id: null,
        name: null,
        url,
        description: null,
        dates: [{
            start: null,
            end: null,
        }],
        images: [{
            source: null,
            width: null,
            height: null,
        }],
        tickets: {
            url: null,
        },
        timezone: null,
        updated_time: null,
        tags: null,
        suggestions: null,
    },
});

/**
 * Reads the ids of the related ("suggested") events from an event page.
 * The fallback layout is only waited for when the regular selector produced
 * nothing, and a timeout there is logged instead of aborting the extraction
 * of the remaining event fields.
 *
 * @returns {Promise<Array|null>} Ids found, or null when both lookups failed.
 */
const readSuggestedEventIds = async (page) => {
    let suggestions = null;
    try {
        suggestions = await page.$$eval(SUGGESTIONS_SELECTOR, readDataEids);
    } catch (error) {
        console.log(`Suggestions Error: ${error}`);
    }
    if (suggestions && suggestions[0] != null) {
        return suggestions;
    }

    try {
        // The timeout must be passed as an options object; a bare number is ignored.
        await page.waitForSelector(SUGGESTIONS_FALLBACK_SELECTOR, { timeout: 10000 });
        suggestions = await page.$$eval(SUGGESTIONS_FALLBACK_SELECTOR, readDataEids);
    } catch (error) {
        console.log(`Suggestions Error: ${error}`);
    }
    return suggestions;
};

/**
 * Best-effort read of the coordinates from the `sll` query parameter of the
 * "view map" link. Only the query part of the link is parsed, so `sll` is also
 * found when it is the first parameter.
 *
 * @returns {Promise<{latitude: string, longitude: string}|null>} null when unavailable.
 */
const readCoordinates = async (page) => {
    try {
        const href = await page.$eval('a.js-view-map-link', (a) => a.href);
        const { sll } = querystring.parse(href.slice(href.indexOf('?') + 1));
        if (typeof sll === 'string' && sll) {
            const [latitude, longitude] = sll.split(',');
            return { latitude, longitude };
        }
    } catch (error) {
        // The link is optional; report the miss instead of swallowing it silently.
        console.log(`Coordinates not available: ${error.message}`);
    }
    return null;
};

/** Creates a failure handler that reports the real number of attempts made. */
const makeFailedRequestHandler = (maxRetries) => async ({ request }) => {
    console.log(`Request ${request.url} failed after ${maxRetries + 1} attempt(s)`);
};

/**
 * Actor entry point.
 * 1. Enqueues one listing URL per (city, category) pair from INPUT.cities.
 * 2. Crawls the listing pages, enqueueing every event URL and the next page.
 * 3. Crawls the event pages and stores each event in MongoDB.
 */
Apify.main(async () => {
    const input = await Apify.getValue('INPUT');
    const cities = input && input.cities;
    if (!Array.isArray(cities)) {
        throw new Error('INPUT must contain a "cities" array');
    }

    // FIXME(security): database credentials are hard-coded in source. Set MONGODB_URI
    // and rotate this password; the literal is kept only for backward compatibility.
    const mongodbUri = process.env.MONGODB_URI
        || 'mongodb+srv://admin:FvFnYrzeFrq0AxDh@production-kwhlb.mongodb.net/eventbrite';
    const mongodbConnection = await MongoClient.connect(mongodbUri, { useNewUrlParser: true });

    try {
        const collection = mongodbConnection.db('eventbrite').collection('events');

        // Named queues: one for listing pages, one for event pages.
        const categoryQueue = await Apify.openRequestQueue('EBcategories');
        const eventQueue = await Apify.openRequestQueue('EBevents');

        const rootUrl = 'https://www.eventbrite.com/d';

        const categories = [
            'business--events',
            // 'arts--events',
            // 'hobbies--events',
            // 'music--events',
            // 'health--events',
            // 'spirituality--events',
            // 'family-and-education--events',
            // 'other--events',
            // 'film-and-media--events',
            // 'travel-and-outdoor--events',
            // 'food-and-drink--events',
            // 'science-and-tech--events',
            // 'fashion--events',
            // 'home-and-lifestyle--events',
            // 'sports-and-fitness--events',
            // 'charity-and-causes--events',
            // 'community--events',
            // 'government--events'
        ];

        // Enqueue the start URLs and wait for each one. Async callbacks inside forEach
        // are not awaited, which could start the crawler on a still-empty queue.
        for (const city of cities) {
            for (const category of categories) {
                await categoryQueue.addRequest(new Apify.Request({
                    url: `${rootUrl}/${city}/${category}/?crt=regular&page=1&sort=best`,
                }));
            }
        }

        const categoryCrawler = new Apify.PuppeteerCrawler({
            requestQueue: categoryQueue,
            maxConcurrency: 5,
            maxRequestRetries: CATEGORY_MAX_RETRIES,
            launchPuppeteerOptions: {
                useChrome: true,
            },

            // Executed for each listing page; `page` is the loaded Puppeteer page.
            // A rejected handler is retried up to CATEGORY_MAX_RETRIES times.
            handlePageFunction: async ({ page, request }) => {
                console.log(`Processing ${request.url}...`);
                await page.waitForSelector(CATEGORY_LD_JSON_SELECTOR, { timeout: 4000 });
                // Parsing stays outside the try block so that a broken page is retried.
                const pageJson = JSON.parse(await page.$eval(CATEGORY_LD_JSON_SELECTOR, readAsciiText));
                try {
                    // Only continue to the next page if we found at least one event.
                    if (!Array.isArray(pageJson) || pageJson.length === 0) {
                        return;
                    }

                    const events = [];
                    for (const eventObject of pageJson) {
                        if (typeof eventObject.url !== 'string') {
                            console.log('---- ------ ----- ---- Listing entry without a URL skipped');
                            continue;
                        }
                        const eid = parseEventId(eventObject.url);
                        if (eid === null) {
                            console.log(`---- ------ ----- ---- Could Not get eid of URL: ${eventObject.url}`);
                        }
                        // Events without an id are still crawled; only the id stays null.
                        events.push({ url: eventObject.url, eid });
                    }
                    events.forEach((event) => {
                        console.log(event.url);
                    });

                    await Promise.map(events, (event) => eventQueue.addRequest(
                        new Apify.Request({ url: event.url, userData: event }),
                    ));

                    console.log('--- ---- PROMISE MAPPED');

                    // Enqueue next page.
                    const nextUrl = request.url.replace(
                        /page=(\d+)/,
                        (match, pageNumber) => `page=${Number.parseInt(pageNumber, 10) + 1}`,
                    );
                    console.log('ADDING NEXT URL TO QUEUE');
                    await categoryQueue.addRequest(new Apify.Request({ url: nextUrl }));
                } catch (err) {
                    console.log(`++++++ ++++++ ERR IS: ${err}`);
                }
            },

            // Executed once a request has used up all of its attempts.
            handleFailedRequestFunction: makeFailedRequestHandler(CATEGORY_MAX_RETRIES),
        });

        await categoryCrawler.run();

        // Results are written to MongoDB; the dataset is opened as before but not used.
        await Apify.openDataset('EBevents');

        const eventCrawler = new Apify.PuppeteerCrawler({
            requestQueue: eventQueue,
            maxConcurrency: 5,
            maxRequestRetries: EVENT_MAX_RETRIES,

            // Executed for each event page; `page` is the loaded Puppeteer page.
            // Event pages are not retried (EVENT_MAX_RETRIES is 0).
            handlePageFunction: async ({ page, request }) => {
                console.log(`Processing ${request.url}...`);

                const event = createEmptyEvent(request.url);

                try {
                    const jsonEventData = JSON.parse(await page.$eval(EVENT_LD_JSON_SELECTOR, readAsciiText));

                    event.event.suggestions = await readSuggestedEventIds(page);
                    console.log(`++++++++++++++++++++++++++++${event.event.suggestions}`);

                    event.event.id = request.userData.eid;
                    event.event.name = jsonEventData.name;
                    event.event.description = jsonEventData.description;
                    event.event.images[0].source = jsonEventData.image;
                    event.event.dates[0].start = jsonEventData.startDate;
                    event.event.dates[0].end = jsonEventData.endDate;

                    // `offers` may be missing, a single object or an array. A missing
                    // section must not prevent the organizer and place from being stored.
                    const { offers } = jsonEventData;
                    const offer = (Array.isArray(offers) ? offers[0] : offers) || {};
                    event.event.tickets.url = offer.url;
                    event.event.tickets.lowPrice = offer.lowPrice;
                    event.event.tickets.highPrice = offer.highPrice;
                    event.event.tickets.inventory = offer.inventoryLevel;
                    event.event.tickets.startDate = offer.availabilityStarts;
                    event.event.tickets.endDate = offer.availabilityEnds;
                    event.event.tickets.currency = offer.priceCurrency;

                    const organizer = jsonEventData.organizer || {};
                    event.owner.url = organizer.url;
                    event.owner.name = organizer.name;
                    event.owner.description = organizer.description;

                    const location = jsonEventData.location || {};
                    const address = location.address || {};
                    event.place.name = location.name;
                    event.place.location.street = address.streetAddress;
                    event.place.location.city = address.addressLocality;
                    event.place.location.region = address.addressRegion;
                    event.place.location.postcode = address.postalCode;
                    event.place.location.country = address.addressCountry;

                    const coordinates = await readCoordinates(page);
                    if (coordinates) {
                        event.place.location.latitude = coordinates.latitude;
                        event.place.location.longitude = coordinates.longitude;
                    }
                } catch (error) {
                    console.log(`ERROR IS : ${error}`);
                }

                // Save whatever could be extracted, even if extraction failed part-way.
                await importObjectToCollection(collection, event);
            },

            // Executed once a request has used up all of its attempts.
            handleFailedRequestFunction: makeFailedRequestHandler(EVENT_MAX_RETRIES),
        });

        await eventCrawler.run();
    } finally {
        // Release the database connection on success and on failure.
        await mongodbConnection.close();
    }
});