EventBrite avatar
EventBrite
Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
EventBrite

EventBrite

sashadickey/eventbrite

Dockerfile

1# Dockerfile used to build the image that runs this actor.
2# The base image name below is set during the act build, based on user settings.
3# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
4FROM apify/actor-node-chrome:v0.21.10
5
6# First, copy just package.json and package-lock.json, since they should be
7# the only files that affect "npm install" in the next step; this speeds up the build
8COPY package*.json ./
9
10# Install NPM packages, skipping optional and development dependencies to
11# keep the image small. Avoid logging too much, and print the dependency
12# tree plus the Node.js and NPM versions for debugging
13RUN npm --quiet set progress=false \
14 && npm install --only=prod --no-optional \
15 && echo "Installed NPM packages:" \
16 && (npm list --all || true) \
17 && echo "Node.js version:" \
18 && node --version \
19 && echo "NPM version:" \
20 && npm --version
21
22# Copy the source code to the container.
23# Do this in the last step, so the build stays fast when only the source code changed
24COPY --chown=myuser:myuser . ./
25
26# NOTE: The CMD is already defined by the base image.
27# Uncomment this for local node inspector debugging:
28# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

1{
2    "name": "apify-project",
3    "version": "0.0.1",
4    "description": "",
5    "author": "It's not you it's me",
6    "license": "ISC",
7    "dependencies": {
8        "bluebird": "latest",
9        "apify": "0.21.10",
10        "mongodb": "latest",
11        "lodash": "latest"
12    },
13    "scripts": {
14        "start": "node main.js"
15    }
16}

main.js

1const Promise = require('bluebird');
2const Apify = require('apify');
3const querystring = require('querystring');
4const { MongoClient } = require('mongodb');
5const _ = require('lodash');
6
/**
 * Stores one scraped event in MongoDB, keyed by its Eventbrite event id.
 *
 * When `object.event.id` is set, the stored document with the same `event.id`
 * is replaced, or the object is inserted if no such document exists (upsert).
 * When the id is missing, the object is inserted as a new document because
 * there is nothing to match it against.
 *
 * Failures are logged and swallowed on purpose, so that one bad record does
 * not abort the crawl.
 *
 * @param {object} collection - MongoDB collection (needs `replaceOne` and `insertOne`).
 * @param {object} object - Scraped event; the id is read from `object.event.id`.
 * @returns {Promise<void>}
 */
const importObjectToCollection = async (collection, object) => {
    try {
        const eventId = object && object.event ? object.event.id : null;

        if (eventId === null || eventId === undefined) {
            await collection.insertOne(object);
            return;
        }

        // The dotted path matches on the nested field alone. A nested
        // `{ event: { id } }` filter would demand that the whole `event`
        // sub-document equals `{ id }`, which never matches a full record.
        // A single upsert also avoids the find-then-write race between
        // concurrently handled pages.
        await collection.replaceOne({ 'event.id': eventId }, object, { upsert: true });
    } catch (err) {
        console.log(`Cannot import object ${JSON.stringify(object)}: ${err.message}`);
    }
};
19
// Selectors used by the two crawlers below.
const LISTING_LD_JSON_SELECTOR = 'script[type="application/ld+json"]';
const EVENT_LD_JSON_SELECTOR = 'script[type="text/javascript"] ~ script[type="application/ld+json"]';
const RELATED_EVENTS_SELECTOR = 'section.listing-related-events a.poster-card__main';
const RELATED_EVENTS_FALLBACK_SELECTOR = 'div.related-events a.poster-card__main';

/**
 * Reads the first element matching `selector` and parses its text as JSON.
 * Non-ASCII characters are dropped and whitespace runs are collapsed before parsing.
 * Rejects when the element is missing or its text is not valid JSON.
 */
const readLdJson = async (page, selector) => {
    const rawJson = await page.$eval(selector, element => element.innerText.trim().replace(/[^\x00-\x7F]/g, '').replace(/[\s]{2,}/g, ' '));
    return JSON.parse(rawJson);
};

/**
 * Extracts the event id from an event URL, or returns null when none is found.
 * URLs ending in "es2" carry the id right before "?aff"; for all other URLs
 * the first run of eight or more digits is used.
 */
const extractEventId = (url) => {
    const match = url.endsWith('es2') ? /(\d+)\?aff/.exec(url) : /(\d{8,})/.exec(url);
    return match ? match[1] : null;
};

/** Builds the empty record skeleton that is stored for every event page. */
const createEmptyEvent = url => ({
    owner: {
        name: null,
        description: null,
        url: null,
    },
    place: {
        name: null,
        location: {
            street: null,
            street2: null,
            city: null,
            country: null,
            region: null,
            postcode: null,
            latitude: null,
            longitude: null,
        },
    },
    event: {
        id: null,
        name: null,
        url,
        description: null,
        dates: [{
            start: null,
            end: null,
        }],
        images: [{
            source: null,
            width: null,
            height: null,
        }],
        tickets: {
            url: null,
        },
        timezone: null,
        updated_time: null,
        tags: null,
        suggestions: null,
    },
});

/**
 * Collects the `data-eid` attributes of the related-event cards on an event page.
 * The regular layout is tried first; the selector of the irregularly formatted
 * layout is only awaited when the regular one yields nothing. Never throws:
 * returns null when neither selector could be read.
 */
const collectSuggestions = async (page) => {
    let suggestions = null;
    try {
        suggestions = await page.$$eval(RELATED_EVENTS_SELECTOR, cards => cards.map(card => card.getAttribute('data-eid')));
    } catch (error) {
        console.log('Suggestions Error:       ' + error);
    }
    if (Array.isArray(suggestions) && suggestions[0] != null) {
        return suggestions;
    }

    try {
        // Puppeteer takes the timeout inside an options object; a bare number is ignored.
        await page.waitForSelector(RELATED_EVENTS_FALLBACK_SELECTOR, { timeout: 10000 });
        suggestions = await page.$$eval(RELATED_EVENTS_FALLBACK_SELECTOR, cards => cards.map(card => card.getAttribute('data-eid')));
    } catch (error) {
        console.log('Suggestions Error:       ' + error);
    }
    return suggestions;
};

/**
 * Copies the fields of the parsed JSON-LD event data into the event record.
 * Missing `offers`, `organizer`, `location` or `address` sections leave the
 * corresponding fields undefined instead of throwing.
 */
const applyLdJson = (event, jsonEventData) => {
    const details = event.event;
    details.name = jsonEventData.name;
    details.description = jsonEventData.description;
    details.images[0].source = jsonEventData.image;
    details.dates[0].start = jsonEventData.startDate;
    details.dates[0].end = jsonEventData.endDate;

    // `offers` is read as a list; castArray also tolerates a single object or a missing value.
    const offer = _.castArray(jsonEventData.offers)[0] || {};
    details.tickets.url = offer.url;
    details.tickets.lowPrice = offer.lowPrice;
    details.tickets.highPrice = offer.highPrice;
    details.tickets.inventory = offer.inventoryLevel;
    details.tickets.startDate = offer.availabilityStarts;
    details.tickets.endDate = offer.availabilityEnds;
    details.tickets.currency = offer.priceCurrency;

    const organizer = jsonEventData.organizer || {};
    event.owner.url = organizer.url;
    event.owner.name = organizer.name;
    event.owner.description = organizer.description;

    const venue = jsonEventData.location || {};
    const address = venue.address || {};
    event.place.name = venue.name;
    event.place.location.street = address.streetAddress;
    event.place.location.city = address.addressLocality;
    event.place.location.region = address.addressRegion;
    event.place.location.postcode = address.postalCode;
    event.place.location.country = address.addressCountry;
};

/**
 * Best-effort: reads "lat,lng" from the `sll` query parameter of the
 * "view map" link and stores both parts (as strings) on `location`.
 * Pages without such a link are left untouched.
 */
const applyCoordinates = async (page, location) => {
    try {
        const href = await page.$eval('a.js-view-map-link', a => a.href);
        // Parse only the part after "?"; otherwise the first parameter's key
        // would be prefixed with the scheme, host and path.
        const query = href.includes('?') ? href.slice(href.indexOf('?') + 1) : href;
        const { sll } = querystring.parse(query);
        if (typeof sll === 'string' && sll) {
            const [latitude, longitude] = sll.split(',');
            location.latitude = latitude;
            location.longitude = longitude;
        }
    } catch (error) {
        console.log(`No coordinates found on the page: ${error.message}`);
    }
};

/**
 * Actor entry point.
 *
 * 1. Reads INPUT (`cities`: non-empty array of city slugs) and connects to MongoDB.
 * 2. Crawls the paginated category listings of every city and enqueues each
 *    listed event URL into the `EBevents` queue.
 * 3. Crawls every enqueued event page and stores one record per event via
 *    `importObjectToCollection`.
 *
 * The MongoDB connection string is read from the `MONGODB_URL` environment
 * variable (or `input.mongodbUrl`) so that no credentials live in the source.
 */
Apify.main(async () => {
    const input = await Apify.getValue('INPUT');
    if (!input || !Array.isArray(input.cities) || input.cities.length === 0) {
        throw new Error('INPUT must contain a non-empty "cities" array.');
    }

    const mongoUrl = process.env.MONGODB_URL || input.mongodbUrl;
    if (!mongoUrl) {
        throw new Error('MongoDB connection string is missing: set the MONGODB_URL environment variable or "mongodbUrl" in INPUT.');
    }

    const mongodbConnection = await MongoClient.connect(mongoUrl, { useNewUrlParser: true });
    try {
        const collection = mongodbConnection.db('eventbrite').collection('events');

        const categoryQueue = await Apify.openRequestQueue('EBcategories');
        const eventQueue = await Apify.openRequestQueue('EBevents');

        const rootUrl = 'https://www.eventbrite.com/d';

        // Only the uncommented categories are crawled; uncomment a line to enable it.
        const categories = [
            'business--events',
            // 'arts--events',
            // 'hobbies--events',
            // 'music--events',
            // 'health--events',
            // 'spirituality--events',
            // 'family-and-education--events',
            // 'other--events',
            // 'film-and-media--events',
            // 'travel-and-outdoor--events',
            // 'food-and-drink--events',
            // 'science-and-tech--events',
            // 'fashion--events',
            // 'home-and-lifestyle--events',
            // 'sports-and-fitness--events',
            // 'charity-and-causes--events',
            // 'community--events',
            // 'government--events'
        ];

        // Enqueue the first listing page of every city/category pair. Each
        // addRequest is awaited so the queue is populated before the crawler starts.
        for (const city of input.cities) {
            for (const category of categories) {
                await categoryQueue.addRequest(new Apify.Request({ url: `${rootUrl}/${city}/${category}/?crt=regular&page=1&sort=best` }));
            }
        }

        const categoryCrawler = new Apify.PuppeteerCrawler({
            requestQueue: categoryQueue,
            maxConcurrency: 5,
            maxRequestRetries: 5,
            launchPuppeteerOptions: {
                useChrome: true,
            },

            // Executed for each listing page; `page` is the loaded Puppeteer page.
            // A thrown error makes the crawler retry the request (up to maxRequestRetries times).
            handlePageFunction: async ({ page, request }) => {
                console.log(`Processing ${request.url}...`);
                await page.waitForSelector(LISTING_LD_JSON_SELECTOR, { timeout: 4000 });
                const pageJson = await readLdJson(page, LISTING_LD_JSON_SELECTOR);

                // Only continue to the next page if at least one event was found.
                if (!Array.isArray(pageJson) || pageJson.length === 0) {
                    return;
                }

                const listedEvents = pageJson
                    .filter(item => item && typeof item.url === 'string')
                    .map((item) => {
                        const eid = extractEventId(item.url);
                        if (eid === null) {
                            console.log('----      ------             -----           ---- ' + 'Could Not get eid of URL:  ' + item.url);
                        }
                        return { url: item.url, eid };
                    });
                listedEvents.forEach(listedEvent => console.log(listedEvent.url));

                await Promise.map(listedEvents, listedEvent => eventQueue.addRequest(new Apify.Request({ url: listedEvent.url, userData: listedEvent })));

                console.log('--- ---- PROMISE MAPPED');

                // Enqueue the next listing page.
                const nextUrl = request.url.replace(/page=(\d+)/, (match, pageNumber) => `page=${parseInt(pageNumber, 10) + 1}`);
                console.log('ADDING NEXT URL TO QUEUE');
                await categoryQueue.addRequest(new Apify.Request({ url: nextUrl }));
            },

            // Executed once a request has failed on its last allowed attempt.
            handleFailedRequestFunction: async ({ request }) => {
                console.log(`Request ${request.url} failed too many times`);
            },
        });

        await categoryCrawler.run();

        // The returned dataset is not used below; the call is kept because it opens
        // the named dataset. NOTE(review): remove if nothing relies on it existing.
        await Apify.openDataset('EBevents');

        const eventCrawler = new Apify.PuppeteerCrawler({
            requestQueue: eventQueue,
            maxConcurrency: 5,
            // Event pages are attempted once only.
            maxRequestRetries: 0,

            // Executed for each event page; `page` is the loaded Puppeteer page.
            handlePageFunction: async ({ page, request }) => {
                console.log(`Processing ${request.url}...`);

                const event = createEmptyEvent(request.url);
                // The id comes from the listing crawl, so the record is keyed even
                // if scraping the page itself fails below.
                event.event.id = _.get(request, 'userData.eid', null);

                try {
                    const jsonEventData = await readLdJson(page, EVENT_LD_JSON_SELECTOR);
                    applyLdJson(event, jsonEventData);
                    event.event.suggestions = await collectSuggestions(page);
                    console.log('++++++++++++++++++++++++++++' + event.event.suggestions);
                    await applyCoordinates(page, event.place.location);
                } catch (error) {
                    // Best effort: whatever was scraped so far is still saved.
                    console.log('ERROR IS :                        ' + error);
                }

                // Save data.
                await importObjectToCollection(collection, event);
            },

            // Executed once a request has failed on its last allowed attempt.
            handleFailedRequestFunction: async ({ request }) => {
                console.log(`Request ${request.url} failed too many times`);
            },
        });

        await eventCrawler.run();
    } finally {
        // Always release the database connection, even when a crawl throws.
        await mongodbConnection.close();
    }
});
Developer
Maintained by Community
Categories