1import { ApifyClient } from 'apify-client';
2
3
4
// API token used to authenticate the client; '<YOUR_API_TOKEN>' is a placeholder
// that has to be replaced with a real token before running the script.
const API_TOKEN = '<YOUR_API_TOKEN>';

// Client instance used below to start the actor and to read its dataset.
const client = new ApifyClient({ token: API_TOKEN });
8
9
// Input object passed to the actor call: one start URL plus the per-page scraping function.
//
// NOTE(review): `pageFunction` only touches its `context` argument and is written as a
// standalone `async function` expression. Presumably the client ships it to the actor as
// source text, so it must not close over anything defined in this file — TODO confirm.
const input = {
    "startUrls": [
        {
            "url": "https://www.doctolib.fr/infectiologue/75001-paris"
        }
    ],
    /**
     * Handles one loaded page.
     *
     * - Doctor profile page: collects the profile fields into the returned object.
     * - Any other page (search / pagination): collects the result-card links and
     *   enqueues each of them labelled as a doctor page.
     *
     * @param {object} context - Object supplied by the actor. This function uses
     *   `request`, `page`, `log` and the helpers `innerTextwrapper`, `getPhones`
     *   and `enqueueRequest` from it.
     * @returns {Promise<object>} The collected row (`url`, `isDoctorPage`, and either
     *   the profile fields or a `message` for non-doctor pages).
     */
    "pageFunction": async function pageFunction(context) {
        const data = {};
        const userData = context.request.userData;
        data.url = context.request.url;

        // A request enqueued from a search page carries label 'doctor'. Otherwise
        // fall back to probing the <body> classes.
        // NOTE(review): innerTextwrapper presumably returns the matched element's text
        // or a falsy value when nothing matches — confirm against the actor's helper.
        const isDoctorProfile = userData && userData.label === 'doctor';
        let isProbablyDoctorProfile;
        if (!isDoctorProfile) {
            isProbablyDoctorProfile = await context.innerTextwrapper(context, 'body.profiles, body.online_booking-drafts');
        }
        const isDoctorPage = isDoctorProfile || isProbablyDoctorProfile;

        data.isDoctorPage = isDoctorPage;

        if (isDoctorPage) {
            context.log.info(`Doctor page ${isDoctorProfile ? 'from search' : '' } ${isProbablyDoctorProfile ? 'guessing' : '' }`);
            // The name is the only field read without a fallback: a missing <h1> rejects
            // after the 6 s timeout and makes the whole page function fail.
            data.nom = await context.page.locator('#main-content h1').innerText({ timeout: 6000 });
            data.tarif = await context.innerTextwrapper(context, '#payment_means');
            data.horaire_contact = await context.innerTextwrapper(context, '#openings_and_contact');
            data.description = await context.innerTextwrapper(context, '.dl-profile-bio');
            data.specialite = await context.innerTextwrapper(context, '.dl-profile-header-speciality');
            data.expertise = await context.innerTextwrapper(context, '#skills');

            // The remaining fields are best-effort: a failure is logged and the field is skipped.
            try {
                data.website = await context.page.locator('.dl-profile-row-section div', { hasText: 'Website' }).locator('a').getAttribute('href', { timeout: 2000 });
            } catch (e) {
                context.log.info('Website not found', e);
            }

            try {
                // Phones are derived from the contact text collected above.
                data.phones = await context.getPhones(data.horaire_contact);
            } catch (e) {
                context.log.info('Phones not found', e);
            }

            try {
                data.image = await context.page.locator('.dl-profile img').first().getAttribute('src', { timeout: 2000 });
                // assumes a leading '/' means a protocol-relative src ("//host/...") — TODO confirm
                if (data.image.startsWith('/')) {
                    data.image = 'https:' + data.image;
                }
            } catch (e) {
                context.log.info('Image not found', e);
            }
        } else {
            context.log.info('we are not on a doctor page: so a search or pagination page.');
            data.message = 'you can remove these rows in the settings with "hideSearchPages" ';

            // Label the enqueued requests through a copy, so the userData of the page
            // currently being handled is left untouched (and an undefined userData does not throw).
            const doctorUserData = { ...userData, label: 'doctor' };

            const resultLinks = context.page.locator('.search-result-card a[href]');
            const links = await resultLinks.evaluateAll((elems) => elems.map((elem) => elem.getAttribute('href')));

            // Relative links are resolved against the country site the current page belongs to.
            let tld = 'fr';
            if (context.request.url.includes('doctolib.de')) { tld = 'de'; }
            if (context.request.url.includes('doctolib.it')) { tld = 'it'; }

            // Wait for every enqueue to settle before returning; a rejection now surfaces
            // as a failure of this page function instead of an unhandled rejection.
            await Promise.all(links.map((link) => {
                const target = link.startsWith('/') ? `https://www.doctolib.${tld}${link}` : link;
                return context.enqueueRequest(target, doctorUserData, true);
            }));
        }
        context.log.info(`ending this page now`);

        return data;
    }
};
76
77
// Call the actor with the input defined above and wait for the call to resolve.
const run = await client.actor("anchor/doctolib").call(input);

// Point the user at the run's default dataset in the Apify console.
console.log('Results from dataset');
console.log(`💾 Check your data here: https://console.apify.com/storage/datasets/${run.defaultDatasetId}`);

// List the items of that dataset and dump each one to stdout.
const resultDataset = client.dataset(run.defaultDatasetId);
const { items } = await resultDataset.listItems();
for (const item of items) {
    console.dir(item);
}
87
88