1from apify_client import ApifyClient
2
3
4
# API client used for every call below; replace the placeholder with a real
# Apify API token before running.
client = ApifyClient(token="<YOUR_API_TOKEN>")
6
7
# Input passed to the "anchor/doctolib" actor: the start URL(s) and a
# JavaScript pageFunction, shipped as a plain string.
# NOTE(review): presumably the actor evaluates pageFunction once per crawled
# page and stores the returned object as a dataset row - confirm against the
# actor's documentation.
run_input = {
    "startUrls": [{ "url": "https://www.doctolib.fr/infectiologue/75001-paris" }],
    "pageFunction": """async function pageFunction(context) {
    // Extracts profile fields on doctor pages; on any other page (search or
    // pagination) it enqueues the linked result cards as doctor pages.
    const data = {};
    const userData = context.request.userData;
    data.url = context.request.url;

    // A page counts as a doctor profile when it was enqueued with that label,
    // or when the body element carries one of the profile CSS classes.
    const isDoctorProfile = userData && userData.label === 'doctor';
    let isProbablyDoctorProfile;
    if (!isDoctorProfile) {
        isProbablyDoctorProfile = await context.innerTextwrapper(context, 'body.profiles, body.online_booking-drafts');
    }
    const isDoctorPage = isDoctorProfile || isProbablyDoctorProfile;

    data.isDoctorPage = isDoctorPage;

    if (isDoctorPage) {
        context.log.info(`Doctor page ${isDoctorProfile ? 'from search' : '' } ${isProbablyDoctorProfile ? 'guessing' : '' }`);
        data.nom = await context.page.locator('#main-content h1').innerText({ timeout: 6000 });
        data.tarif = await context.innerTextwrapper(context, '#payment_means');
        data.horaire_contact = await context.innerTextwrapper(context, '#openings_and_contact');
        data.description = await context.innerTextwrapper(context, '.dl-profile-bio');
        data.specialite = await context.innerTextwrapper(context, '.dl-profile-header-speciality');
        data.expertise = await context.innerTextwrapper(context, '#skills');

        // The remaining fields are optional: a failure is logged and the
        // field is simply left unset.
        try {
            data.website = await context.page.locator('.dl-profile-row-section div', { hasText: 'Website' }).locator('a').getAttribute('href', { timeout: 2000 });
        } catch (e) {
            context.log.info('Website not found', e);
        }

        try {
            data.phones = await context.getPhones(data.horaire_contact);
        } catch (e) {
            context.log.info('Phones not found', e);
        }

        try {
            data.image = await context.page.locator('.dl-profile img').first().getAttribute('src', { timeout: 2000 });
            if (data.image.startsWith('/')) { data.image = 'https:' + data.image; }
        } catch (e) {
            context.log.info('Image not found', e);
        }
    } else {
        context.log.info('we are not on a doctor page: so a search or pagination page.');
        data.message = 'you can remove these rows in the settings with \"hideSearchPages\" ';

        // Label the enqueued requests on a copy, so the userData of the
        // request currently being processed is left untouched (and a missing
        // userData does not throw).
        const doctorUserData = Object.assign({}, userData, { label: 'doctor' });

        const elements = context.page.locator('.search-result-card a[href]');
        const links = await elements.evaluateAll(elems => elems.map(elem => elem.getAttribute('href')));

        // Relative links are resolved against the country site being crawled.
        let extension = 'fr';
        if (context.request.url.includes('doctolib.de')) { extension = 'de'; }
        if (context.request.url.includes('doctolib.it')) { extension = 'it'; }

        // Await every enqueue in a plain loop: awaits inside a forEach
        // callback are not waited on by the caller, so the function could
        // return before the links were queued and failures went unhandled.
        for (const link of links) {
            const url = link.startsWith('/') ? `https://www.doctolib.${extension}${link}` : link;
            await context.enqueueRequest(url, doctorUserData, true);
        }
    }
    context.log.info(`ending this page now`);

    return data;
}
""",
}
71
72
# Run the "anchor/doctolib" actor with the prepared input.
# NOTE(review): per the apify-client docs, call() waits for the run to finish
# before returning the run object - confirm for the installed version.
run = client.actor("anchor/doctolib").call(run_input=run_input)

# The run object names the dataset holding the results; print a console link
# to it, then each stored item.
dataset_id = run["defaultDatasetId"]
print("💾 Check your data here: https://console.apify.com/storage/datasets/" + dataset_id)
for item in client.dataset(dataset_id).iterate_items():
    print(item)
79
80