1"""
2This module defines the `main()` coroutine for the Apify Actor, executed from the `__main__.py` file.
3
4Feel free to modify this file to suit your specific needs.
5
6To build Apify Actors, utilize the Apify SDK toolkit, read more at the official documentation:
7https://docs.apify.com/sdk/python
8"""

import asyncio
import random
import re
import time
from urllib.parse import urljoin

import requests
import requests.exceptions
import undetected_chromedriver as uc
from apify import Actor
from apify_shared.consts import ActorEventTypes as Event
from bs4 import BeautifulSoup
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from seleniumwire import webdriver

from .datascrapify import StartProcess, parse_proxy_url, on_aborting_event


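# Helper used by both scraping paths below: given a site URL, look for an e-mail address
# on the landing page first, then fall back to a linked "contact" page if nothing is found.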
def scrape_contact_emails(link):
    """Return an e-mail address found on `link` (or its contact page), or an empty string."""
    res = requests.get(link)
    domain = link.split(".")
    mailaddr = link
    soup = BeautifulSoup(res.text, "lxml")
    links = soup.find_all("a")
    contact_link = ''
    final_result = ""
    try:
        # Look for addresses that use the site's own domain (e.g. *@example.com).
        emails = soup.find_all(string=re.compile('.*@' + domain[1] + '.' + domain[2].replace("/", "")))
        emails.sort(key=len)
        print(emails[0].replace("\n", ""))
        final_result = emails[0]
    except Exception:
        # Nothing found on the landing page - look for a "contact" link instead.
        try:
            flag = 0
            for anchor in links:
                href = anchor.get("href") or ""
                if "contact" in href.lower() or "contact" in anchor.text.lower():
                    if len(href) > 2 and flag < 2:
                        flag = flag + 1
                        contact_link = href
        except Exception:
            pass

        domain = domain[0] + "." + domain[1] + "." + domain[2]
        if len(contact_link) < len(domain):
            domain = domain + contact_link.replace("/", "")
        else:
            domain = contact_link

        try:
            res = requests.get(domain)
            soup = BeautifulSoup(res.text, "lxml")
            emails = soup.find_all(string=re.compile('.*@' + mailaddr[7:].replace("/", "")))
            emails.sort(key=len)
            try:
                print(emails[0].replace("\n", ""))
                final_result = emails[0]
                return final_result
            except Exception:
                pass
        except Exception:
            pass

    return final_result


async def main() -> None:
    """
    The main coroutine is executed with `asyncio.run()`, so do not turn it into a regular function -
    that will not work. Asynchronous execution is required for communication with the Apify platform,
    and it also significantly improves performance for web scraping workloads.
    """
    async with Actor:

        actor_input = await Actor.get_input() or {}
        Keyword_val = actor_input.get('Keyword')
        location_val = actor_input.get('location')
        social_network_val = actor_input.get('social_network')
        Country_val = actor_input.get('Country')
        Email_Type_val = actor_input.get('Email_Type')
        Other_Email_Type_val = actor_input.get('Other_Email_Type')
        proxy_settings = actor_input.get('proxySettings')

        proxy_configuration = await Actor.create_proxy_configuration(groups=['GOOGLE_SERP'])

        proxyurl = ''
        if proxy_configuration and proxy_settings:
            proxyurl = await proxy_configuration.new_url()

        if not Keyword_val:
            Actor.log.info('Please insert keyword')
            await Actor.push_data({'Email': 'Please insert keyword'})
            await Actor.exit()
            return

        if Keyword_val == 'TestKeyword':
            Actor.log.info('Please insert keyword')
            await Actor.push_data({'Email': 'Please insert Your Keyword'})
            await Actor.exit()
            return
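
        # The triple-quoted block below is disabled code kept for reference; it delegated the
        # whole run to `datascrapify.StartProcess` instead of the inline scraping flow below.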
        '''
        me = await Actor.apify_client.user('me').get()
        username = me["username"]
        isPaying = 'PayingUser'
        if me["isPaying"] == False:
            isPaying = 'FreeUser'

        try:
            proxyurl = ''
            USE_Proxy = False
            if proxyurl:
                USE_Proxy = True
            my_proxy_settings = parse_proxy_url(proxyurl)
            # Call the function from the imported module
            await StartProcess(
                "Apify_camp_LinkedinEmail_" + username + "_" + isPaying,
                "ALLINONE",
                Keyword_val,
                location_val,
                social_network_val,
                Country_val,
                "Google",
                USE_Proxy,
                my_proxy_settings
            )

        finally:
            # Code that always executes (e.g., cleanup)
            print("This block always runs, regardless of exceptions.")

        await Actor.exit()
        '''

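        # Parallel lists: l1[i] holds the country code accepted in the Actor input and
        # l2[i] holds the matching human-readable country name.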
        l1 = ["it","www","gm","fr","sp","uk","al","ag","ar","am","as","au","aj","bg","bo","be","bh","bk","br","bu","ca","ci","ch","co","cs","hr","cu","ez","dk","ec","eg","en","fi","gg","gr","hk","hu","ic","in","id","ir","iz","ei","is","jm","ja","ke","kn","ks","ku","lg","ly","ls","lh","lu","mc","mk","my","mt","mx","md","mn","mj","mo","np","nl","nz","ni","no","pk","we","pm","pa","pe","rp","pl","po","rq","qa","ro","rs","sm","sa","sg","ri","sn","lo","si","sf","sw","sz","sy","tw","th","ts","tu","ua","ae","uy","uz","ve"]
        l2 = ["Italy","United States","Germany","France","Spain","United Kingdom","Albania","Algeria","Argentina","Armenia","Australia","Austria","Azerbaijan","Bangladesh","Belarus","Belgium","Belize","Bosnia and Herzegovina","Brazil","Bulgaria","Canada","Chile","China","Colombia","Costa Rica","Croatia","Cuba","Czechia","Denmark","Ecuador","Egypt","Estonia","Finland","Georgia","Greece","Hong Kong","Hungary","Iceland","India","Indonesia","Iran","Iraq","Ireland","Israel","Jamaica","Japan","Kenya","Korea","Korea, Republic of","Kuwait","Latvia","Libya","Liechtenstein","Lithuania","Luxembourg","Macao","Macedonia","Malaysia","Malta","Mexico","Moldova, Republic of","Monaco","Montenegro","Morocco","Nepal","Netherlands","New Zealand","Nigeria","Norway","Pakistan","Palestine, State of","Panama","Paraguay","Peru","Philippines","Poland","Portugal","Puerto Rico","Qatar","Romania","Russia","San Marino","Saudi Arabia","Senegal","Serbia","Singapore","Slovakia","Slovenia","South Africa","Sweden","Switzerland","Syrian Arab Republic","Taiwan","Thailand","Tunisia","Turkey","Ukraine","United Arab Emirates","Uruguay","Uzbekistan","Venezuela"]
        select_index = 1
        select_country = 'United States'
        for count, ele in enumerate(l1):
            if ele == Country_val:
                select_index = count
                break

        for count, ele in enumerate(l2):
            if count == select_index:
                select_country = ele
                break

        print(select_country)

        concatstring = ""
        concatstring = concatstring + Keyword_val
        option = "( @gmail.com OR @hotmail.com OR @yahoo.com)"
        if Email_Type_val == "1":
            if not Other_Email_Type_val:
                Actor.log.info('Please insert Email Type Domain')
                await Actor.push_data({'Email': 'Please insert Email Type Domain'})
                await Actor.exit()
                return
            if Other_Email_Type_val.find("@") > -1:
                option = " ( " + Other_Email_Type_val + " )"
            else:
                option = " ( @" + Other_Email_Type_val + " )"
        concatstring = concatstring + option
        if location_val:
            concatstring = concatstring + " in " + location_val

        if social_network_val:
            concatstring = concatstring + " site:"

            if social_network_val == "linkedin.com/" or social_network_val == "pinterest.com/":
                # These networks serve country-specific results on country-code subdomains.
                concatstring = concatstring + Country_val + "."

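            # Amazon uses country-specific marketplaces, so swap the ".com" TLD for the one
            # that matches the selected country code (defaulting to ".com").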
            if social_network_val == "amazon.com/":
                amazon_tlds = {
                    'gm': 'de', 'sp': 'es', 'fr': 'fr', 'uk': 'co.uk', 'as': 'com.au',
                    'www': 'com', 'in': 'in', 'be': 'com.be', 'br': 'com.br', 'ca': 'ca',
                    'ch': 'cn', 'eg': 'eg', 'it': 'it', 'ja': 'co.jp', 'mx': 'com.mx',
                    'nl': 'nl', 'pl': 'pl', 'sa': 'sa', 'sn': 'sg', 'sw': 'se',
                    'tu': 'com.tr', 'ae': 'ae',
                }
                Country_val = amazon_tlds.get(Country_val, 'com')

                social_network_val = social_network_val.replace('.com', '.' + Country_val)

            concatstring = concatstring + social_network_val

        SearchEngine = 'Google'
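        # Pools of realistic desktop and mobile User-Agent strings; one is picked at random
        # for the browser session launched further below.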
        desktop_user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.139 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",

            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0",
            "Mozilla/5.0 (Windows NT 10.0; rv:124.0) Gecko/20100101 Firefox/124.0",
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0",

            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.86 Safari/537.36 Edg/123.0.2420.65",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.112 Safari/537.36 Edg/122.0.2365.66",

            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 OPR/96.0.0.0",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.129 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_6_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13.5; rv:124.0) Gecko/20100101 Firefox/124.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_3_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Safari/605.1.15",

            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.184 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",

            "Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0",
            "Mozilla/5.0 (X11; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0",
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:123.0) Gecko/20100101 Firefox/123.0",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_4_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.86 Safari/537.36 Edg/123.0.2420.65",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 OPR/96.0.0.0"
        ]

        mobile_user_agents = [
            "Mozilla/5.0 (Linux; Android 13; Pixel 7 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.105 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 11; Pixel 4a) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.140 Mobile Safari/537.36",

            "Mozilla/5.0 (Linux; Android 13; SAMSUNG SM-S911B) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/24.0 Chrome/123.0.0.0 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 12; SAMSUNG SM-A525F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/23.0 Chrome/121.0.0.0 Mobile Safari/537.36",

            "Mozilla/5.0 (Linux; Android 13; M2101K6G) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.105 Mobile Safari/537.36",

            "Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 15_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Mobile/15E148 Safari/604.1",

            "Mozilla/5.0 (iPad; CPU OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPad; CPU OS 15_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15E148 Safari/604.1",

            "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) CriOS/123.0.0.0 Mobile/15E148 Safari/604.1",

            "Mozilla/5.0 (Linux; Android 13; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 OPR/76.0.4017.123",

            "Mozilla/5.0 (Linux; Android 13; Pixel 6 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Mobile Safari/537.36 EdgA/123.0.2420.64",

            "Mozilla/5.0 (Android 13; Mobile; rv:124.0) Gecko/124.0 Firefox/124.0",
            "Mozilla/5.0 (Android 12; Mobile; rv:122.0) Gecko/122.0 Firefox/122.0"
        ]

        all_user_agents = desktop_user_agents + mobile_user_agents
        random_user_agent = random.choice(all_user_agents)

        qry = "https://google.com/search?q=" + concatstring
        Actor.log.info(concatstring)

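        # First pass: page through Google search results over plain HTTP with requests +
        # BeautifulSoup, extracting e-mail addresses from result titles and snippets.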
        all_users = []
        query = concatstring
        max_pages = 100
        results = []
        for page in range(max_pages):
            try:
                start = page * 10
                Actor.log.info('Check Page ' + str(page))
                url = f"http://www.google.com/search?q={query}&num=10&hl=en&start={start}"

                proxies = None
                if proxyurl:
                    proxies = {'http': proxyurl, 'https': proxyurl}

                response = requests.get(
                    url,
                    proxies=proxies,
                    headers={
                        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                                      "Chrome/120.0.0.0 Safari/537.36"
                    }
                )

                if response.status_code != 200:
                    Actor.log.warning(f"⚠️ Request failed: {response.status_code} {response.reason}")
                    continue

                soup = BeautifulSoup(response.text, "html.parser")
                result_blocks = soup.select("div.g, div.tF2Cxc")

                for j, block in enumerate(result_blocks):
                    Email = ''
                    title_el = block.select_one("h3")
                    link_el = block.select_one("a")
                    snippet_el = block.select_one(".VwiC3b, .IsZvec, .aCOpRe")

                    # Skip blocks that do not contain a complete organic result.
                    if title_el is None or link_el is None or snippet_el is None:
                        continue

                    title = title_el.get_text(strip=True)
                    result_url = link_el.get("href")
                    snippet = snippet_el.get_text(strip=True)
                    print(title)
                    alltext = title + snippet
                    match = re.findall(r'[a-zA-Z0-9\.\-+_]+@[a-zA-Z0-9\.\-+_]+\.[a-zA-Z]+', alltext)
                    Actor.log.info('match email ' + str(len(match)))
                    if len(match) > 0:
                        for i in match:
                            Email = i
                            Actor.log.info('email ' + Email)
                    else:
                        # No address in the snippet - try to scrape one from the result's website.
                        match_website = re.findall(r'\b(?:https?://|www\.)\S+\b', alltext)
                        for i in match_website:
                            Website = "http://www." + i
                            Actor.log.info('Website ' + Website)
                            Email = scrape_contact_emails(Website)
                    if Email:
                        existindb = False

                        if len(all_users) > 0:
                            for item in all_users:
                                if item['Email'] == Email:
                                    existindb = True
                                    break

                        if existindb == False:
                            all_users.append({'Email': Email})
                            await Actor.push_data({'Email': Email, 'title': title, 'Description': alltext, 'Detail_Link': result_url})

                await asyncio.sleep(random.uniform(1.5, 3.0))
            except Exception:
                Actor.log.exception('Failed to process results page ' + str(page))
                await Actor.exit()

        await Actor.exit()

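        # Second pass: a Selenium (undetected-chromedriver) flow. Note that `Actor.exit()` is
        # awaited above, so the code below is only reached if that call does not end the run.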
        Actor.log.info('Launching Chrome WebDriver...')
        chrome_options = uc.ChromeOptions()
        chrome_options.add_argument(f'user-agent={random_user_agent}')
        chrome_options.add_argument("--disable-blink-features=AutomationControlled")
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')

        seleniumwire_options = {}
        if proxyurl:
            print('apply proxy')
            chrome_options.add_argument(f"--proxy-server={proxyurl}")

        driver = uc.Chrome(options=chrome_options, use_subprocess=False, version_main=137)
        # Hide the `navigator.webdriver` flag before any page script runs.
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                })
            """
        })

        # Quick sanity check of the outgoing IP address.
        driver.get("https://httpbin.io/ip")
        ip_address = driver.find_element(By.TAG_NAME, "body").text
        print(ip_address)
        try:
            driver.get(qry)
        except Exception as e:
            print(e)
            await Actor.push_data({'Email': str(e)})
            driver.quit()

        checkount = 0
        try:
            # Wait while Google shows its "sorry" captcha page (up to ~5 minutes).
            while driver.current_url.find("sorry/index") > -1 and checkount < 100:
                print('captcha')
                print(driver.current_url)
                time.sleep(3)
                checkount = checkount + 1
        except Exception as e:
            print(e)

        all_users = []
        start = 0

        if SearchEngine == 'Yahoo':
            print('yahoo')
        else:
            try:
                while True:
                    # Dismiss Google's cookie-consent dialogs if they appear.
                    consent = driver.find_elements(By.XPATH, "//iframe[contains(@src, 'consent.google.com')]")
                    if len(consent) > 0:
                        driver.switch_to.frame(consent[0])
                        driver.find_element(By.ID, "id").click()

                    popup = driver.find_elements(By.CSS_SELECTOR, "button[class='yt-spec-button-shape-next yt-spec-button-shape-next--filled yt-spec-button-shape-next--mono yt-spec-button-shape-next--size-m']")
                    if len(popup) > 0:
                        Actor.log.info("popup")
                        for n in range(0, len(popup)):
                            if popup[n].text == "Accept all":
                                popup[n].click()

                    popup_accept = driver.find_elements(By.CSS_SELECTOR, "button[id='L2AGLb']")
                    if len(popup_accept) > 0:
                        popup_accept[0].click()
                        time.sleep(1)

                    checkconsent = 0
                    while len(driver.find_elements(By.CSS_SELECTOR, "div[class='HTjtHe'][style*='display: block']")) > 0 and checkconsent < 100:
                        Actor.log.info('checkconsent ' + str(checkconsent))
                        Actor.log.info("homeurl" + driver.current_url)
                        checkconsent = checkconsent + 1
                        time.sleep(1)

                    AllClasses = driver.find_elements(By.CSS_SELECTOR, "div.tF2Cxc")
                    print(len(AllClasses))

                    homeurl = driver.current_url
                    AllClasses = driver.find_elements(By.CSS_SELECTOR, "div.tF2Cxc")
                    if social_network_val == "youtube.com/" or social_network_val == "instagram.com/":
                        AllClasses = driver.find_elements(By.XPATH, "//div[contains(@class,'MjjYud')]")
                    Actor.log.info("homeurl" + driver.current_url)

                    checkount = 0
                    try:
                        while driver.current_url.find("sorry/index") > -1 and checkount < 100:
                            print('captcha')
                            time.sleep(3)
                            checkount = checkount + 1
                    except Exception as e:
                        print(e)

                    if len(AllClasses) == 0:
                        print('check new attribut')
                        AllClasses = driver.find_elements(By.CSS_SELECTOR, "div.fP1Qef")
                        print(len(AllClasses))
                        print(driver.find_elements(By.CSS_SELECTOR, "div[id='main']"))

                    if len(AllClasses) > 0:
                        start = 1
                        Actor.log.info("Result" + str(len(AllClasses)))
                        for gr in range(0, len(AllClasses)):
                            try:
                                BusinessName = ""
                                DetailsLink = ""
                                Email = ""
                                Address = ""
                                businessdetail = AllClasses[gr].find_elements(By.CSS_SELECTOR, "h3.LC20lb")
                                if len(businessdetail) > 0:
                                    BusinessName = businessdetail[0].text
                                    DetailsLink = businessdetail[0].find_element(By.XPATH, "parent::*").get_attribute("href")
                                if not DetailsLink:
                                    businessdetailnew = AllClasses[gr].find_elements(By.CSS_SELECTOR, "div.TbwUpd")
                                    if len(businessdetailnew) > 0:
                                        DetailsLink = businessdetailnew[0].find_element(By.XPATH, "parent::*").get_attribute("href")
                                Ele_addressdetail = AllClasses[gr].find_elements(By.CSS_SELECTOR, "div.VwiC3b")
                                if len(Ele_addressdetail) > 0:
                                    Address = Ele_addressdetail[0].text.replace(";", "-").replace(",", "-")

                                alltext = AllClasses[gr].text
                                match = re.findall(r'[a-zA-Z0-9\.\-+_]+@[a-zA-Z0-9\.\-+_]+\.[a-zA-Z]+', alltext)
                                Actor.log.info('match email ' + str(len(match)))
                                if len(match) > 0:
                                    for i in match:
                                        Email = i
                                        Actor.log.info('email ' + Email)
                                else:
                                    # No address in the result text - try the linked website instead.
                                    match_website = re.findall(r'\b(?:https?://|www\.)\S+\b', BusinessName + Address)
                                    for i in match_website:
                                        Website = "http://www." + i
                                        Actor.log.info('Website ' + Website)
                                        Email = scrape_contact_emails(Website)
                                if Email:
                                    existindb = False

                                    if len(all_users) > 0:
                                        for item in all_users:
                                            if item['Email'] == Email:
                                                existindb = True
                                                break
                                    if existindb == False:
                                        all_users.append({'Email': Email})
                                        await Actor.push_data({'Email': Email, 'title': BusinessName, 'Description': alltext, 'Detail_Link': DetailsLink})

                            except Exception as err:
                                Actor.log.info(f"Unexpected {err=}, {type(err)=}")

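                        # Pagination: prefer the classic "Next" link (a#pnnext); otherwise fall back
                        # to the mobile "More results" button (span.RVQdVd) and wait for new results.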
                        if len(driver.find_elements(By.CSS_SELECTOR, "a#pnnext")) > 0:
                            Actor.log.info('Click Next')
                            str_url = driver.current_url

                            driver.execute_script("arguments[0].click();", driver.find_elements(By.CSS_SELECTOR, "a#pnnext")[0])
                            time.sleep(3)
                            tryresult = 0
                            while driver.current_url == str_url:
                                time.sleep(1)
                                tryresult = tryresult + 1
                                if tryresult > 20:
                                    driver.quit()
                                    break
                        else:
                            Actor.log.info('Click Next_1')
                            AllCOUNT_PREVIOUS = len(driver.find_elements(By.CSS_SELECTOR, "div.tF2Cxc"))
                            if social_network_val == "youtube.com/" or social_network_val == "instagram.com/":
                                AllCOUNT_PREVIOUS = len(driver.find_elements(By.XPATH, "//div[contains(@class,'MjjYud')]"))
                            Actor.log.info('Click Next_1_T1')
                            str_url = driver.current_url
                            action = ActionChains(driver)

                            tryresult = 0
                            if len(driver.find_elements(By.CSS_SELECTOR, "span.RVQdVd")) > 0:
                                action = ActionChains(driver)
                                selectedlink1 = driver.find_element(By.CSS_SELECTOR, "span.RVQdVd")
                                action.move_to_element(selectedlink1).click().perform()

                                time.sleep(3)
                                Actor.log.info('Click Next_1_T2')
                                AllCOUNT_Now = len(driver.find_elements(By.CSS_SELECTOR, "div.tF2Cxc"))
                                if social_network_val == "youtube.com/" or social_network_val == "instagram.com/":
                                    AllCOUNT_Now = len(driver.find_elements(By.XPATH, "//div[contains(@class,'MjjYud')]"))

                                while AllCOUNT_PREVIOUS == AllCOUNT_Now:
                                    time.sleep(1)
                                    tryresult = tryresult + 1
                                    Actor.log.info('Click Next_1_T3_tryresult ' + str(tryresult))
                                    if tryresult > 20:
                                        driver.quit()
                                        break

                                    AllCOUNT_Now = len(driver.find_elements(By.CSS_SELECTOR, "div.tF2Cxc"))
                                    if social_network_val == "youtube.com/" or social_network_val == "instagram.com/":
                                        AllCOUNT_Now = len(driver.find_elements(By.XPATH, "//div[contains(@class,'MjjYud')]"))
                                print('AllCOUNT_PREVIOUS' + str(AllCOUNT_PREVIOUS))
                                print('AllCOUNT_Now' + str(AllCOUNT_Now))
                                if AllCOUNT_PREVIOUS == AllCOUNT_Now:
                                    break
                            time.sleep(3)
                    else:
                        if start == 0:
                            await Actor.push_data({'Email': 'No Data Found, Due to google not respond. May be proxy problem'})
                        driver.quit()
                        break
                print('done')
            except Exception:
                Actor.log.exception('Cannot extract data.')

        driver.quit()