# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python-selenium:3.11

# Second, copy just requirements.txt into the Actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install --no-cache-dir -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./

# Use compileall to ensure the runnability of the Actor Python code.
RUN python3 -m compileall -q .

# Specify how to launch the source code of your Actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]

.actor/actor.json

{
    "actorSpecification": 1,
    "name": "MemberInformation-actor",
    "title": "Data Extraction for Member Information",
    "description": "Extracts member information using provided data.",
    "version": "0.1",
    "meta": {
        "templateId": "python-dictionary"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile",
    "storages": {
        "dataset": {
            "actorSpecification": 1,
            "title": "Member Information Data",
            "views": {
                "member": {
                    "title": "Member Information Data",
                    "transformation": {
                        "fields": [
                            "First Name",
                            "Last Name",
                            "MBI",
                            "DOB",
                            "Medicaid #",
                            "LIS",
                            "Carrier",
                            "Carrier Member #",
                            "Status",
                            "Plan Type",
                            "Contract #",
                            "Plan Name",
                            "Monthly premium",
                            "PCP on file",
                            "Phone",
                            "Email",
                            "Address Line 1",
                            "Address Line 2",
                            "City",
                            "State",
                            "Zip",
                            "County",
                            "Application ID",
                            "Election Code",
                            "Application Date",
                            "Approval Date",
                            "Effective Date",
                            "Termination Date",
                            "Termination Reason",
                            "Writing Agent",
                            "Writing Agent NPN",
                            "Agent Writing ID"
                        ]
                    },
                    "display": {
                        "component": "table",
                        "properties": {
                            "First Name": {
                                "label": "First Name",
                                "format": "text"
                            },
                            "Last Name": {
                                "label": "Last Name",
                                "format": "text"
                            },
                            "MBI": {
                                "label": "MBI",
                                "format": "text"
                            },
                            "DOB": {
                                "label": "Date of Birth",
                                "format": "text"
                            },
                            "Medicaid #": {
                                "label": "Medicaid Number",
                                "format": "text"
                            },
                            "LIS": {
                                "label": "LIS Indicator",
                                "format": "text"
                            },
                            "Carrier": {
                                "label": "Carrier",
                                "format": "text"
                            },
                            "Carrier Member #": {
                                "label": "Carrier Member Number",
                                "format": "text"
                            },
                            "Status": {
                                "label": "Status",
                                "format": "text"
                            },
                            "Plan Type": {
                                "label": "Plan Type",
                                "format": "text"
                            },
                            "Contract #": {
                                "label": "Contract Number",
                                "format": "text"
                            },
                            "Plan Name": {
                                "label": "Plan Name",
                                "format": "text"
                            },
                            "Monthly premium": {
                                "label": "Monthly Premium",
                                "format": "text"
                            },
                            "PCP on file": {
                                "label": "Primary Care Physician on File",
                                "format": "text"
                            },
                            "Phone": {
                                "label": "Phone",
                                "format": "text"
                            },
                            "Email": {
                                "label": "Email",
                                "format": "text"
                            },
                            "Address Line 1": {
                                "label": "Address Line 1",
                                "format": "text"
                            },
                            "Address Line 2": {
                                "label": "Address Line 2",
                                "format": "text"
                            },
                            "City": {
                                "label": "City",
                                "format": "text"
                            },
                            "State": {
                                "label": "State",
                                "format": "text"
                            },
                            "Zip": {
                                "label": "Zip",
                                "format": "text"
                            },
                            "County": {
                                "label": "County",
                                "format": "text"
                            },
                            "Application ID": {
                                "label": "Application ID",
                                "format": "text"
                            },
                            "Election Code": {
                                "label": "Election Code",
                                "format": "text"
                            },
                            "Application Date": {
                                "label": "Application Date",
                                "format": "text"
                            },
                            "Approval Date": {
                                "label": "Approval Date",
                                "format": "text"
                            },
                            "Effective Date": {
                                "label": "Effective Date",
                                "format": "text"
                            },
                            "Termination Date": {
                                "label": "Termination Date",
                                "format": "text"
                            },
                            "Termination Reason": {
                                "label": "Termination Reason",
                                "format": "text"
                            },
                            "Writing Agent": {
                                "label": "Writing Agent",
                                "format": "text"
                            },
                            "Writing Agent NPN": {
                                "label": "Writing Agent NPN",
                                "format": "text"
                            },
                            "Agent Writing ID": {
                                "label": "Agent Writing ID",
                                "format": "text"
                            }
                        }
                    }
                }
            }
        }
    }
}

.actor/input_schema.json

{
  "title": "HumanaScraper",
  "type": "object",
  "schemaVersion": 1,
  "properties": {
    "username": {
      "title": "Username/Email",
      "type": "string",
      "description": "Enter the username or email address used for login.",
      "editor": "textfield"
    },
    "password": {
      "title": "Password",
      "type": "string",
      "description": "Enter the password for login.",
      "isSecret": true,
      "editor": "textfield"
    }
  },
  "required": ["username", "password"]
}

src/__main__.py

1"""
2This module serves as the entry point for executing the Apify Actor. It handles the configuration of logging
3settings. The `main()` coroutine is then executed using `asyncio.run()`.
4
5Feel free to modify this file to suit your specific needs.
6"""
7
8import asyncio
9import logging
10
11from apify.log import ActorLogFormatter
12
13from .main import main
14
# Configure loggers: a single stream handler, formatted with the Actor log
# formatter, is attached to both the Apify client logger and the Apify SDK logger.
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

apify_client_logger = logging.getLogger('apify_client')
apify_logger = logging.getLogger('apify')

# Apply each logger's level and attach the shared handler, client logger first.
for _logger, _level in (
    (apify_client_logger, logging.INFO),
    (apify_logger, logging.DEBUG),
):
    _logger.setLevel(_level)
    _logger.addHandler(handler)

# Execute the Actor main coroutine
asyncio.run(main())

src/main.py

1from urllib.parse import urljoin
2from selenium import webdriver
3from selenium.webdriver.chrome.options import Options as ChromeOptions
4from selenium.webdriver.common.by import By
5from apify import Actor
6import asyncio
7import time
8import random
9from selenium.webdriver.support.ui import Select
10from selenium.webdriver.common.keys import Keys
11from selenium.webdriver.common.by import By
12import requests
13from bs4 import BeautifulSoup as bs
14import csv
15import pandas as pd
16from tqdm import tqdm
17from datetime import datetime, timedelta
18import dateutil.relativedelta
19from seleniumbase import Driver
20from concurrent.futures import ThreadPoolExecutor
21
22
async def login(browser, username, password):
    """Log in through the Humana account page and return the session cookies.

    Opens https://account.humana.com/, submits the credentials, performs two
    optional navigation clicks (skipped when they fail), clicks
    "View All Customers", and finally reads the cookies from the browser.

    Note: every wait here uses ``time.sleep`` and the Selenium calls are
    synchronous, so this coroutine blocks the event loop while it runs.

    Args:
        browser: A Selenium-compatible driver (must provide ``get``,
            ``find_element`` and ``get_cookies``).
        username: Value typed into the ``Username`` input.
        password: Value typed into the ``Password`` input.

    Returns:
        dict: Cookie name -> cookie value for every cookie the browser holds.

    Raises:
        Exception: Whatever the driver raises when a mandatory element
            (login form, "View All Customers") cannot be found or clicked.
    """
    browser.get('https://account.humana.com/')
    time.sleep(random.uniform(4, 5))
    # Type the username
    browser.find_element(By.CSS_SELECTOR, 'input[name="Username"]').send_keys(username)
    # Type the password
    browser.find_element(By.CSS_SELECTOR, 'input[name="Password"]').send_keys(password)
    # Submit the login form
    browser.find_element(By.CSS_SELECTOR, 'button[type="submit"]').click()
    time.sleep(random.uniform(15, 20))

    # Optional step: second button of the "multiPortalAccessForm" list
    # (presumably a portal chooser that is not always shown -- verify).
    # Only ordinary exceptions are swallowed, so KeyboardInterrupt/SystemExit
    # still propagate.
    try:
        browser.find_element(By.XPATH, '//*[@id="multiPortalAccessForm"]/ul/li[2]/button').click()
        time.sleep(random.uniform(10, 15))
    except Exception as exc:
        Actor.log.debug(f'Optional portal-selection click skipped: {exc!r}')

    # Optional step: follow the link whose text contains "Vantage", if present.
    try:
        browser.find_element(By.XPATH, '//a[contains(text(),"Vantage")]').click()
        time.sleep(2)
    except Exception as exc:
        Actor.log.debug(f'Optional "Vantage" link click skipped: {exc!r}')

    # Mandatory step: click the <p> element with the "View All Customers" text.
    browser.find_element(By.XPATH, '//p[text()="View All Customers"]').click()
    time.sleep(random.uniform(20, 30))

    # Flatten Selenium's list of cookie dicts into a simple name -> value mapping.
    return {cookie['name']: cookie['value'] for cookie in browser.get_cookies()}
51
52
async def get_list(cookies, headers):
    """Fetch every record from the portal's policies-and-applications search.

    Posts the same search payload page by page (50 records per page, sorted
    ascending on column 49, insight "all") and concatenates the ``records``
    arrays of all responses.

    The number of pages is derived from ``totalRecords`` in the first response
    using ceiling division, so no trailing empty page is requested when the
    total is an exact multiple of the page size.

    Note: ``requests`` is synchronous, so this coroutine blocks the event loop
    while the HTTP calls are in flight.

    Args:
        cookies: Cookie name -> value mapping sent with every request.
        headers: HTTP headers sent with every request.

    Returns:
        list: All record objects, in page order.

    Raises:
        requests.HTTPError: If any response has an error status code.
        KeyError: If a response body lacks ``totalRecords`` or ``records``.
    """
    search_url = 'https://agentportal.humana.com/Vantage/api/businesscenter/search-policies-and-applications'
    page_size = 50
    json_data = {
        'filters': {
            'dateFilter': None,
            'filterValuesIds': [],
        },
        'insightId': 'all',
        'resultPaging': {
            'amount': page_size,
            'page': 0,
        },
        'resultSort': {
            'columnId': 49,
            'order': 'asc',
        },
    }

    def fetch_page(page_number):
        # Only the page index changes between requests; the rest of the payload is reused.
        json_data['resultPaging']['page'] = page_number
        response = requests.post(
            search_url,
            cookies=cookies,
            headers=headers,
            json=json_data,
        )
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        return response.json()

    first_page = fetch_page(0)
    total = first_page['totalRecords']
    # Ceiling division: 100 records -> 2 pages, 101 records -> 3 pages.
    total_pages = -(-total // page_size)

    list_data = list(first_page['records'])
    for page_number in range(1, total_pages):
        list_data += fetch_page(page_number)['records']
    return list_data
92
93
def _format_portal_date(raw_value):
    """Convert a ``YYYY-MM-DDTHH:MM:SSZ`` string to ``DD/MM/YYYY``; pass ``None`` through."""
    if raw_value is None:
        return None
    return datetime.strptime(raw_value, "%Y-%m-%dT%H:%M:%SZ").strftime("%d/%m/%Y")


async def process(full_data):
    """Flatten one member-detail payload into a single output row.

    Reads the ``memberInformation``, ``otherPolicyInformation`` (first entry
    only), ``policyInformation`` and ``compensatedAgentInfo`` sections of
    ``full_data`` and maps them onto the column names used by the dataset.

    "Carrier" is always "Humana"; "Monthly premium", "Approval Date" and
    "Termination Reason" are always the placeholder "-".

    Args:
        full_data: Parsed JSON object of the member-detail response.

    Returns:
        dict: Column name -> value. Date columns are formatted ``DD/MM/YYYY``;
        DOB, Application Date and Effective Date are ``None`` when their own
        source field is ``None``. "Termination Date" and "Agent Writing ID"
        fall back to ``''`` when unavailable.

    Raises:
        KeyError / IndexError / TypeError: If a mandatory section or field is
            missing from ``full_data``.
        ValueError: If a mandatory date is not in the expected format.
    """
    member = full_data['memberInformation']
    other_policies = full_data['otherPolicyInformation']
    policy = full_data['policyInformation']
    compensated_agent = full_data['compensatedAgentInfo']

    primary_policy = other_policies[0]
    address = member['address']

    memberInformation_dict = {
        "First Name": member['mbrFirstName'],
        # Last name and middle initial are concatenated as before; a missing
        # (None) part is treated as an empty string instead of raising.
        "Last Name": (member['mbrLastName'] or '') + (member['mbrMiddleInit'] or ''),
        "MBI": member['medicareId'],
        "DOB": _format_portal_date(member['birthDate']),
        "Medicaid #": member['medicaidId'],
        "LIS": member['lisIndicator'],
        "Carrier": "Humana",
        "Carrier Member #": primary_policy['humanaID'],
        "Status": primary_policy['status'],
        "Plan Type": primary_policy['planType'],
        "Contract #": primary_policy['product'],
        "Plan Name": primary_policy['planAltDesc'],
        "Monthly premium": "-",
        # NOTE(review): sourced from policyInformation['mbrName']; confirm this
        # field really holds the PCP rather than the member's name.
        "PCP on file": policy['mbrName'],
        "Phone": member['mbrPrimPhone'],
        "Email": member['mbrEmail'],
        "Address Line 1": address['residentAddressLine1'],
        # Previously written under a duplicate "Address Line 1" key, which
        # overwrote line 1 and left "Address Line 2" out of the row.
        "Address Line 2": address['residentAddressLine2'],
        "City": address['residentCityName'],
        "State": address['residentStateCode'],
        "Zip": address['residentZipCode'],
        "County": address['residentCountyName'],
        "Application ID": policy['applicationId'],
        "Election Code": policy['electionTypeCode'],
        # Each date is gated on its own source field (not on birthDate).
        "Application Date": _format_portal_date(policy['signatureDate']),
        "Approval Date": "-",
        "Effective Date": _format_portal_date(policy['covEffDate']),
        "Termination Reason": "-",
        "Writing Agent": primary_policy['writingAgent'],
        "Writing Agent NPN": primary_policy['npn'],
    }

    # Termination date is optional: missing, None or unparseable -> ''.
    try:
        memberInformation_dict["Termination Date"] = _format_portal_date(policy['covTermDate']) or ''
    except (KeyError, TypeError, ValueError):
        memberInformation_dict["Termination Date"] = ''

    # Compensated-agent info is optional: None section or missing key -> ''.
    try:
        memberInformation_dict["Agent Writing ID"] = compensated_agent['aorSan']
    except (KeyError, TypeError, IndexError):
        memberInformation_dict["Agent Writing ID"] = ''

    return memberInformation_dict
143
144
async def get_details(cookies, headers, d):
    """Download one member's detail payload, flatten it, and push it to the dataset.

    Args:
        cookies: Cookie name -> value mapping sent with the request.
        headers: HTTP headers sent with the request.
        d: A search record; its ``id`` and ``salesMemKey`` values are placed in
            the query string.

    Returns:
        dict: The flattened row produced by ``process`` (also pushed via
        ``Actor.push_data``).
    """
    now = datetime.today()
    # Today's date goes into the query unpadded, with "/" already percent-encoded.
    modified_stamp = f"{now.year}%2F{now.month}%2F{now.day}T00:00:00Z"
    url = (
        "https://agentportal.humana.com/Vantage/api/businesscenter/member"
        f"?recordId={d['id']}&salesMemKey={d['salesMemKey']}&modifiedDTime={modified_stamp}"
    )

    full_data = requests.get(url, cookies=cookies, headers=headers).json()
    row = await process(full_data)

    print(f"Got Data for {d['id']}")
    await Actor.push_data(row)
    return row
161
async def main():
    """Run the Actor: log in, list all records, then fetch and store each member's details.

    Steps:
        1. Read ``username`` and ``password`` from the Actor input.
        2. Start a headless SeleniumBase driver and log in to obtain cookies.
        3. Build the request headers (adding ``x-dtpc`` from the ``dtPC`` cookie).
        4. Fetch the full record list, then call ``get_details`` for every record.

    The browser is always closed, even when a step fails.

    Raises:
        ValueError: If ``username`` or ``password`` is missing from the input.
        KeyError: If the ``dtPC`` cookie is absent after login.
    """
    async with Actor:
        # Read the Actor input
        actor_input = await Actor.get_input() or {}
        username = actor_input.get("username")
        password = actor_input.get("password")
        # Fail early with a clear message instead of typing None into the login form.
        if not username or not password:
            raise ValueError('Both "username" and "password" must be provided in the Actor input.')

        # Launch a new Selenium Chrome WebDriver
        Actor.log.info('Launching Chrome WebDriver...')
        browser = Driver(uc=True, headless=True)
        try:
            # Run the login function
            cookies = await login(browser, username, password)
            Actor.log.info("Logged In!")
            headers = {
                'Accept': 'application/json, text/plain, */*',
                'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
                'Authorization': 'Basic VmFudGFnZVdlYkFwcDpwN1JFdmVkIzE=',
                'Connection': 'keep-alive',
                'Content-Type': 'application/json',
                'Origin': 'https://agentportal.humana.com',
                'Referer': 'https://agentportal.humana.com/Vantage/apps/index.html?agenthome=-1',
                'Sec-Fetch-Dest': 'empty',
                'Sec-Fetch-Mode': 'cors',
                'Sec-Fetch-Site': 'same-origin',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
                'sec-ch-ua-mobile': '?0',
                'sec-ch-ua-platform': '"Windows"',
            }
            headers['x-dtpc'] = cookies['dtPC']
            list_data = await get_list(cookies, headers)
            Actor.log.info("Getting Data....")
            # get_details performs blocking HTTP calls, so these coroutines
            # effectively run one after another inside gather().
            tasks = [get_details(cookies, headers, d) for d in tqdm(list_data)]
            await asyncio.gather(*tasks)
            Actor.log.info("Completed.")
        finally:
            # Always release the browser process, including on failure.
            browser.quit()


if __name__ == "__main__":
    asyncio.run(main())

.dockerignore

# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
.venv

# git folder
.git

.editorconfig

root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.gitignore

# This file tells Git which files shouldn't be added to source control

.idea
.DS_Store

apify_storage
storage

.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg

__pycache__

.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache
.ruff_cache

.scrapy
*.log

requirements.txt

# Add your dependencies here.
# See https://pip.pypa.io/en/latest/reference/requirements-file-format/
# for how to format them
apify ~= 1.2.0
selenium ~= 4.14.0
seleniumbase
tqdm
bs4
pandas
# Imported directly by the Actor source (`import requests`, `import dateutil.relativedelta`),
# so they are declared explicitly instead of being relied on as transitive dependencies.
python-dateutil
requests

Gumtree.com(.au) Scraper | Richest Output | ($2 / 1K)

memo23/gumtree-cheerio

Extract detailed listings including pricing history, seller info, vehicle specs (VIN, mileage, condition), electronics details, and furniture dimensions. Get rich media (images, videos), location data, and competitor analysis.

Muhamed Didovic

Google Maps Scraper

rigelbytes/google-maps-scraper

Scrape UNLIMITED business data from Google Maps for just $45/month! Extract names, addresses, reviews, ratings, and more. Perfect for market research, lead generation, and competitor analysis. Easy to use, proxy-supported, and highly customizable. Start scraping smarter today!

Rigel Bytes

Goodreads Scraper

epctex/goodreads-scraper

Scrape goodreads.com for data on millions of books. Crawl book details for images, ISBN, author, description, title, buy links, number of reviews, page number, language, and all other details. You can specify search terms, filters, and much more.

epctex

353

Web Crawler

rigelbytes/webcrawler

This web crawler is designed to provide users with complete flexibility by allowing them to use their **own proxies**. The scraper collects all pages from the website and extracts the **MetaData**, **Title**, and **Content** of each page in Markdown.

Rigel Bytes

Immobilienscout24

rigelbytes/immobilienscout24

Scrape UNLIMITED listings from Immobilienscout24.de for just $25/month. Get detailed property info, contact details, and export data in JSON, CSV, or Excel. Perfect for market research and lead generation!

Rigel Bytes

instagram-posts

rigelbytes/instagram-posts

Extract all posts from public Instagram profiles. Get captions, likes, comments, images, and carousel details — perfect for social media analysis and research.

Rigel Bytes

Daraz

rigelbytes/daraz

Scrape product listings from Daraz.pk with a custom query, an optional proxy, and pagination for e-commerce insights.

Rigel Bytes

Indeed Company: Reviews, Interview, Salary, Jobs, About Scraper

memo23/apify-indeed-reviews

Unlock 360° workforce intelligence - scrape reviews, salaries, jobs, interviews, company profiles, and cultural metrics from Indeed in one click. Transform raw data into recruitment strategies, competitive analysis, and market trends with enterprise-grade HR analytics.

Muhamed Didovic

125

5.0

Indeed Review, Interview, Location, Salary, Jobs-Pay Per Result

memo23/apify-indeed-reviews-ppr

Muhamed Didovic

Facebook Comments Scraper

apify/facebook-comments-scraper

Extract data from hundreds of Facebook comments from one or multiple Facebook posts. Get comment text, timestamp, likes count and basic commenter info. Download the data in JSON, CSV, Excel and use it in apps, spreadsheets, and reports.

Apify

12K

4.7

LinkedIn Jobs & Company Scraper

fetchclub/linkedin-jobs-scraper

Actively Maintained - Cheap Rental & Run Cost - LinkedIn Jobs Scraper + Companies - to extract job listings worldwide. Export results for analysis, connect via API, & integrate with other apps. Please note that LinkedIn may block some requests, leading to fewer results than expected.