Street Fighter 6 CFN Scraper
Pricing
Pay per usage
Go to Store
Street Fighter 6 CFN Scraper
SF6 scraper for the Capcom Fighters Network (Buckler's Boot Camp)
0.0 (0)
Pricing
Pay per usage
2
Total users
29
Monthly users
3
Runs succeeded
67%
Last modified
7 months ago
.actor/Dockerfile
# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python-playwright

# Second, copy just requirements.txt into the Actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./

# Use compileall to ensure the runnability of the Actor Python code.
RUN python3 -m compileall -q .

# Specify how to launch the source code of your Actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]
.actor/actor.json
{ "actorSpecification": 1, "name": "street-fighter-6-cfn-scraper", "title": "Street Fighter 6 CFN Scraper", "description": "SF6 scraper for the Capcom Fighters Network (Buckler's Boot Camp)", "version": "1.0", "meta": { "templateId": "python-beautifulsoup" }, "input": "./input_schema.json", "dockerfile": "./Dockerfile", "storages": { "dataset": { "actorSpecification": 1, "title": "URLs and their titles", "views": { "titles": { "title": "URLs and their titles", "transformation": { "fields": [ "url", "title" ] }, "display": { "component": "table", "properties": { "url": { "label": "URL", "format": "text" }, "title": { "label": "Title", "format": "text" } } } } } } }}
.actor/input_schema.json
{ "title": "Street Fighter 6 CFN Scraper", "type": "object", "schemaVersion": 1, "properties": { "email": { "title": "CFN Email", "type": "string", "description": "Your CFN email address", "editor": "textfield" }, "password": { "title": "CFN Password", "type": "string", "description": "Your CFN password", "editor": "textfield" }, "rank_to_search": { "title": "Only Search for Specific Rank (Index)", "type": "integer", "description": "Finds the starting point of a specific rank (0 is Master, 35 is Rookie 1). If blank, the scraper will start from Master and attempt to find all of the ranks. However, the site will usually block you after a few tries, so it might be better to search for a specific rank instead.", "editor": "number" }, "initial_page_jump": { "title": "Initial Page Jump", "type": "integer", "description": "How many pages should we skip per attempt? Leave blank unless your page to start searching is very close to the target page.", "editor": "number" }, "start_page": { "title": "Start Searching at This Page", "type": "integer", "description": "Start searching for target rank on this page (will speed up execution if accurate). If blank, we'll use our predetermined page numbers (accurate as of March 2024).", "editor": "number" } }, "required": ["email", "password"]}
src/__main__.py
"""
Entry point for the Apify Actor.

Configures logging for the Apify client and SDK loggers, then executes the
`main()` coroutine from `.main` with `asyncio.run()`.

Feel free to modify this file to suit your specific needs.
"""

import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Route both Apify loggers through one stream handler that uses the
# Actor-friendly log formatter.
_handler = logging.StreamHandler()
_handler.setFormatter(ActorLogFormatter())

for _name, _level in (('apify_client', logging.INFO), ('apify', logging.DEBUG)):
    _logger = logging.getLogger(_name)
    _logger.setLevel(_level)
    _logger.addHandler(_handler)

# Execute the Actor main coroutine
asyncio.run(main())
src/main.py
1from urllib.parse import urljoin2from apify import Actor3from apify.log import ActorLogFormatter4import logging5from playwright.async_api import async_playwright, Page, expect6from bs4 import BeautifulSoup7import math8
# --- Playwright state (populated by SetUp, used everywhere below) ---
browser = None            # Playwright Browser instance
context = None            # BrowserContext created from `browser`
page = None               # the single Page used for all navigation
readable_content = None   # prettified HTML of the last fetched page (debug aid)

# --- search state, mutated while hunting for a rank boundary ---
was_above_target = None   # direction flag from the previous search step
iterations = 0            # safety counter for the search loop
page_jump = 1000          # how many ranking pages to skip per step (adaptive)
user_dict = {}            # unused here; presumably reserved for future use — TODO confirm
current_page_int = 1      # ranking page we are currently on
current_target_lp = 0     # LP threshold of the rank currently being searched

# --- values filled in from the Actor input (see GetInfoFromActorInput) ---
email = ""
password = ""
start_pages = []            # per-rank starting page numbers for the search
search_only_this_rank = 0   # rank index to search, or None for all ranks
initial_page_jump = 0       # user-supplied override for the first page jump

# Ranking page URL without the page number; append the page number to use it.
base_start_url = "https://www.streetfighter.com/6/buckler/ranking/league?character_filter=2&character_id=luke&platform=1&user_status=1&home_filter=1&home_category_id=0&home_id=1&league_rank=0&page="

# LP threshold at which each rank begins, index-aligned with `ranks`.
target_lp_per_rank = [
    25000,  # Master
    23800, 22600, 21400, 20200, 19000,  # Diamond
    17800, 16600, 15400, 14200, 13000,  # Platinum
    12200, 11400, 10600, 9800, 9000,    # Gold
    8200, 7400, 6600, 5800, 5000,       # Silver
    4600, 4200, 3800, 3400, 3000,       # Bronze
    2600, 2200, 1800, 1400, 1000,       # Iron
    800, 600, 400, 200, 0               # Rookie
]

# Display names, index-aligned with `target_lp_per_rank` and `estimated_start_pages`.
ranks = [
    "Master",
    "Diamond 5", "Diamond 4", "Diamond 3", "Diamond 2", "Diamond 1",
    "Platinum 5", "Platinum 4", "Platinum 3", "Platinum 2", "Platinum 1",
    "Gold 5", "Gold 4", "Gold 3", "Gold 2", "Gold 1",
    "Silver 5", "Silver 4", "Silver 3", "Silver 2", "Silver 1",
    "Bronze 5", "Bronze 4", "Bronze 3", "Bronze 2", "Bronze 1",
    "Iron 5", "Iron 4", "Iron 3", "Iron 2", "Iron 1",
    "Rookie 5", "Rookie 4", "Rookie 3", "Rookie 2", "Rookie 1",
]

# Predetermined guesses for the ranking page where each rank starts
# (per the input schema, accurate as of March 2024).
estimated_start_pages = [
    6102,   # Master
    6462, 7248, 8300, 9637, 12306,       # Diamond
    13994, 15815, 18101, 20841, 26407,   # Platinum
    28248, 29682, 31164, 32676, 35215,   # Gold
    36138, 37770, 39772, 41749, 44936,   # Silver
    45672, 47114, 48594, 49774, 51799,   # Bronze
    52580, 54017, 55291, 56674, 59273,   # Iron
    59795, 60333, 60962, 61552, 61552    # Rookie
]

# --- results accumulated by DoSearch ---
placement_of_users_per_rank = []  # placement (1-based) of the first user in each searched rank
players_in_each_rank = []         # player counts derived from consecutive placements
async def main():
    """Actor entry point: read the user's input, then run the scrape inside a
    Playwright session."""
    async with Actor:
        # Pull the user's configuration into the module-level globals.
        GetInfoFromActorInput(await Actor.get_input() or {})

        # Launch Playwright and perform the search.
        async with async_playwright() as pw:
            await DoSearch(pw)
def GetInfoFromActorInput(actor_input):
    """Copy the Actor's input fields into module-level globals.

    Args:
        actor_input: dict produced by `Actor.get_input()` (possibly empty).

    Fixes:
    - `initial_page_jump` was previously assigned to a function-local variable
      (no `global` declaration), so the user's "Initial Page Jump" setting was
      silently ignored and the search always started with the default jump.
    - `start_pages` now copies `estimated_start_pages` instead of aliasing it,
      so a `start_page` override no longer mutates the module-level defaults.
    """
    global email
    global password
    global start_pages
    global search_only_this_rank
    global initial_page_jump

    email = actor_input.get('email')
    password = actor_input.get('password')

    # Copy (not alias) the defaults so an override can't corrupt them.
    start_pages = list(estimated_start_pages)
    start_page_override = actor_input.get('start_page')

    search_only_this_rank = actor_input.get('rank_to_search')
    initial_page_jump = actor_input.get('initial_page_jump')

    if search_only_this_rank is not None and start_page_override is not None:
        start_pages[search_only_this_rank] = start_page_override

async def DoSearch(playwright):
    """Log in, then locate the first player of each requested rank and report
    each rank's size as a share of the total playerbase.

    Args:
        playwright: the running async_playwright context manager.
    """
    global placement_of_users_per_rank
    global players_in_each_rank

    # create the browser and log in
    await SetUp(playwright)

    # get the total number of players; the element text starts with a
    # separator character, so drop the first character before parsing
    total_players_str = await page.locator("span[class='ranking_ranking_now__last__oqSXS']").last.text_content()
    total_players_str = total_players_str[1:]
    total_players_str = total_players_str.strip()
    total_players_int = int(total_players_str)

    # we might want to only search for one rank at a time, because the site
    # will kick us out after we load a few pages
    if search_only_this_rank is not None:
        start_index = search_only_this_rank
        end_index = search_only_this_rank + 1
    # otherwise, we'll attempt to search all of them
    else:
        start_index = 0
        end_index = len(ranks)

    print("Start rank index: " + str(start_index) + "\nEnd rank index: " + str(end_index - 1))

    # find the starting point of each rank in our list
    for i in range(start_index, end_index):
        target_lp = target_lp_per_rank[i]
        start_page = start_pages[i]
        rank_name = ranks[i]
        print("Searching for " + rank_name + "...")

        # find where the rank begins
        placement_of_first_user = await FindPlacingOfFirstUserInRank(playwright, start_page, target_lp)
        print("\n" + rank_name + " begins at #" + str(placement_of_first_user) + "\nThere are " + str(total_players_int) + " players in total\n")

        # and add it to a list
        placement_of_users_per_rank.append(placement_of_first_user)

    # this part is only relevant if we somehow managed to get a list of all
    # ranks in a single run of this Actor, which is unlikely
    if len(placement_of_users_per_rank) > 1:
        for i in range(len(placement_of_users_per_rank)):
            rank_name = ranks[i]
            current_rank_starts_at = placement_of_users_per_rank[i]
            prev_rank_starts_at = placement_of_users_per_rank[i - 1] if i > 0 else 0
            players_in_rank = current_rank_starts_at - prev_rank_starts_at

            percentage = GetPercentageString(players_in_rank, total_players_int)
            print(rank_name + " contains " + str(players_in_rank) + " players\nIt represents " + percentage + " of the playerbase")

            players_in_each_rank.append(players_in_rank)

    print("Placements of all users: " + str(placement_of_users_per_rank))
    print("Players in each rank: " + str(players_in_each_rank))
147def GetPercentageString(players_in_rank, total_players):148 percentage_int = (players_in_rank / total_players) * 100149 return str(percentage_int) + "%"150
async def SetUp(playwright):
    """Launch Firefox, pass the age gate, log into CFN, and land on page 1 of
    the ranking. Call this first, and only call it once."""
    global browser
    global context
    global page
    global iterations
    global page_jump

    # Create the Playwright browser, context, and single shared page.
    browser = await playwright.firefox.launch(headless=Actor.config.headless)
    context = await browser.new_context()
    page = await context.new_page()

    # Start at the login page.
    await page.goto("https://www.streetfighter.com/6/buckler/auth/loginep?redirect_url=/?status=login")
    await GetPageHtml()

    # The site gates everything behind an age-check dropdown.
    await InputAgeCheck()
    await GetPageHtml()

    # Uncomment to check whether Cloudflare is blocking repeated logins:
    # print(readable_content)

    await LogIn()
    await GetPageHtml()

    # Uncomment to check whether Cloudflare is blocking repeated logins:
    # print(readable_content)

    # It'll be easier if we just start on the ranking page.
    await page.goto(base_start_url + str(1), timeout=60000)
async def FindPlacingOfFirstUserInRank(playwright, start_page, target_lp):
    """Hunt for the first (highest-placed) user of the rank that begins at
    `target_lp`, starting from ranking page `start_page`.

    Args:
        playwright: unused; kept for interface compatibility.
        start_page: ranking page number to begin searching from.
        target_lp: the LP threshold at which the target rank begins.

    Returns:
        The 1-based placement of the first user in the target rank, or None
        if the search could not complete (missing start page, or the page
        limit was reached without finding the boundary).
    """
    global current_target_lp
    global page_jump

    current_target_lp = target_lp

    if start_page is None:
        Actor.log.error('Start page is null!')
        return

    # go to the ranking page
    start_url = base_start_url + str(start_page)
    await page.goto(start_url, timeout=60000)

    print("Start URL: " + page.url)

    # figure out what page we're on from the pager widget
    pagination = page.locator("div[class='ranking_pc__LlGv4']").locator("div[class='ranking_ranking_pager__top__etBHR']").locator("ul[class='pagination']").first
    await expect(pagination).to_be_visible(timeout=30000)

    page_jump = initial_page_jump or 1000
    iterations = 0
    highest_user_in_last_rank = None

    # loop the search function; each call navigates to a new page
    while True:
        highest_user_in_last_rank = await SearchForBeginningOfRank(pagination)

        # if we reached our goal, then break
        if highest_user_in_last_rank is not None:
            break

        # just in case something went wrong, prevent the loop from running infinitely
        iterations += 1
        if iterations > 30:
            break

    # BUGFIX: previously, hitting the iteration cap fell through with
    # highest_user_in_last_rank still None and crashed on `.locator("dt")`
    # with an AttributeError. Bail out cleanly instead.
    if highest_user_in_last_rank is None:
        Actor.log.error('Gave up after ' + str(iterations) + ' page loads without finding the rank boundary')
        return None

    # e.g. "#1234" -> 1234 (drop the leading marker character)
    placement_str = await highest_user_in_last_rank.locator("dt").text_content()
    placement_str = placement_str.strip()
    placement_str = placement_str[1:]
    placement_int = int(placement_str)

    # e.g. "12345 LP" -> 12345 (strip the trailing 3-character " LP" suffix)
    lp_str = await highest_user_in_last_rank.locator("dd").text_content()
    lp_str = lp_str[:-3]
    lp_int = int(lp_str)

    username = await highest_user_in_last_rank.locator("span[class='ranking_name__El29_']").text_content()

    # print the results
    print("\nHighest ranked user in previous rank: " + str(username))
    print("LP: " + lp_str + "\nPosition: " + placement_str + "\nPage: " + str(current_page_int) + "\nURL: " + page.url)

    # placement_int refers to the highest-placed user in the prev rank,
    # so we'll add 1 to give us the first user in the current rank
    return placement_int + 1
async def SearchForBeginningOfRank(pagination):
    """Load one ranking page and try to find the highest-ranked user of the
    PREVIOUS rank — which tells us where the current rank begins.

    Reads/writes module globals: `page_jump` (adaptive step size, in pages),
    `was_above_target` (direction flag remembered from the previous call), and
    `current_page_int` (the ranking page we are currently on).

    Returns the Playwright locator for that user if this page contains the
    boundary; otherwise navigates toward the boundary and returns None, so
    the caller must call this again.
    """
    global was_above_target
    global page_jump
    global current_page_int

    # get info about current and next page from the pager widget
    current_page = pagination.locator("xpath=/li[@class='active']").first
    await expect(current_page).to_be_visible()

    current_page_text = await current_page.text_content()
    current_page_str = str(current_page_text)
    current_page_int = int(current_page_str)
    print("\nCurrent page: " + current_page_str)

    # first, we need to find the ranking list
    ranking_list = page.locator("xpath=//ul[@class='ranking_ranking_list__szajj']").first
    await expect(ranking_list).to_be_visible()

    # find the last user's LP (the lowest LP visible on this page)
    all_users_on_page = await ranking_list.locator("xpath=/li").all()
    last_user = all_users_on_page[len(all_users_on_page) - 1]
    await expect(last_user).to_be_visible()

    # e.g. "12345 LP" -> 12345 (strip the trailing 3-character " LP" suffix)
    last_user_lp_str = await last_user.locator("dd").text_content()
    last_user_lp_str = last_user_lp_str[:-3]
    last_user_lp_int = int(last_user_lp_str)
    print("LP of the last user on the page: " + str(last_user_lp_int) + "\nWe're looking for " + str(current_target_lp))

    # we're trying to roughly find the last page of the lower rank
    highest_lp_in_last_rank = current_target_lp - 1

    # if we're above the target, then count downwards (move to later pages)
    if last_user_lp_int > highest_lp_in_last_rank:
        # every time we overshoot (direction flips), halve the size of the jump
        # NOTE(review): the flag name reads inverted — it is True after a
        # below-target page — but the halving logic depends on it as-is.
        if was_above_target == True:
            page_jump = math.floor(page_jump / 2)

        # if we're getting close, we'll need a much lower page jump
        if last_user_lp_int == current_target_lp:
            if abs(page_jump) > 50:
                page_jump = 50

        page_jump = abs(page_jump)
        print("Page jump: " + str(page_jump))

        was_above_target = False

    # if we're below the target (most likely if we overshot), then count upwards
    elif last_user_lp_int < highest_lp_in_last_rank - 20:
        # every time we overshoot, we'll halve the size of the jump
        if was_above_target == False:
            page_jump = math.floor(page_jump / 2)

        page_jump = -abs(page_jump)
        print("Page jump: " + str(page_jump))

        was_above_target = True

    # once you've found the new rank, we have no choice but to iterate slowly
    # and find where the old rank ends
    else:
        print("We're very close to our target! It's time to start incrementing one page at a time")

        # get all lp on page
        lp_list = await GetAllLpOnPage(ranking_list)

        # basically, we want to find the first person with the LP of the target rank
        if not current_target_lp in lp_list:
            print("We overshot a little, so we'll have to move backward one page at a time to find the first user with an LP of " + str(current_target_lp))
            page_jump = -1

        # if we found someone at current_target_lp, then I think that's it?
        else:
            # find the first user on the page whose LP is BELOW the target;
            # the boundary user is the one at that index
            target_index = -1

            for i in range(len(lp_list)):
                if lp_list[i] < current_target_lp:
                    target_index = i
                    break

            # we've found our target, so let's return the user and let the main method take control
            # (if no user is below target, target_index stays -1 and the last user is returned)
            return all_users_on_page[target_index]

    # figure out the name of the URL to move to

    target_page = current_page_int + page_jump
    print("Target page: " + str(target_page))

    target_url = GetURLForPage(target_page)

    await page.goto(target_url, timeout=180000)
    await page.wait_for_url(target_url)
    await page.content()
def GetURLForPage(new_page_int):
    """Build the URL for ranking page `new_page_int` by swapping the page
    number embedded in the current URL.

    BUGFIX: the `new_page_int` parameter was previously ignored — the target
    page was recomputed from the `current_page_int` and `page_jump` globals.
    Existing callers pass `current_page_int + page_jump`, so behavior is
    unchanged for them, but the function now honors its argument.
    """
    string_to_replace = "page=" + str(current_page_int)
    return page.url.replace(string_to_replace, "page=" + str(new_page_int))
async def GetAllLpOnPage(ranking_list):
    """Return the LP of every user (<li>) on the current ranking page, in
    page order.

    Args:
        ranking_list: Playwright locator for the <ul> holding the user rows.

    Improvements: removed the unused `count` local and replaced the
    index-based loop with direct iteration.
    """
    output = []
    for user_li in await ranking_list.locator("xpath=/li").all():
        # e.g. "12345 LP" -> 12345 (strip the trailing 3-character " LP")
        lp_str = str(await user_li.locator("dd").text_content())
        output.append(int(lp_str[:-3]))

    print("All LP on page: " + str(output))
    return output
async def GetPageHtml():
    """Store a prettified copy of the current page's HTML in the module
    global `readable_content`. Not strictly needed — useful for debugging."""
    global readable_content

    soup = BeautifulSoup(await page.content(), features="html.parser")
    readable_content = soup.prettify()

async def InputAgeCheck():
    """Fill out and submit the age-check form that gates the site."""
    # Locate the country dropdown (instant), then await our selection.
    await page.locator("select[id='country']").select_option("Canada")

    # No error so far means the form is present; fill in the birth date.
    await page.locator("select[id='birthYear']").select_option('1992')
    await page.locator("select[id='birthMonth']").select_option('1')
    await page.locator("select[id='birthDay']").select_option('15')

    # press submit
    await page.locator("button[name='submit']").click()

    # wait for the new page to load
    await page.wait_for_timeout(3000)

    print("Passed age check!\n")
async def LogIn():
    """Submit the CFN login form using the credentials from the Actor input."""
    # fill out the email and password fields
    await page.locator("input[type='email']").fill(email)
    await page.locator("input[type='password']").fill(password)

    # press submit
    await page.locator("button[name='submit']").click()

    # wait for the new page to load
    await page.wait_for_timeout(10000)

    print("Logged in!\n")
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
.venv

# git folder
.git
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.gitignore
# This file tells Git which files shouldn't be added to source control
.idea
.DS_Store

apify_storage
storage

.venv/
.env/
__pypackages__
dist/
build/
*.egg-info/
*.egg

__pycache__

.mypy_cache
.dmypy.json
dmypy.json
.pytest_cache
.ruff_cache

.scrapy
*.log
requirements.txt
# Feel free to add your Python dependencies below. For formatting guidelines, see:
# https://pip.pypa.io/en/latest/reference/requirements-file-format/

apify ~= 1.6.0
beautifulsoup4 ~= 4.12.2
httpx ~= 0.25.2
types-beautifulsoup4 ~= 4.12.0.7