Street Fighter 6 CFN Scraper
Pricing
Pay per usage
Go to Apify Store
Street Fighter 6 CFN Scraper
SF6 scraper for the Capcom Fighters Network (Buckler's Boot Camp)
0.0 (0)
Pricing
Pay per usage
2
38
3
Last modified
a year ago
Pricing
Pay per usage
SF6 scraper for the Capcom Fighters Network (Buckler's Boot Camp)
0.0 (0)
Pricing
Pay per usage
2
38
3
Last modified
a year ago
# First, specify the base Docker image.
# You can see the Docker images from Apify at https://hub.docker.com/r/apify/.
# You can also use any other image from Docker Hub.
FROM apify/actor-python-playwright

# Second, copy just requirements.txt into the Actor image,
# since it should be the only file that affects the dependency install in the next step,
# in order to speed up the build
COPY requirements.txt ./

# Install the packages specified in requirements.txt,
# Print the installed Python version, pip version
# and all installed packages with their versions for debugging
RUN echo "Python version:" \
 && python --version \
 && echo "Pip version:" \
 && pip --version \
 && echo "Installing dependencies:" \
 && pip install -r requirements.txt \
 && echo "All installed Python packages:" \
 && pip freeze

# Next, copy the remaining files and directories with the source code.
# Since we do this after installing the dependencies, quick build will be really fast
# for most source file changes.
COPY . ./

# Use compileall to ensure the runnability of the Actor Python code.
RUN python3 -m compileall -q .

# Specify how to launch the source code of your Actor.
# By default, the "python3 -m src" command is run
CMD ["python3", "-m", "src"]
{ "actorSpecification": 1, "name": "street-fighter-6-cfn-scraper", "title": "Street Fighter 6 CFN Scraper", "description": "SF6 scraper for the Capcom Fighters Network (Buckler's Boot Camp)", "version": "1.0", "buildTag": "latest", "input": "./input_schema.json", "dockerfile": "./Dockerfile"}
{ "title": "Street Fighter 6 CFN Scraper", "type": "object", "schemaVersion": 1, "properties": { "email": { "title": "CFN Email", "type": "string", "description": "Your CFN email address", "editor": "textfield" }, "password": { "title": "CFN Password", "type": "string", "description": "Your CFN password", "editor": "textfield" }, "rank_to_search": { "title": "Only Search for Specific Rank (Index)", "type": "integer", "description": "Finds the starting point of a specific rank (0 is Master, 35 is Rookie 1). If blank, the scraper will start from Master and attempt to find all of the ranks. However, the site will usually block you after a few tries, so it might be better to search for a specific rank instead.", "prefill": 1, "editor": "number" }, "initial_page_jump": { "title": "Initial Page Jump", "type": "integer", "description": "How many pages should we skip per attempt? Leave blank unless your page to start searching is very close to the target page.", "editor": "number" }, "start_page": { "title": "Start Searching at This Page", "type": "integer", "description": "Start searching for target rank on this page (will speed up execution if accurate). If blank, we'll use our predetermined page numbers (accurate as of March 2024).", "editor": "number" }, "start_page_array": { "title": "Start Searching at These Pages", "type": "array", "description": "Start searching for EACH target rank on these pages (will speed up execution if accurate). There should be one entry for each rank that we need to find (35 in total, because we don't need to search for Rookie 1).\nInput should be an array of numbers, like [4, 50, 70].", "prefill": [ 6102, 6462, 7248, 8418, 9637, 12306, 13994, 15815, 18101, 20841, 26615, 28248, 29682, 31164, 32676, 35215, 36138, 37770, 39772, 41749, 44936, 45672, 47114, 48594, 49774, 51799, 52580, 54017, 55291, 56674, 59273, 59795, 60333, 60962, 61552 ], "editor": "json" }, "skip_LP": { "title": "Skip searching for regular ranks", "type": "boolean", "description": "Should we skip searching for the regular ranks? Should probably be false unless you're trying to test something.", "editor": "checkbox" }, "skip_MR": { "title": "Skip searching for Master Rate", "type": "boolean", "description": "Should we skip searching for MR? Should probably be false unless you're trying to test something.", "editor": "checkbox" } }, "required": ["email", "password"]}
"""
Entry point for the Apify Actor.

Configures logging for the Apify SDK/client loggers, then runs the `main()`
coroutine with `asyncio.run()`. Feel free to modify this file to suit your
specific needs.
"""

import asyncio
import logging

from apify.log import ActorLogFormatter

from .main import main

# Route both Apify loggers through one formatted stream handler.
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())

for logger_name, level in (('apify_client', logging.INFO), ('apify', logging.DEBUG)):
    configured_logger = logging.getLogger(logger_name)
    configured_logger.setLevel(level)
    configured_logger.addHandler(handler)

# Execute the Actor main coroutine
asyncio.run(main())
from apify import Actor
from playwright.async_api import async_playwright, expect
from bs4 import BeautifulSoup
import math
import numpy

#region variables
# browser/page handles shared by every helper below
browser = None
context = None
page = None
readable_content = None

# state for the page-jumping search
was_above_target = None
iterations = 0
page_jump = 1000
default_page_jump = 1000
page_jump_when_close = 50
current_page_int = 1
current_target_lp = 0
close_to_target_threshold = 20
searching_for_MR = False
ranking_list = None
all_users_on_page = None
last_user_lp_int = 0
overshot_1500_once = False
began_fine_search = False
final_page_MR = 0

# populated from the Apify Actor input
email = ""
password = ""
start_pages = []
search_only_this_rank = 0
initial_page_jump = 0
skip_LP = False
skip_MR = False

# const
base_start_url = "https://www.streetfighter.com/6/buckler/ranking/league?character_filter=2&character_id=luke&platform=1&user_status=1&home_filter=1&home_category_id=0&home_id=1&league_rank=0&page="
login_page = "https://www.streetfighter.com/6/buckler/auth/loginep?redirect_url=/?status=login"
master_url = "https://www.streetfighter.com/6/buckler/ranking/master?character_filter=2&character_id=luke&platform=1&home_filter=1&home_category_id=0&home_id=1&page=1&season_type=1"

# LP threshold at which each rank begins (index-aligned with `ranks`)
target_lp_per_rank = [
    25000,                              # Master
    23800, 22600, 21400, 20200, 19000,  # Diamond
    17800, 16600, 15400, 14200, 13000,  # Platinum
    12200, 11400, 10600, 9800, 9000,    # Gold
    8200, 7400, 6600, 5800, 5000,       # Silver
    4600, 4200, 3800, 3400, 3000,       # Bronze
    2600, 2200, 1800, 1400, 1000,       # Iron
    800, 600, 400, 200                  # Rookie
]

ranks = [
    "Master",
    "Diamond 5", "Diamond 4", "Diamond 3", "Diamond 2", "Diamond 1",
    "Platinum 5", "Platinum 4", "Platinum 3", "Platinum 2", "Platinum 1",
    "Gold 5", "Gold 4", "Gold 3", "Gold 2", "Gold 1",
    "Silver 5", "Silver 4", "Silver 3", "Silver 2", "Silver 1",
    "Bronze 5", "Bronze 4", "Bronze 3", "Bronze 2", "Bronze 1",
    "Iron 5", "Iron 4", "Iron 3", "Iron 2", "Iron 1",
    "Rookie 5", "Rookie 4", "Rookie 3", "Rookie 2"
]

# best-guess page on which to start hunting for each rank
estimated_start_pages = [
    6924,                               # Master
    7345, 8187, 9310, 10756, 13511,     # Diamond
    15264, 17165, 19529, 22393, 28175,  # Platinum
    30096, 31578, 33109, 34669, 37285,  # Gold
    38229, 39900, 41955, 41749, 44936,  # Silver
    47992, 49314, 50827, 52197, 54275,  # Bronze
    55075, 56391, 57853, 59267, 59273,  # Iron
    62472, 63021, 63673, 64285          # Rookie
]

# MR threshold at which each Master Rate bucket begins
target_MR_per_bucket = [
    1000, 1100, 1200, 1300, 1400,
    1500, 1501, 1600, 1700, 1800,
    1900, 2000
]

# best-guess page on which to start hunting for each MR bucket
estimated_start_pages_MR = [
    4700, 4700, 4700, 4236, 3678,
    2520, 1932, 845, 405, 186,
    80, 34
]

MR_bucket_names = [
    "Unrated (No MR)", "Master 1",  # we don't have to search for these two
    "Master 2", "Master 3", "Master 4", "Master 5", "Master 6",
    "Unrated (1500 MR)", "Master 7", "Master 8", "Master 9",
    "Master 10", "Master 11", "Master 12"
]

# output
total_players_int = 0
page_of_each_rank_start = []
placement_of_users_per_rank = []
players_in_each_rank = []

placement_of_users_per_MR_bucket = []
page_of_each_rank_start_MR = []
#endregion
107async def main():108 async with Actor:109 # get global variables from the user's input110 actor_input = await Actor.get_input() or {}111 GetInfoFromActorInput(actor_input)112 113 if not skip_LP:114 # set up the Playwright browser and begin the search115 async with async_playwright() as playwright:116 # if we're searching for every single rank, we could be awaiting here for several hours117 # otherwise, if you're searching for a single rank, it'll probably take <5 mins118 await DoSearch(playwright, Actor)119 120 # the indexes will be wrong in this case, so let's just use our old method121 if search_only_this_rank is not None:122 await SendOutputToApify(Actor)123 124 if not skip_MR and search_only_this_rank is None:125 # now we have to do the same for MR...126 async with async_playwright() as playwright:127 await DoSearchForMR(playwright)128 129 # send the output to apify again130 await SendMRResultsToApify(Actor)131
#region search
async def DoSearch(playwright, Actor):
    """Finds the starting placement of every requested rank and stores the results in module globals."""
    global placement_of_users_per_rank
    global players_in_each_rank
    global total_players_int
    global searching_for_MR

    searching_for_MR = False

    # the site tends to kick us out after a few page loads, so single-rank runs are often safer
    if search_only_this_rank is not None:
        start_index = search_only_this_rank
        end_index = search_only_this_rank + 1
        print("Searching for " + ranks[start_index])
    else:
        # otherwise, attempt every rank in one run
        start_index = 0
        end_index = len(ranks)
        print("Searching for all ranks")

    for i in range(start_index, end_index):
        # fresh browser + login for every rank
        await CreateBrowser(playwright)

        # the total player count only needs to be fetched once
        if total_players_int == 0:
            total_players_int = await GetTotalPlayers()

        target_lp = target_lp_per_rank[i]
        start_page = start_pages[i]
        rank_name = ranks[i]
        print("Searching for " + rank_name + "...")

        # find where the rank begins
        placement_of_first_user = await FindPlacingOfFirstUserInRank(start_page, target_lp)
        print("\n" + rank_name + " begins at #" + str(placement_of_first_user) + "\nThere are " + str(total_players_int) + " players in total\n")

        percentile = round((total_players_int - placement_of_first_user) / total_players_int * 100, 2)
        print(rank_name + " is the " + str(percentile) + "th percentile\n")

        placement_of_users_per_rank.append(placement_of_first_user)

        # when looping all ranks, push output after each one in case the scraper dies mid-run
        if search_only_this_rank is None:
            await SendSingleOutputToApify(Actor, i)

async def CreateBrowser(playwright):
    """This will launch the browser and log you into CFN. Call this first."""
    global browser
    global context
    global page
    global iterations
    global page_jump

    browser = await playwright.firefox.launch(headless = Actor.config.headless)
    context = await browser.new_context()
    page = await context.new_page()

    # go to the login page
    await page.goto(login_page)
    await page.content()
    await page.wait_for_timeout(1000)

    # fill out the age check dropdown
    await InputAgeCheck()
    await page.content()
    await page.wait_for_timeout(1000)

    # log in
    # (uncomment GetPageHtml()/print(readable_content) here to check whether
    # cloudflare is blocking you for logging in too many times)
    await LogIn()
    await page.content()
    await page.wait_for_timeout(1000)

    # it'll be easier if we just start on the ranking page
    start_url = base_start_url + str(1)
    await page.goto(start_url, timeout=60000)
async def FindPlacingOfFirstUserInRank(start_page, target_lp):
    """This is our main method. If all goes according to plan, you can await this, and it'll return the placing of the first user in the target rank."""
    global current_target_lp
    global page_jump
    global page_of_each_rank_start
    global began_fine_search

    current_target_lp = target_lp

    if start_page is None:
        Actor.log.error('Start page is null!')
        return

    # go to the ranking page
    start_url = base_start_url + str(start_page)
    await page.goto(start_url, timeout=60000)

    print("Start URL: " + page.url)

    # locate the pager so we can figure out what page we're on
    pagination = page.locator("div[class='ranking_pc__LlGv4']").locator("div[class='ranking_ranking_pager__top__etBHR']").locator("ul[class='pagination']").first
    await expect(pagination).to_be_visible(timeout=30000)

    page_jump = initial_page_jump or 1000
    iterations = 0
    highest_user_in_last_rank = None
    began_fine_search = False

    # loop the search function; each call navigates to a new page
    while True:
        highest_user_in_last_rank = await SearchForBeginningOfRank(pagination)

        # goal reached
        if highest_user_in_last_rank is not None:
            break

        # safety valve so the loop can't run forever
        # NOTE(review): if this cap trips, highest_user_in_last_rank is still None and the
        # locator calls below will raise — same behavior as the original, just be aware
        iterations += 1
        if iterations > 30:
            break

    placement_str = (await highest_user_in_last_rank.locator("dt").text_content()).strip()[1:]
    placement_int = int(placement_str)

    lp_str = (await highest_user_in_last_rank.locator("dd").text_content())[:-3]

    username = await highest_user_in_last_rank.locator("span[class='ranking_name__El29_']").text_content()

    # print the results
    print("\nHighest ranked user in previous rank: " + str(username))
    print("LP: " + lp_str + "\nPosition: " + placement_str + "\nPage: " + str(current_page_int) + "\nURL: " + page.url)

    page_of_each_rank_start.append(current_page_int)

    # placement_int is the highest-placed user in the previous rank,
    # so +1 gives the first user of the current rank
    return placement_int + 1
async def RefreshInfoAboutCurrentPage(pagination):
    """Reads the active pagination item and stores its page number in current_page_int."""
    global current_page_int

    active_item = pagination.locator("xpath=/li[@class='active']").first
    await expect(active_item).to_be_visible()

    page_text = str(await active_item.text_content())
    current_page_int = int(page_text)
    print("\nCurrent page: " + page_text)
async def RefreshInfoAboutUsersOnPage():
    """Locates the ranking list on the current page and records its users plus the LP of the last one."""
    global ranking_list
    global all_users_on_page
    global last_user_lp_int

    # first, we need to find the ranking list
    ranking_list = page.locator("xpath=//ul[@class='ranking_ranking_list__szajj']").first
    await expect(ranking_list).to_be_visible()

    # grab every row and inspect the final one
    all_users_on_page = await ranking_list.locator("xpath=/li").all()
    last_user = all_users_on_page[-1]
    await expect(last_user).to_be_visible()

    # strip the " LP" suffix before parsing
    last_user_lp_int = int((await last_user.locator("dd").text_content())[:-3])
    print("LP of the last user on the page: " + str(last_user_lp_int) + "\nWe're looking for " + str(current_target_lp))

async def SearchForBeginningOfRank(pagination):
    """Loads one page per call and tries to spot the highest-ranked user of the previous rank
    (which marks where the current rank begins). Returns that user's row locator when found;
    returns None when the caller should invoke this again on the next page."""
    global was_above_target
    global page_jump
    global began_fine_search

    await RefreshInfoAboutCurrentPage(pagination)
    await RefreshInfoAboutUsersOnPage()

    # we're roughly hunting for the last page of the lower rank
    highest_lp_in_last_rank = current_target_lp - 1

    if last_user_lp_int > highest_lp_in_last_rank and not began_fine_search:
        # still above the target: move forward; halve the jump whenever direction flips
        if was_above_target is True:
            page_jump = math.floor(page_jump / 2)

        # when the page already touches the target LP, shrink the jump drastically
        if last_user_lp_int == current_target_lp:
            if abs(page_jump) > page_jump_when_close:
                page_jump = page_jump_when_close

        page_jump = abs(page_jump)
        print("Page jump: " + str(page_jump))

        was_above_target = False

    elif last_user_lp_int < highest_lp_in_last_rank - close_to_target_threshold and not began_fine_search:
        # below the target (likely overshot): move backward; halve the jump on a direction flip
        if was_above_target is False:
            page_jump = math.floor(page_jump / 2)

        page_jump = -abs(page_jump)
        print("Page jump: " + str(page_jump))

        was_above_target = True

    else:
        # close enough — walk one page at a time to pin down where the old rank ends
        print("We're very close to our target! It's time to start incrementing one page at a time")

        began_fine_search = True

        lp_list = await GetAllLpOnPage(ranking_list)

        # we want the first person holding the target rank's LP
        if not current_target_lp in lp_list:
            print("We overshot a little, so we'll have to move backward one page at a time to find the first user with an LP of " + str(current_target_lp))
            page_jump = -1
        else:
            # found someone at current_target_lp: return the row just above the first sub-target LP
            target_index = -1
            for i in range(len(lp_list)):
                if lp_list[i] < current_target_lp:
                    target_index = i
                    break

            return all_users_on_page[target_index]

    # hack for sparse ranks (e.g. ~1000 MR): repeated halving eventually rounds the jump to 0,
    # at which point we should treat it like a fine search
    if page_jump == 0:
        began_fine_search = True

    # work out which page to load next (clamped so we never jump past the end)
    next_page = current_page_int + page_jump
    max_page = final_page_MR if searching_for_MR else 99999
    next_page = numpy.clip(next_page, 1, max_page)
    print("Next page: " + str(next_page))

    target_url = GetURLForPage(next_page)

    # load next page
    await page.goto(target_url, timeout=180000)
    await page.wait_for_url(target_url)
    await page.content()

async def DoSearchForMR(playwright):
    """Finds the starting placement of every Master Rate bucket and stores the results in module globals."""
    global placement_of_users_per_MR_bucket
    global default_page_jump
    global close_to_target_threshold
    global searching_for_MR
    global page_jump_when_close

    # the Master list is far shorter than the overall list, so use a smaller jump
    # (jumping to a page that doesn't exist triggers an error)
    default_page_jump = 250

    # find the last page up-front so we never jump past it
    await DetermineLastPage(playwright)
    print("The final page is " + str(final_page_MR))

    # MR is more evenly distributed than LP (which clusters at rank starts due to
    # rank-down protection), so the search thresholds need tweaking
    close_to_target_threshold = 1
    page_jump_when_close = 5
    searching_for_MR = True

    for i in range(0, len(target_MR_per_bucket)):
        # fresh browser + login for every bucket
        await CreateBrowser(playwright)

        target_MR = target_MR_per_bucket[i]
        start_page = estimated_start_pages_MR[i]
        rank_name = MR_bucket_names[i + 2]

        print("Searching for " + rank_name + "...")

        # find where the bucket begins
        placement_of_first_user = await FindPlacingOfFirstUserInMRBucket(start_page, target_MR)
        print("\n" + rank_name + " begins at #" + str(placement_of_first_user))

        placement_of_users_per_MR_bucket.append(placement_of_first_user)

async def FindPlacingOfFirstUserInMRBucket(start_page, target_MR):
    """This is our main method. If all goes according to plan, you can await this, and it'll return the placing of the first user in the target rank."""
    global page_jump
    global page_of_each_rank_start_MR
    global current_target_lp
    global began_fine_search

    current_target_lp = target_MR

    # go to the ranking page
    start_url = master_url.replace("page=1", "page=" + str(start_page))
    await page.goto(start_url, timeout=60000)

    print("Start URL: " + page.url)

    # locate the pager so we can figure out what page we're on
    pagination = page.locator("div[class='ranking_pc__LlGv4']").locator("div[class='ranking_ranking_pager__top__etBHR']").locator("ul[class='pagination']").first
    await expect(pagination).to_be_visible(timeout=30000)

    page_jump = initial_page_jump or default_page_jump
    iterations = 0
    highest_user_in_last_rank = None
    began_fine_search = False

    # loop the search function; each call navigates to a new page
    while True:
        # there are so many 1500s that we can't afford to start at the bottom
        # and iterate upwards one-by-one
        if current_target_lp == 1501:
            highest_user_in_last_rank = await SearchForBeginningOfRankFromAbove(pagination)
        else:
            highest_user_in_last_rank = await SearchForBeginningOfRank(pagination)

        # goal reached
        if highest_user_in_last_rank is not None:
            break

        # safety valve so the loop can't run forever
        iterations += 1
        if iterations > 30:
            break

    placement_str = (await highest_user_in_last_rank.locator("dt").text_content()).strip()[1:]
    placement_int = int(placement_str)

    lp_str = (await highest_user_in_last_rank.locator("dd").text_content())[:-3]

    username = await highest_user_in_last_rank.locator("span[class='ranking_name__El29_']").text_content()

    # print the results
    print("\nHighest ranked user in previous rank: " + str(username))
    print("LP: " + lp_str + "\nPosition: " + placement_str + "\nPage: " + str(current_page_int) + "\nURL: " + page.url)

    page_of_each_rank_start_MR.append(current_page_int)

    # placement_int is the highest-placed user in the previous rank,
    # so +1 gives the first user of the current bucket
    return placement_int + 1
async def SearchForBeginningOfRankFromAbove(pagination):
    """There are some finnicky differences between this and SearchForBeginningOfRank that we might as well make a separate method"""
    global was_above_target
    global page_jump
    global overshot_1500_once
    global began_fine_search

    await RefreshInfoAboutCurrentPage(pagination)
    await RefreshInfoAboutUsersOnPage()

    # we're trying to roughly find the last page of the lower rank
    lowest_mr_in_next_rank = current_target_lp
    highest_mr_in_prev_rank = current_target_lp - 1

    print("Annoying exception: we're starting from above and trying to find any page with " + str(lowest_mr_in_next_rank) + "\nThen we'll iterate downward to find where it began")

    if last_user_lp_int > lowest_mr_in_next_rank and not began_fine_search:
        # above the target: move forward; halve the jump whenever direction flips
        if was_above_target is True:
            page_jump = math.floor(page_jump / 2)

        # when the page already touches the target, shrink the jump drastically
        if last_user_lp_int == current_target_lp:
            if abs(page_jump) > page_jump_when_close:
                page_jump = page_jump_when_close

        page_jump = abs(page_jump)
        print("Page jump: " + str(page_jump))

        was_above_target = False

    elif last_user_lp_int < lowest_mr_in_next_rank and not began_fine_search:
        # below the target: move backward; halve the jump on a direction flip
        if was_above_target is False:
            page_jump = math.floor(page_jump / 2)

        page_jump = -abs(page_jump)
        print("Page jump: " + str(page_jump))

        was_above_target = True

    else:
        # close enough — walk page by page to pin down where the old rank ends
        print("We're very close to our target! It's time to start incrementing one page at a time")

        began_fine_search = True

        lp_list = await GetAllLpOnPage(ranking_list)

        # first find any page containing the previous bucket's top value...
        if not overshot_1500_once:
            if not highest_mr_in_prev_rank in lp_list:
                print("Trying to find the last user with an LP of " + str(highest_mr_in_prev_rank))
                page_jump = 5
            else:
                # ...then start backtracking (yes, this is convoluted)
                overshot_1500_once = True
                page_jump = -1
        else:
            page_jump = -1

        # finally done I think
        if lowest_mr_in_next_rank in lp_list:
            target_index = -1
            for i in range(len(lp_list)):
                if lp_list[i] < current_target_lp:
                    target_index = i
                    break

            return all_users_on_page[target_index]

    # hack for sparse buckets: repeated halving eventually rounds the jump to 0,
    # at which point we should treat it like a fine search
    if page_jump == 0:
        began_fine_search = True

    # work out which page to load next (clamped to the known last page)
    next_page = current_page_int + page_jump
    next_page = numpy.clip(next_page, 1, final_page_MR)
    print("Next page: " + str(next_page))

    target_url = GetURLForPage(next_page)

    # load next page
    await page.goto(target_url, timeout=180000)
    await page.wait_for_url(target_url)
    await page.content()
#endregion
#region actor input/output
def GetInfoFromActorInput(actor_input):
    """Copies the Actor input dict into module-level globals.

    Reads: email, password, rank_to_search, initial_page_jump, start_page,
    start_page_array, skip_LP, skip_MR. Missing keys become None (or the
    estimated defaults for start pages).
    """
    global email
    global password
    global start_pages
    global search_only_this_rank
    global initial_page_jump
    global skip_LP
    global skip_MR

    email = actor_input.get('email')
    password = actor_input.get('password')

    # FIX: copy instead of aliasing, so the overrides below never mutate the
    # estimated_start_pages module constant
    start_pages = list(estimated_start_pages)
    start_page_override = actor_input.get('start_page')

    initial_page_jump = actor_input.get('initial_page_jump')

    search_only_this_rank = actor_input.get('rank_to_search')

    # single-rank run: allow overriding that rank's start page
    if search_only_this_rank is not None and start_page_override is not None:
        start_pages[search_only_this_rank] = start_page_override

    start_page_array = actor_input.get('start_page_array')

    # full run: allow overriding every start page at once
    if search_only_this_rank is None and start_page_array is not None:
        if len(start_page_array) == len(start_pages):
            for i in range(0, len(start_pages)):
                start_pages[i] = start_page_array[i]
        else:
            # FIX: was `Apify.log.error` — `Apify` is undefined (NameError); the module imports `Actor`
            Actor.log.error("Error! Start page override length: " + str(len(start_page_array)) + "\nShould be equal to " + str(len(start_pages)))

    skip_LP = actor_input.get('skip_LP')
    skip_MR = actor_input.get('skip_MR')

async def SendMRResultsToApify(Actor):
    """Pushes the Master Rate results (one record per MR bucket) to the default dataset."""
    output = []

    for i in range(len(placement_of_users_per_MR_bucket)):
        rank_name = "Master " + (str(i + 1))
        current_rank_starts_at = placement_of_users_per_MR_bucket[i]

        output.append(
            {
                "Rank Name": rank_name,
                "Starts at Placing": current_rank_starts_at,
                "Page of Rank Start": page_of_each_rank_start_MR[i]
            }
        )

    Actor.log.info("Output: " + str(output))

    dataset = await Actor.open_dataset()
    await dataset.push_data(output)
658async def SendSingleOutputToApify(Actor, index):659 output =[(660 {661 "Rank Name": ranks[index],662 "Starts at Placing": placement_of_users_per_rank[index],663 "Page of Rank Start": page_of_each_rank_start[index]664 }665 )]666 667 Actor.log.info("Output: " + str(output))668 669 dataset = await Actor.open_dataset()670 await dataset.push_data(output)671
async def SendOutputToApify(Actor):
    """Pushes the LP results to the default dataset — a single record for a single-rank
    run, or one record per rank (plus per-rank player counts) for a full run."""
    output = []

    if search_only_this_rank is not None:
        # single-rank run: only one entry was collected
        output.append(
            {
                "Rank Name": ranks[search_only_this_rank],
                "Starts at Placing": placement_of_users_per_rank[0],
                "Page of Rank Start": page_of_each_rank_start[0]
            }
        )

        Actor.log.info("Output: " + str(output))

        dataset = await Actor.open_dataset()
        await dataset.push_data(output)

    else:
        # full run: one record per rank found
        for i in range(len(placement_of_users_per_rank)):
            rank_name = ranks[i]
            current_rank_starts_at = placement_of_users_per_rank[i]

            output.append(
                {
                    "Rank Name": rank_name,
                    "Starts at Placing": current_rank_starts_at,
                    "Page of Rank Start": page_of_each_rank_start[i]
                }
            )

            # since we've come this far, also derive the distribution
            # (the spreadsheet recomputes this anyway)
            prev_rank_starts_at = placement_of_users_per_rank[i - 1] if i > 0 else 0
            players_in_rank = current_rank_starts_at - prev_rank_starts_at

            percentage = GetPercentageString(players_in_rank, total_players_int)
            print(rank_name + " contains " + str(players_in_rank) + " players\nIt represents " + percentage + " of the playerbase")

            players_in_each_rank.append(players_in_rank)

        Actor.log.info("Output: " + str(output))

        dataset = await Actor.open_dataset()
        await dataset.push_data(output)

        Actor.log.info("Players in each rank:\n" + str(players_in_each_rank).replace(",", "\n"))
#endregion
#region login
async def InputAgeCheck():
    """Fills out the age check dropdown"""
    # locate the country dropdown (instant), then await the actual selection
    country_dropdown = page.locator("select[id='country']")
    await country_dropdown.select_option("Canada")

    # no error so far means the gate is up — fill in the birth date fields too
    for selector, choice in (
        ("select[id='birthYear']", '1992'),
        ("select[id='birthMonth']", '1'),
        ("select[id='birthDay']", '15'),
    ):
        await page.locator(selector).select_option(choice)

    # press submit
    await page.locator("button[name='submit']").click()

    # wait for the new page to load
    await page.wait_for_timeout(3000)

    print("Passed age check!\n")
async def LogIn():
    """Fills in the CFN credentials (from the Actor input) and submits the login form."""
    # credentials
    await page.locator("input[type='email']").fill(email)
    await page.locator("input[type='password']").fill(password)

    # press submit
    await page.locator("button[name='submit']").click()

    # wait for the new page to load
    await page.wait_for_timeout(10000)

    print("Logged in!\n")
#endregion
#region utility I guess
async def GetAllLpOnPage(ranking_list):
    """Returns the LP value (int) of every user row in the given ranking list, in page order."""
    rows = await ranking_list.locator("xpath=/li").all()

    # the <dd> text ends in " LP", so strip the last three characters before parsing
    output = [int(str(await row.locator("dd").text_content())[:-3]) for row in rows]

    print("All LP on page: " + str(output))
    return output
async def GetPageHtml():
    """Debug helper: stores a prettified snapshot of the current page's HTML
    in the global readable_content. Not needed for normal operation."""
    global readable_content
    # BeautifulSoup is used purely to make the dump human-readable
    soup = BeautifulSoup(await page.content(), features="html.parser")
    readable_content = soup.prettify()

async def GetTotalPlayers():
    """Reads the total player count from the ranking page's trailing span."""
    raw = await page.locator("span[class='ranking_ranking_now__last__oqSXS']").last.text_content()
    # drop the leading character (a separator glyph), then any whitespace
    return int(raw[1:].strip())

def GetPercentageString(players_in_rank, total_players):
    """Formats players_in_rank as a percentage of total_players, e.g. '12.5%'."""
    return str(players_in_rank / total_players * 100) + "%"
797def GetURLForPage(desired_page):798 string_to_replace = "page=" + str(current_page_int)799 string_to_add = "page=" + str(desired_page)800 801 current_url = page.url802 output = current_url.replace(string_to_replace, string_to_add)803 return output804
805async def DetermineLastPage(playwright):806 """Navigates to the last page and records it in final_page_MR"""807 global final_page_MR808 809 # we'll need a browser810 await CreateBrowser(playwright)811 812 # go to the ranking page813 await page.goto(master_url, timeout=60000)814 815 pagination = page.locator("div[class='ranking_pc__LlGv4']").locator("div[class='ranking_ranking_pager__top__etBHR']").locator("ul[class='pagination']").first816 817 # go to the last page818 await pagination.get_by_text("Last").click()819 await page.wait_for_timeout(10000)820 821 # we already had a method to retrieve info about the current page, so let's use that822 await RefreshInfoAboutCurrentPage(pagination)823 824 # save the result in a global variable825 final_page_MR = current_page_int826 827 if final_page_MR == 1:828 Actor.log.error('Failed to determine final page')829 await Actor.exit()830#endregion
# configurations.idea
# crawlee and apify storage foldersapify_storagecrawlee_storagestorage
# installed files.venv
# git folder.git
root = true
[*]indent_style = spaceindent_size = 4charset = utf-8trim_trailing_whitespace = trueinsert_final_newline = trueend_of_line = lf
# This file tells Git which files shouldn't be added to source control
.idea.DS_Store
apify_storagestorage
.venv/.env/__pypackages__dist/build/*.egg-info/*.egg
__pycache__
.mypy_cache.dmypy.jsondmypy.json.pytest_cache.ruff_cache
.scrapy*.log
1# Feel free to add your Python dependencies below. For formatting guidelines, see:2# https://pip.pypa.io/en/latest/reference/requirements-file-format/3
4apify ~= 1.6.05beautifulsoup4 ~= 4.12.26httpx ~= 0.25.27types-beautifulsoup4 ~= 4.12.0.78numpy ~= 1.26.3
const { ApifyClient } = require("apify-client");

// Authenticated Apify client; the token lives in a local file so it never
// ends up in source control (see GetApifyToken).
const client = new ApifyClient({
    token: GetApifyToken()
});

// Configure start options applied to every task run
const startOptions = {
    memory: 8192
};

// Shared input overrides pushed to each task before it is called
const input = {
    initial_page_jump: 500
};

// we'll append to these as the run progresses
// (declared with let — they were implicit globals before, which breaks under
// strict mode; pageNumbers is also reassigned in PrintOnComplete)
let startsAt = [];
let pageNumbers = [];

getAll();

// getMaster();
// getDiamond();
// getPlatinum();
// getGold();
// getSilver();
// getBronze();
// getIron();
// getRookie();
/** We're putting this in a local .txt file so that we don't have to push it to git lol
 * Usage: Create a file called apify_key.txt in the parent folder (NOTE(review):
 * the original comment said "two folders above", but the path below only goes
 * up one level — confirm where the file actually lives), and paste your Apify API key.
 */
function GetApifyToken() {
    const fs = require("fs");
    // trim() guards against a trailing newline in the key file, which would
    // otherwise make the token invalid; const replaces an implicit global
    const contents = fs.readFileSync("../apify_key.txt").toString().trim();

    return contents;
}
/** Retrieves the output from the Apify actor and appends it to some lists that we can use later. */
async function GetOutputFromRun(run) {
    // the destructured name MUST be `items` — that is the property name
    // returned by dataset().listItems()
    const { items } = await client.dataset(run.defaultDatasetId).listItems();

    if (items === undefined) {
        console.error("help");
    }

    // now that we've retrieved the Apify output, we can append it to our own lists;
    // this allows us to chain multiple tasks together and combine their outputs
    items.forEach((entry) => {
        // const declarations replace what were accidental implicit globals
        const placing = entry["Starts at Placing"];
        const pageNum = entry["Page of Rank Start"];
        const rankName = entry["Rank Name"];

        startsAt.push({ [rankName]: placing });
        pageNumbers.push({ [rankName]: pageNum });
    });
}
/** Runs every rank group from Rookie up to Master, one after the other.
 * The order is ascending so the cheapest/most reliable searches run first. */
async function getAll() {
    await getRookie();
    await getIron();
    await getBronze();
    await getSilver();
    await getGold();
    await getPlatinum();
    await getDiamond();
    await getMaster();

    console.log("done all");
}
/** Runs the single Master-rank task, then prints the combined results. */
async function getMaster() {
    await DoTask("3ternal/master");

    PrintOnComplete();
}
/** Runs the five Diamond sub-rank tasks in order, then prints the combined results. */
async function getDiamond() {
    // the five sub-rank tasks share a naming scheme, so loop instead of
    // repeating the call five times
    for (let i = 1; i <= 5; i++) {
        await DoTask(`3ternal/diamond-${i}`);
    }

    PrintOnComplete();
}
/** Runs the five Platinum sub-rank tasks in order, then prints the combined results. */
async function getPlatinum() {
    // the five sub-rank tasks share a naming scheme, so loop instead of
    // repeating the call five times
    for (let i = 1; i <= 5; i++) {
        await DoTask(`3ternal/platinum-${i}`);
    }

    PrintOnComplete();
}
/** Runs the five Gold sub-rank tasks in order, then prints the combined results. */
async function getGold() {
    // the five sub-rank tasks share a naming scheme, so loop instead of
    // repeating the call five times
    for (let i = 1; i <= 5; i++) {
        await DoTask(`3ternal/gold-${i}`);
    }

    PrintOnComplete();
}
/** Runs the five Silver sub-rank tasks in order, then prints the combined results. */
async function getSilver() {
    // the five sub-rank tasks share a naming scheme, so loop instead of
    // repeating the call five times
    for (let i = 1; i <= 5; i++) {
        await DoTask(`3ternal/silver-${i}`);
    }

    PrintOnComplete();
}
/** Runs the five Bronze sub-rank tasks in order, then prints the combined results. */
async function getBronze() {
    // the five sub-rank tasks share a naming scheme, so loop instead of
    // repeating the call five times
    for (let i = 1; i <= 5; i++) {
        await DoTask(`3ternal/bronze-${i}`);
    }

    PrintOnComplete();
}
133async function getIron() {134 await DoTask("3ternal/iron-1");135 await DoTask("3ternal/iron-2");136 await DoTask("3ternal/iron-3");137 await DoTask("3ternal/iron-4");138 await DoTask("3ternal/iron-5");139 140 PrintOnComplete();141}142
/** Runs the Rookie sub-rank tasks (2 through 5 only — there is deliberately
 * no rookie-1 task, since the lowest rank needs no starting-point search),
 * then prints the combined results. */
async function getRookie() {
    for (let i = 2; i <= 5; i++) {
        await DoTask(`3ternal/rookie-${i}`);
    }

    PrintOnComplete();
}
/** Pushes the shared input overrides to the named task, runs it, and folds
 * the run's dataset output into our accumulator lists. */
async function DoTask(taskName) {
    console.log("Starting task: " + taskName);

    await client.task(taskName).updateInput(input);
    // const keeps `run` function-local (it was an implicit global before)
    const run = await client.task(taskName).call(undefined, startOptions);

    await GetOutputFromRun(run);
}
/** This should be the final action of the whole process.
 * By printing the placings with line breaks, we should easily be able to copy-paste the whole output into the spreadsheet for easy updating.
 */
function PrintOnComplete() {
    // const declarations replace implicit globals (fixedString1/fixedString2)
    const startsAtString = JSON.stringify(startsAt).replaceAll(",", "\n");
    console.log("Starts at:\n" + startsAtString);

    const pageNumbersString = JSON.stringify(pageNumbers).replaceAll(",", "\n");
    console.log("Page numbers:\n" + pageNumbersString);

    // it'll be easier to update the start page numbers if we convert the array
    // into something that's copy-pastable
    // (Array.prototype.reverse mutates in place; the reassignment is redundant
    // but harmless, and kept so the intent stays obvious)
    pageNumbers = pageNumbers.reverse();

    PrintValuesCopyPastable(startsAt);
    PrintValuesCopyPastable(pageNumbers);
}
/** Prints the single value from each { rankName: value } entry on one line,
 * comma-separated, so the whole series can be pasted into a spreadsheet.
 *
 * @param {Array<Object>} json - list of single-key objects, e.g. { "Diamond 1": 123 }
 */
function PrintValuesCopyPastable(json) {
    const parts = [];
    for (const obj of json) {
        // each entry is expected to hold exactly one key/value pair
        parts.push(Object.values(obj)[0]);
    }

    // join() handles the separators, replacing the manual trailing-comma
    // bookkeeping (and the unused `values`/`keys` locals) of the original
    console.log(parts.join(", "));
}