
Apple Ads Library Scraper
The Apple Ads Library Scraper helps Mobile Growth Managers and User Acquisition Managers monitor competitors' ad insights in Apple's Ad Repository while planning their own Apple Search Ads campaigns on the App Store.
Rating: 0.0 (0 reviews)
Pricing: Pay per usage
Total users: 3
Monthly users: 1
Runs succeeded: >99%
Last modified: 3 months ago
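To run the Actor programmatically, a minimal sketch using the apify-client package is shown below. The API token and the actor ID prefix (your Apify username) are placeholders, not values from this listing; the input keys come from the schema in .actor/input_schema.json.

import { ApifyClient } from 'apify-client';

// Placeholder token and actor ID – replace with your own values.
const client = new ApifyClient({ token: '<YOUR_APIFY_TOKEN>' });

// Input keys match .actor/input_schema.json.
const run = await client.actor('<YOUR_USERNAME>/apple-ads-library-scraper').call({
    COUNTRY_OR_REGION: 'Austria',
    DATE_RANGE: 'Last 90 days',
    DEVELOPER_OR_APP: '',
});

// Fetch the scraped ad details from the run's default dataset.
const { items } = await client.dataset(run.defaultDatasetId).listItems();
console.log(items);

The returned dataset items correspond to the adDetails objects pushed in src/main.js.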
.actor/Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:20

# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent
.actor/actor.json
{
    "actorSpecification": 1,
    "name": "apple-ads-library-scraper",
    "title": "Apple Ads Library Scraper",
    "description": "Scrapes the Apple Ads Library (Ad Repository) with Crawlee and Playwright.",
    "version": "0.0",
    "meta": {
        "templateId": "js-crawlee-playwright-chrome"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile"
}
.actor/input_schema.json
{
    "title": "Apple Ads Library Scraper Input",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "URLs to start with.",
            "default": [{ "url": "https://adrepository.apple.com/" }],
            "editor": "hidden"
        },
        "DEVELOPER_OR_APP": {
            "type": "string",
            "title": "Developer or App Name",
            "description": "Filter ads by a specific developer or app. Leave empty to scrape all ads.",
            "default": "",
            "editor": "textfield"
        },
        "COUNTRY_OR_REGION": {
            "type": "string",
            "title": "Country or Region",
            "description": "Filter ads by a specific country or region.",
            "default": "Austria",
            "editor": "select",
            "enum": [
                "Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus", "Czech Republic",
                "Denmark", "Estonia", "Finland", "France", "Germany", "Greece", "Hungary",
                "Ireland", "Italy", "Latvia", "Luxembourg", "Netherlands", "Poland",
                "Portugal", "Romania", "Slovakia", "Slovenia", "Spain", "Sweden"
            ]
        },
        "DATE_RANGE": {
            "type": "string",
            "title": "Date Range",
            "description": "Filter ads by a specific date range.",
            "default": "Last 90 days",
            "editor": "select",
            "enum": ["Last 90 days", "Last 180 days", "Last Year"]
        }
    },
    "required": ["COUNTRY_OR_REGION", "DATE_RANGE"]
}
src/main.js
import { Actor } from 'apify';
import { PlaywrightCrawler } from 'crawlee';

await Actor.init();

// Get input from the schema; fall back to an empty object if no input is provided.
const input = (await Actor.getInput()) ?? {};
const country = input.COUNTRY_OR_REGION || 'Austria'; // Use the input key from the schema

const crawler = new PlaywrightCrawler({
    requestHandler: async ({ page, request, log }) => {
        log.info(`Processing ${request.url} with label: ${request.label}`);

        if (request.label === 'START') {
            // Navigate to the main page
            await page.goto('https://adrepository.apple.com/');
            log.info(`Selecting Country or Region: ${country}`);
            await page.getByLabel('Country or Region').click();
            await page.getByLabel(country).check();
            await page.getByRole('button', { name: 'Apply' }).click();

            log.info('Waiting for results to load...');
            await page.waitForTimeout(3000); // Allow results to begin loading
            await page.waitForSelector('.br-md-bottom', { state: 'visible', timeout: 60000 });

            // Locate the ad-detail buttons and process them one by one
            const buttons = page.locator('.br-md-bottom .button');
            const buttonCount = await buttons.count();
            log.info(`Found ${buttonCount} buttons for ad details.`);

            for (let i = 0; i < buttonCount; i++) {
                log.info(`Processing button ${i + 1} of ${buttonCount}`);

                // Click the button to open the ad details
                await buttons.nth(i).click();
                await page.waitForTimeout(3000); // Wait for ad details to load

                // Extract ad details: each value is the sibling element following its label
                const adDetails = {
                    app: await page.locator('text=App').locator('xpath=following-sibling::*').nth(0).textContent(),
                    developer: await page.locator('text=Developer').locator('xpath=following-sibling::*').nth(0).textContent(),
                    legalName: await page.locator('text=Legal Name').locator('xpath=following-sibling::*').nth(0).textContent(),
                    placement: await page.locator('text=Placement').locator('xpath=following-sibling::*').nth(0).textContent(),
                    format: await page.locator('text=Format').locator('xpath=following-sibling::*').nth(0).textContent(),
                    country: await page.locator('text=Country or Region').locator('xpath=following-sibling::*').nth(0).textContent(),
                    parameters: await page.locator('text=Parameters').locator('xpath=following-sibling::*').nth(0).textContent(),
                    firstImpression: await page.locator('text=First Impression').locator('xpath=following-sibling::*').nth(0).textContent(),
                    latestImpression: await page.locator('text=Latest Impression').locator('xpath=following-sibling::*').nth(0).textContent(),
                };
                log.info(`Extracted Ad Details: ${JSON.stringify(adDetails)}`);
                await Actor.pushData(adDetails);

                // Return to the results page
                log.info('Returning to main page...');
                await page.goBack();
                await page.waitForSelector('.br-md-bottom', { state: 'visible', timeout: 60000 });
            }

            // Check for a next page
            const nextButton = page.getByLabel('next');
            if (await nextButton.isVisible() && await nextButton.isEnabled()) {
                log.info('Navigating to the next page...');
                await nextButton.click();
                await page.waitForTimeout(5000); // Wait for the next page to load
                await page.waitForSelector('.br-md-bottom', { state: 'visible', timeout: 60000 });

                // NOTE: ads on the next page are currently only counted, not extracted
                const buttonsNextPage = page.locator('.br-md-bottom .button');
                const buttonCountNextPage = await buttonsNextPage.count();
                log.info(`Found ${buttonCountNextPage} buttons on the next page.`);
            } else {
                log.info('No more pages available.');
            }
        }
    },
});

// Add the start URL with a label
await crawler.addRequests([{ url: 'https://adrepository.apple.com/', label: 'START' }]);

await crawler.run();
await Actor.exit();
src/routes.js
import { createPlaywrightRouter } from 'crawlee';

// Define routes for different scraping actions
export const routes = createPlaywrightRouter();

routes.addDefaultHandler(async ({ page, request, log }) => {
    log.info(`Processing URL: ${request.url}`);

    // The crawler has already navigated to request.url before this handler runs.
    // Example: collect the page title
    const title = await page.title();
    log.info(`Page title: ${title}`);
});
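Note that src/main.js does not import this router; it uses an inline requestHandler instead. If you prefer the router-based setup, a minimal sketch of wiring routes.js into the crawler (assuming the file layout above) could look like this:

import { Actor } from 'apify';
import { PlaywrightCrawler } from 'crawlee';
import { routes } from './routes.js';

await Actor.init();

// Pass the router as the crawler's request handler.
const crawler = new PlaywrightCrawler({
    requestHandler: routes,
});

await crawler.run(['https://adrepository.apple.com/']);
await Actor.exit();

Handlers registered on the router with addHandler(label, ...) would then take over the label checks currently done inside main.js.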
tests/example.spec.js
import { test, expect } from '@playwright/test';

test('navigate and apply filters', async ({ page }) => {
    await page.goto('https://adrepository.apple.com/');
    await page.getByLabel('Country or Region').click();
    await page.getByLabel('Austria').click();
    await page.getByRole('button', { name: 'Apply' }).click();

    await page.waitForSelector('div.results-container', { state: 'visible' });
    const resultsCount = await page.locator('div.results-container > div').count();
    console.log('Results count:', resultsCount);
});
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.eslintrc
{
    "extends": "@apify",
    "root": true
}
.gitignore
# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage

# Added by Apify CLI
.venv
input.json
{
    "COUNTRY_OR_REGION": "Hungary",
    "DATE_RANGE": "Last 90 days",
    "DEVELOPER_OR_APP": "Example Developer"
}
package.json
{
    "name": "apple-ads-library-scraper",
    "version": "0.0.1",
    "type": "module",
    "description": "This is an example of an Apify actor.",
    "dependencies": {
        "apify": "^3.2.6",
        "crawlee": "^3.11.5",
        "playwright": "*"
    },
    "devDependencies": {
        "@apify/eslint-config": "^0.4.0",
        "@playwright/test": "^1.49.1",
        "eslint": "^8.50.0"
    },
    "scripts": {
        "start": "node src/main.js",
        "lint": "eslint ./src --ext .js,.jsx",
        "lint:fix": "eslint ./src --ext .js,.jsx --fix",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1",
        "postinstall": "npx crawlee install-playwright-browsers"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}