
Apple Ads Library Scraper
Pricing
Pay per usage
Go to Store

Apple Ads Library Scraper
sameh.jarour/apple-ads-library-scraper
The Apple Ads Library Scraper helps Mobile Growth Managers and User Acquisition Managers monitor key competitors' advertising insights from Apple's Ad Repository as they plan the launch of their own Apple Search Ads campaigns on the App Store.
0.0 (0)
Pricing
Pay per usage
0
Monthly users
2
Runs succeeded
>99%
Last modified
21 days ago
.actor/Dockerfile
1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node-playwright-chrome:20
5
6# Check preinstalled packages
7RUN npm ls crawlee apify puppeteer playwright
8
9# Copy just package.json and package-lock.json
10# to speed up the build using Docker layer cache.
11COPY package*.json ./
12
13# Install NPM packages, skip optional and development dependencies to
14# keep the image small. Avoid logging too much and print the dependency
15# tree for debugging
16RUN npm --quiet set progress=false \
17 && npm install --omit=dev --omit=optional \
18 && echo "Installed NPM packages:" \
19 && (npm list --omit=dev --all || true) \
20 && echo "Node.js version:" \
21 && node --version \
22 && echo "NPM version:" \
23 && npm --version \
24 && rm -r ~/.npm
25
26# Next, copy the remaining files and directories with the source code.
27# Since we do this after NPM install, quick build will be really fast
28# for most source file changes.
29COPY . ./
30
31# Run the image. If you know you won't need headful browsers,
32# you can remove the XVFB start script for a micro perf gain.
33CMD ./start_xvfb_and_run_cmd.sh && npm start --silent
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "apple-ads-library-scraper",
4 "title": "Apple Ads Library Scraper",
5 "description": "Scrapes ad details from the Apple Ad Repository (adrepository.apple.com) using Crawlee and Playwright.",
6 "version": "0.0",
7 "meta": {
8 "templateId": "js-crawlee-playwright-chrome"
9 },
10 "input": "./input_schema.json",
11 "dockerfile": "./Dockerfile"
12}
.actor/input_schema.json
1{
2 "title": "Apple Ads Library Scraper Input",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "startUrls": {
7 "title": "Start URLs",
8 "type": "array",
9 "description": "URLs to start with.",
10 "default": [
11 {
12 "url": "https://adrepository.apple.com/"
13 }
14 ],
15 "editor": "hidden"
16 },
17 "DEVELOPER_OR_APP": {
18 "type": "string",
19 "title": "Developer or App Name",
20 "description": "Filter ads by a specific developer or app. Leave empty to scrape all ads.",
21 "default": "",
22 "editor": "textfield"
23 },
24 "COUNTRY_OR_REGION": {
25 "type": "string",
26 "title": "Country or Region",
27 "description": "Filter ads by a specific country or region.",
28 "default": "Austria",
29 "editor": "select",
30 "enum": [
31 "Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus", "Czech Republic",
32 "Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
33 "Hungary", "Ireland", "Italy", "Latvia", "Luxembourg", "Netherlands",
34 "Poland", "Portugal", "Romania", "Slovakia", "Slovenia", "Spain", "Sweden"
35 ]
36 },
37 "DATE_RANGE": {
38 "type": "string",
39 "title": "Date Range",
40 "description": "Filter ads by a specific date range.",
41 "default": "Last 90 days",
42 "editor": "select",
43 "enum": [
44 "Last 90 days",
45 "Last 180 days",
46 "Last Year"
47 ]
48 }
49 },
50 "required": ["COUNTRY_OR_REGION", "DATE_RANGE"]
51}
src/main.js
import { Actor } from 'apify';
import { PlaywrightCrawler } from 'crawlee';

await Actor.init();

/** Entry page of the Apple Ad Repository. */
const START_URL = 'https://adrepository.apple.com/';

/** Container that becomes visible once the result list has rendered. */
const RESULTS_SELECTOR = '.br-md-bottom';

/** Buttons inside the result list that open an ad's detail view. */
const DETAIL_BUTTON_SELECTOR = '.br-md-bottom .button';

/**
 * Output key -> label text shown on the ad detail view.
 * Insertion order defines the order in which the fields are read.
 */
const AD_FIELD_LABELS = {
    app: 'App',
    developer: 'Developer',
    legalName: 'Legal Name',
    placement: 'Placement',
    format: 'Format',
    country: 'Country or Region',
    parameters: 'Parameters',
    firstImpression: 'First Impression',
    latestImpression: 'Latest Impression',
};

// Actor.getInput() resolves to null when the run has no input, so fall back
// to an empty object instead of crashing on the first property access.
const input = (await Actor.getInput()) ?? {};
// `||` on purpose: an empty string must fall back to the default as well.
const country = input.COUNTRY_OR_REGION || 'Austria';
// TODO: DEVELOPER_OR_APP and DATE_RANGE are declared in the input schema but
// are not applied to the search yet.

/**
 * Reads the text of the first sibling element that follows a field label.
 *
 * NOTE(review): `text=` is a case-insensitive substring match, so a label such
 * as "App" can also match "Apple" or "Apply" elsewhere on the page — confirm
 * against the live markup before tightening this to an exact match.
 *
 * @param {import('playwright').Page} page - Page showing the ad detail view.
 * @param {string} label - Visible label text of the field.
 * @returns {Promise<string|null>} Text content of the value element.
 */
async function readField(page, label) {
    return page
        .locator(`text=${label}`)
        .locator('xpath=following-sibling::*')
        .nth(0)
        .textContent();
}

/**
 * Collects every field listed in AD_FIELD_LABELS from the open detail view.
 *
 * @param {import('playwright').Page} page - Page showing the ad detail view.
 * @returns {Promise<Record<string, string|null>>} One record per ad.
 */
async function extractAdDetails(page) {
    const details = {};
    // Sequential on purpose: all reads share one page.
    for (const [key, label] of Object.entries(AD_FIELD_LABELS)) {
        details[key] = await readField(page, label);
    }
    return details;
}

/**
 * Opens each ad on the currently displayed result page, stores its details in
 * the default dataset and navigates back to the result list.
 *
 * @param {import('playwright').Page} page - Page showing the result list.
 * @param {import('crawlee').Log} log - Logger supplied by the crawler.
 * @returns {Promise<void>}
 */
async function scrapeResultsPage(page, log) {
    const buttons = page.locator(DETAIL_BUTTON_SELECTOR);
    const buttonCount = await buttons.count();
    log.info(`Found ${buttonCount} buttons for ad details.`);

    for (let i = 0; i < buttonCount; i++) {
        log.info(`Processing button ${i + 1} of ${buttonCount}`);

        // Click on the button to navigate to ad details
        await buttons.nth(i).click();
        await page.waitForTimeout(3000); // Wait for ad details to load

        const adDetails = await extractAdDetails(page);
        log.info(`Extracted Ad Details: ${JSON.stringify(adDetails)}`);
        await Actor.pushData(adDetails);

        log.info('Returning to main page...');
        await page.goBack();
        await page.waitForSelector(RESULTS_SELECTOR, { state: 'visible', timeout: 60000 });
    }
}

const crawler = new PlaywrightCrawler({
    // The whole listing is walked inside a single request, which takes far
    // longer than Crawlee's default 60 s handler timeout (3 s sleep per ad).
    requestHandlerTimeoutSecs: 3600,
    requestHandler: async ({ page, request, log }) => {
        log.info(`Processing ${request.url} with label: ${request.label}`);

        if (request.label !== 'START') {
            return;
        }

        // The crawler has already navigated to request.url, so no extra
        // page.goto() is needed here.
        log.info(`Selecting Country or Region: ${country}`);
        await page.getByLabel('Country or Region').click();
        await page.getByLabel(country).check();
        await page.getByRole('button', { name: 'Apply' }).click();

        log.info('Waiting for results to load...');
        await page.waitForTimeout(3000); // Allow results to begin loading
        await page.waitForSelector(RESULTS_SELECTOR, { state: 'visible', timeout: 60000 });

        // Scrape every result page, following the "next" control until it is
        // hidden or disabled. The handler timeout above bounds this loop.
        for (;;) {
            await scrapeResultsPage(page, log);

            const nextButton = page.getByLabel('next');
            // isVisible() first: it does not wait, so a missing button ends
            // the loop immediately instead of blocking in isEnabled().
            const hasNextPage = (await nextButton.isVisible()) && (await nextButton.isEnabled());
            if (!hasNextPage) {
                log.info('No more pages available.');
                break;
            }

            log.info('Navigating to the next page...');
            await nextButton.click();
            await page.waitForTimeout(5000); // Wait for the next page to load
            await page.waitForSelector(RESULTS_SELECTOR, { state: 'visible', timeout: 60000 });
        }
    },
});

// Add the start URL with a label
await crawler.addRequests([{ url: START_URL, label: 'START' }]);

await crawler.run();
await Actor.exit();
src/routes.js
import { createPlaywrightRouter } from 'crawlee';

/**
 * Router for Playwright crawling contexts.
 *
 * Built with createPlaywrightRouter(): Crawlee's `Router` is a class and
 * cannot be invoked as a plain function, so `Router()` throws at import time.
 */
export const routes = createPlaywrightRouter();

/**
 * Default handler: logs the URL and the title of the loaded page.
 * The crawler has already navigated to request.url before this runs, so the
 * page is not loaded a second time here.
 */
routes.addDefaultHandler(async ({ page, request, log }) => {
    log.info(`Processing URL: ${request.url}`);

    // Example: Collect page title
    const title = await page.title();
    log.info(`Page title: ${title}`);
});
tests/example.spec.js
import { test, expect } from '@playwright/test';

// Smoke test: applying the country filter must produce a visible, non-empty
// result list.
test('navigate and apply filters', async ({ page }) => {
    await page.goto('https://adrepository.apple.com/');
    await page.getByLabel('Country or Region').click();
    await page.getByLabel('Austria').click();
    await page.getByRole('button', { name: 'Apply' }).click();

    // NOTE(review): the scraper in src/main.js waits for '.br-md-bottom';
    // confirm 'div.results-container' still matches the live page.
    // Web-first assertion: retries until visible; the timeout matches the
    // previous waitForSelector default.
    await expect(page.locator('div.results-container')).toBeVisible({ timeout: 30000 });

    const resultsCount = await page.locator('div.results-container > div').count();
    console.log('Results count:', resultsCount);
    // Without an assertion the test could never fail on an empty result list.
    expect(resultsCount).toBeGreaterThan(0);
});
.dockerignore
1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git
.editorconfig
1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf
.eslintrc
1{
2 "extends": "@apify",
3 "root": true
4}
.gitignore
1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv
input.json
1{
2 "COUNTRY_OR_REGION": "Hungary",
3 "DATE_RANGE": "Last 90 days",
4 "DEVELOPER_OR_APP": "Example Developer"
5}
package.json
1{
2 "name": "apple-ads-library-scraper",
3 "version": "0.0.1",
4 "type": "module",
5 "description": "This is an example of an Apify actor.",
6 "dependencies": {
7 "apify": "^3.2.6",
8 "crawlee": "^3.11.5",
9 "playwright": "*"
10 },
11 "devDependencies": {
12 "@apify/eslint-config": "^0.4.0",
13 "@playwright/test": "^1.49.1",
14 "eslint": "^8.50.0"
15 },
16 "scripts": {
17 "start": "node src/main.js",
18 "lint": "eslint ./src --ext .js,.jsx",
19 "lint:fix": "eslint ./src --ext .js,.jsx --fix",
20 "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1",
21 "postinstall": "npx crawlee install-playwright-browsers"
22 },
23 "author": "It's not you it's me",
24 "license": "ISC"
25}
Pricing
Pricing model
Pay per usage: This Actor is paid per platform usage. The Actor is free to use, and you only pay for the Apify platform usage.