Apple Ads Library Scraper avatar
Apple Ads Library Scraper

Pricing

Pay per usage

Go to Store
Apple Ads Library Scraper

Apple Ads Library Scraper

Developed by

Sameh George Jarour

Sameh George Jarour

Maintained by Community

The Apple Ads Library Scraper helps Mobile Growth Managers and User Acquisition Managers monitor their key competitors' ads in Apple's Ad Repository while planning their own Apple Search Ads campaigns on the App Store.

0.0 (0)

Pricing

Pay per usage

0

Total users

3

Monthly users

1

Runs succeeded

>99%

Last modified

3 months ago

.actor/Dockerfile

# Build recipe for the actor image: start from a base image, install the
# production NPM dependencies, copy the source and launch `npm start`.
#
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
# NOTE(review): the ":20" tag presumably selects the Node.js 20 variant - confirm.
FROM apify/actor-node-playwright-chrome:20
# Check preinstalled packages: print which of these libraries (and which
# versions) are already present before this project's own install runs.
RUN npm ls crawlee apify puppeteer playwright
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache: the install layer below
# is only rebuilt when one of these manifests changes.
COPY --chown=myuser package*.json ./
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging. `npm list` is wrapped in `|| true` so that a non-zero
# exit from the listing cannot fail the build. The final step removes the
# ~/.npm directory (npm's default cache location) so it is not kept in the layer.
RUN npm --quiet set progress=false \
&& npm install --omit=dev --omit=optional \
&& echo "Installed NPM packages:" \
&& (npm list --omit=dev --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version \
&& rm -r ~/.npm
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY --chown=myuser . ./
# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
# `npm start` only runs if the XVFB script exits successfully (`&&`).
# NOTE(review): start_xvfb_and_run_cmd.sh is not part of this repository
# listing; presumably it is provided by the base image - confirm.
CMD ./start_xvfb_and_run_cmd.sh && npm start --silent

.actor/actor.json

{
"actorSpecification": 1,
"name": "apple-ads-library-scraper",
"title": "Apple Ads Library Scraper",
"description": "Scrapes ad details from Apple's Ad Repository (adrepository.apple.com) for a selected country or region.",
"version": "0.0",
"meta": {
"templateId": "js-crawlee-playwright-chrome"
},
"input": "./input_schema.json",
"dockerfile": "./Dockerfile"
}

.actor/input_schema.json

{
"title": "Apple Ads Library Scraper Input",
"type": "object",
"schemaVersion": 1,
"properties": {
"startUrls": {
"title": "Start URLs",
"type": "array",
"description": "URLs to start with.",
"default": [
{
"url": "https://adrepository.apple.com/"
}
],
"editor": "hidden"
},
"DEVELOPER_OR_APP": {
"type": "string",
"title": "Developer or App Name",
"description": "Filter ads by a specific developer or app. Leave empty to scrape all ads.",
"default": "",
"editor": "textfield"
},
"COUNTRY_OR_REGION": {
"type": "string",
"title": "Country or Region",
"description": "Filter ads by a specific country or region.",
"default": "Austria",
"editor": "select",
"enum": [
"Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus", "Czech Republic",
"Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
"Hungary", "Ireland", "Italy", "Latvia", "Luxembourg", "Netherlands",
"Poland", "Portugal", "Romania", "Slovakia", "Slovenia", "Spain", "Sweden"
]
},
"DATE_RANGE": {
"type": "string",
"title": "Date Range",
"description": "Filter ads by a specific date range.",
"default": "Last 90 days",
"editor": "select",
"enum": [
"Last 90 days",
"Last 180 days",
"Last Year"
]
}
},
"required": ["COUNTRY_OR_REGION", "DATE_RANGE"]
}

src/main.js

import { Actor } from 'apify';
import { PlaywrightCrawler } from 'crawlee';

/** Entry point of Apple's Ad Repository; the crawl starts (and stays) here. */
const START_URL = 'https://adrepository.apple.com/';

/** Element that is visible once the result list has rendered. */
const RESULTS_SELECTOR = '.br-md-bottom';

/** Buttons inside the result list that open the detail view of a single ad. */
const AD_BUTTON_SELECTOR = '.br-md-bottom .button';

/** Upper bound for waiting on the result list, in milliseconds. */
const RESULTS_TIMEOUT_MS = 60000;

/**
 * The whole crawl (every ad on every results page) runs inside ONE request
 * handler and uses fixed multi-second waits per ad, so Crawlee's default
 * handler timeout of 60 seconds is far too short. Allow up to one hour.
 */
const REQUEST_HANDLER_TIMEOUT_SECS = 60 * 60;

/**
 * Maps each output field to the label text shown next to it in the ad detail
 * view. The value of a field is read from the first sibling element that
 * follows the label.
 */
const AD_DETAIL_LABELS = {
    app: 'App',
    developer: 'Developer',
    legalName: 'Legal Name',
    placement: 'Placement',
    format: 'Format',
    country: 'Country or Region',
    parameters: 'Parameters',
    firstImpression: 'First Impression',
    latestImpression: 'Latest Impression',
};

await Actor.init();

// Actor.getInput() resolves to null when the run has no input at all, so fall
// back to an empty object instead of crashing on the property access below.
const input = (await Actor.getInput()) ?? {};
const country = input.COUNTRY_OR_REGION || 'Austria'; // Key name comes from the input schema
const developerOrApp = input.DEVELOPER_OR_APP;
// TODO: DATE_RANGE and DEVELOPER_OR_APP are declared in the input schema but
// are not applied as filters yet; only the country/region filter is used.

/**
 * Waits until the result list is visible.
 *
 * @param {import('playwright').Page} page - Page showing the ad repository.
 * @returns {Promise<void>}
 */
async function waitForResults(page) {
    await page.waitForSelector(RESULTS_SELECTOR, { state: 'visible', timeout: RESULTS_TIMEOUT_MS });
}

/**
 * Reads every field listed in AD_DETAIL_LABELS from the currently open ad
 * detail view. Fields are read one after another, in declaration order.
 *
 * @param {import('playwright').Page} page - Page showing a single ad's details.
 * @returns {Promise<Record<string, string | null>>} Field name -> text content.
 * @throws If a label (or its following sibling) cannot be found in time.
 */
async function extractAdDetails(page) {
    const adDetails = {};
    for (const [field, labelText] of Object.entries(AD_DETAIL_LABELS)) {
        adDetails[field] = await page
            .locator(`text=${labelText}`)
            .locator('xpath=following-sibling::*')
            .nth(0)
            .textContent();
    }
    return adDetails;
}

/**
 * Opens every ad listed on the current results page, stores its details in
 * the default dataset and navigates back to the result list.
 *
 * @param {import('playwright').Page} page - Page showing the result list.
 * @param {import('crawlee').Log} log - Logger from the crawling context.
 * @returns {Promise<void>}
 */
async function scrapeResultsPage(page, log) {
    const buttons = page.locator(AD_BUTTON_SELECTOR);
    const buttonCount = await buttons.count();
    log.info(`Found ${buttonCount} buttons for ad details.`);

    for (let i = 0; i < buttonCount; i++) {
        log.info(`Processing button ${i + 1} of ${buttonCount}`);

        // Click on the button to navigate to ad details
        await buttons.nth(i).click();
        await page.waitForTimeout(3000); // Wait for ad details to load

        const adDetails = await extractAdDetails(page);
        log.info(`Extracted Ad Details: ${JSON.stringify(adDetails)}`);
        await Actor.pushData(adDetails);

        // NOTE(review): this assumes goBack() restores the same results page
        // (including the applied filter and pagination position) - confirm
        // against the live site.
        log.info('Returning to main page...');
        await page.goBack();
        await waitForResults(page);
    }
}

/**
 * Handles the START request: applies the country/region filter and then walks
 * through all result pages, scraping every ad on each of them.
 *
 * @param {import('crawlee').PlaywrightCrawlingContext} context - Crawling context.
 * @returns {Promise<void>}
 */
async function handleStart({ page, log }) {
    if (developerOrApp) {
        log.warning(`DEVELOPER_OR_APP filter "${developerOrApp}" is not supported yet and will be ignored.`);
    }

    // No page.goto() here: the crawler has already navigated to request.url
    // (the start URL) before invoking the handler.
    log.info(`Selecting Country or Region: ${country}`);
    await page.getByLabel('Country or Region').click();
    await page.getByLabel(country).check();
    await page.getByRole('button', { name: 'Apply' }).click();

    log.info('Waiting for results to load...');
    await page.waitForTimeout(3000); // Allow results to begin loading
    await waitForResults(page);

    // Scrape the current page, then keep following the "next" control until
    // it is gone or disabled. Previously the next page was opened but its
    // ads were only counted, never scraped.
    for (let pageNumber = 1; ; pageNumber++) {
        log.info(`Scraping results page ${pageNumber}`);
        await scrapeResultsPage(page, log);

        const nextButton = page.getByLabel('next');
        const hasNextPage = (await nextButton.isVisible()) && (await nextButton.isEnabled());
        if (!hasNextPage) {
            log.info('No more pages available.');
            break;
        }

        log.info('Navigating to the next page...');
        await nextButton.click();
        await page.waitForTimeout(5000); // Wait for the next page to load
        await waitForResults(page);
    }
}

const crawler = new PlaywrightCrawler({
    requestHandlerTimeoutSecs: REQUEST_HANDLER_TIMEOUT_SECS,
    requestHandler: async (context) => {
        const { request, log } = context;
        log.info(`Processing ${request.url} with label: ${request.label}`);

        if (request.label === 'START') {
            await handleStart(context);
        }
    },
});

// Add the start URL with a label
await crawler.addRequests([{ url: START_URL, label: 'START' }]);

await crawler.run();
await Actor.exit();

src/routes.js

import { createPlaywrightRouter } from 'crawlee';

/**
 * Router for the different scraping actions.
 *
 * Built with createPlaywrightRouter(): Crawlee's `Router` is a class whose
 * constructor is not meant to be called directly, so the previous `Router()`
 * call threw as soon as this module was imported.
 */
export const routes = createPlaywrightRouter();

/**
 * Default handler: logs the URL and the title of the loaded page.
 * Runs for every request that has no label with a dedicated handler.
 */
routes.addDefaultHandler(async ({ page, request, log }) => {
    log.info(`Processing URL: ${request.url}`);

    // The crawler has already navigated `page` to request.url before calling
    // this handler, so no additional page.goto() is needed here.
    // Implement additional logic if required.

    // Example: Collect page title
    const title = await page.title();
    log.info(`Page title: ${title}`);
});

tests/example.spec.js

import { test, expect } from '@playwright/test';

/**
 * Smoke test: opens the ad repository, filters by Austria and checks that
 * at least one result is rendered.
 */
test('navigate and apply filters', async ({ page }) => {
    await page.goto('https://adrepository.apple.com/');
    await page.getByLabel('Country or Region').click();
    await page.getByLabel('Austria').click();
    await page.getByRole('button', { name: 'Apply' }).click();

    // NOTE(review): src/main.js waits on '.br-md-bottom' for the same result
    // list; confirm that 'div.results-container' still matches the live page.
    await page.waitForSelector('div.results-container', { state: 'visible' });
    const resultsCount = await page.locator('div.results-container > div').count();
    console.log('Results count:', resultsCount);

    // Previously `expect` was imported but never used, so the test could not
    // fail on an empty result list.
    expect(resultsCount).toBeGreaterThan(0);
});

.dockerignore

# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"extends": "@apify",
"root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage
# Added by Apify CLI
.venv

input.json

{
"COUNTRY_OR_REGION": "Hungary",
"DATE_RANGE": "Last 90 days",
"DEVELOPER_OR_APP": "Example Developer"
}

package.json

{
"name": "apple-ads-library-scraper",
"version": "0.0.1",
"type": "module",
"description": "This is an example of an Apify actor.",
"dependencies": {
"apify": "^3.2.6",
"crawlee": "^3.11.5",
"playwright": "*"
},
"devDependencies": {
"@apify/eslint-config": "^0.4.0",
"@playwright/test": "^1.49.1",
"eslint": "^8.50.0"
},
"scripts": {
"start": "node src/main.js",
"lint": "eslint ./src --ext .js,.jsx",
"lint:fix": "eslint ./src --ext .js,.jsx --fix",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1",
"postinstall": "npx crawlee install-playwright-browsers"
},
"author": "It's not you it's me",
"license": "ISC"
}