Apple Ads Library Scraper avatar

Apple Ads Library Scraper

Under maintenance
Try for free

No credit card required

Go to Store
This Actor is under maintenance.

This Actor may be unreliable while under maintenance. Would you like to try a similar Actor instead?

See alternative Actors
Apple Ads Library Scraper

Apple Ads Library Scraper

sameh.jarour/apple-ads-library-scraper
Try for free

No credit card required

The Apple Ads Library Scraper helps Mobile Growth Managers and User Acquisition Managers gain insights into their key competitors' ads as they plan to launch their own Apple Search Ads campaigns on the App Store.

.actor/Dockerfile

1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node-playwright-chrome:20
5
6# Check preinstalled packages
7RUN npm ls crawlee apify puppeteer playwright
8
9# Copy just package.json and package-lock.json
10# to speed up the build using Docker layer cache.
11COPY --chown=myuser package*.json ./
12
13# Install NPM packages, skip optional and development dependencies to
14# keep the image small. Avoid logging too much and print the dependency
15# tree for debugging
16RUN npm --quiet set progress=false \
17    && npm install --omit=dev --omit=optional \
18    && echo "Installed NPM packages:" \
19    && (npm list --omit=dev --all || true) \
20    && echo "Node.js version:" \
21    && node --version \
22    && echo "NPM version:" \
23    && npm --version \
24    && rm -r ~/.npm
25
26# Next, copy the remaining files and directories with the source code.
27# Since we do this after NPM install, quick build will be really fast
28# for most source file changes.
29COPY --chown=myuser . ./
30
31# Run the image. If you know you won't need headful browsers,
32# you can remove the XVFB start script for a micro perf gain.
33CMD ./start_xvfb_and_run_cmd.sh && npm start --silent

.actor/actor.json

1{
2	"actorSpecification": 1,
3	"name": "apple-ads-library-scraper",
4	"title": "Project Playwright Crawler JavaScript",
5	"description": "Crawlee and Playwright project in JavaScript.",
6	"version": "0.0",
7	"meta": {
8		"templateId": "js-crawlee-playwright-chrome"
9	},
10	"input": "./input_schema.json",
11	"dockerfile": "./Dockerfile"
12}

.actor/input_schema.json

1{
2    "title": "Apple Ads Library Scraper Input",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "startUrls": {
7            "title": "Start URLs",
8            "type": "array",
9            "description": "URLs to start with.",
10            "default": [
11                {
12                    "url": "https://adrepository.apple.com/"
13                }
14            ],
15            "editor": "hidden"
16        },
17        "DEVELOPER_OR_APP": {
18            "type": "string",
19            "title": "Developer or App Name",
20            "description": "Filter ads by a specific developer or app. Leave empty to scrape all ads.",
21            "default": "",
22            "editor": "textfield"
23        },
24        "COUNTRY_OR_REGION": {
25            "type": "string",
26            "title": "Country or Region",
27            "description": "Filter ads by a specific country or region.",
28            "default": "Austria",
29            "editor": "select",
30            "enum": [
31                "Austria", "Belgium", "Bulgaria", "Croatia", "Cyprus", "Czech Republic",
32                "Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
33                "Hungary", "Ireland", "Italy", "Latvia", "Luxembourg", "Netherlands",
34                "Poland", "Portugal", "Romania", "Slovakia", "Slovenia", "Spain", "Sweden"
35            ]
36        },
37        "DATE_RANGE": {
38            "type": "string",
39            "title": "Date Range",
40            "description": "Filter ads by a specific date range.",
41            "default": "Last 90 days",
42            "editor": "select",
43            "enum": [
44                "Last 90 days",
45                "Last 180 days",
46                "Last Year"
47            ]
48        }
49    },
50    "required": ["COUNTRY_OR_REGION", "DATE_RANGE"]
51}

src/main.js

import { Actor } from 'apify';
import { PlaywrightCrawler } from 'crawlee';

await Actor.init();

// Actor.getInput() resolves to null when the run has no input record, so fall
// back to an empty object instead of crashing on the first property access.
const input = (await Actor.getInput()) ?? {};
// `||` is intentional: an empty string should also fall back to the default.
const country = input.COUNTRY_OR_REGION || 'Austria';
// TODO: DEVELOPER_OR_APP and DATE_RANGE are declared in the input schema but
// are not applied to the page filters yet; only the country filter is used.

const START_URL = 'https://adrepository.apple.com/';
const RESULTS_SELECTOR = '.br-md-bottom';
const AD_BUTTON_SELECTOR = '.br-md-bottom .button';
const RESULTS_TIMEOUT_MS = 60000;
// The whole crawl (every ad on every results page) runs inside one request
// handler, which would exceed Crawlee's default 60 s handler timeout.
// NOTE(review): one hour is a generous guess - tune to the real result volume.
const REQUEST_HANDLER_TIMEOUT_SECS = 3600;

// Output field name -> label text shown on the ad detail view.
const AD_DETAIL_LABELS = {
    app: 'App',
    developer: 'Developer',
    legalName: 'Legal Name',
    placement: 'Placement',
    format: 'Format',
    country: 'Country or Region',
    parameters: 'Parameters',
    firstImpression: 'First Impression',
    latestImpression: 'Latest Impression',
};

/**
 * Reads every field listed in AD_DETAIL_LABELS from the currently open ad
 * detail view.
 *
 * Each value is the text content of the first element that follows an element
 * matching the label text.
 * NOTE(review): `text=...` is a substring match, so short labels such as "App"
 * may match unrelated elements - verify against the live DOM.
 *
 * @param {import('playwright').Page} page - Page showing an ad detail view.
 * @returns {Promise<Record<string, string | null>>} Extracted ad details.
 */
async function readAdDetails(page) {
    const adDetails = {};
    for (const [field, label] of Object.entries(AD_DETAIL_LABELS)) {
        adDetails[field] = await page
            .locator(`text=${label}`)
            .locator('xpath=following-sibling::*')
            .nth(0)
            .textContent();
    }
    return adDetails;
}

/**
 * Opens every ad listed on the current results page, pushes its details to
 * the default dataset and returns to the results list.
 *
 * @param {import('playwright').Page} page - Page showing a results list.
 * @param {import('crawlee').Log} log - Logger from the crawling context.
 * @returns {Promise<void>}
 */
async function scrapeResultsPage(page, log) {
    await page.waitForSelector(RESULTS_SELECTOR, { state: 'visible', timeout: RESULTS_TIMEOUT_MS });

    const buttons = page.locator(AD_BUTTON_SELECTOR);
    const buttonCount = await buttons.count();
    log.info(`Found ${buttonCount} buttons for ad details.`);

    for (let i = 0; i < buttonCount; i++) {
        log.info(`Processing button ${i + 1} of ${buttonCount}`);

        // Click on the button to navigate to ad details
        await buttons.nth(i).click();
        await page.waitForTimeout(3000); // Wait for ad details to load

        const adDetails = await readAdDetails(page);
        log.info(`Extracted Ad Details: ${JSON.stringify(adDetails)}`);
        await Actor.pushData(adDetails);

        // NOTE(review): assumes goBack() restores the same results page the ad
        // was opened from - confirm that pagination state survives it.
        log.info('Returning to main page...');
        await page.goBack();
        await page.waitForSelector(RESULTS_SELECTOR, { state: 'visible', timeout: RESULTS_TIMEOUT_MS });
    }
}

const crawler = new PlaywrightCrawler({
    requestHandlerTimeoutSecs: REQUEST_HANDLER_TIMEOUT_SECS,
    requestHandler: async ({ page, request, log }) => {
        log.info(`Processing ${request.url} with label: ${request.label}`);

        if (request.label !== 'START') {
            return;
        }

        // The crawler has already navigated `page` to request.url (the start
        // URL), so no additional page.goto() is needed here.
        log.info(`Selecting Country or Region: ${country}`);
        await page.getByLabel('Country or Region').click();
        await page.getByLabel(country).check();
        await page.getByRole('button', { name: 'Apply' }).click();

        log.info('Waiting for results to load...');
        await page.waitForTimeout(3000); // Allow results to begin loading

        // Scrape the current results page, then keep following the "next"
        // control until it is hidden or disabled, so ads on every page are
        // collected rather than only those on the first one.
        let pageNumber = 1;
        let hasNextPage = true;
        while (hasNextPage) {
            log.info(`Scraping results page ${pageNumber}...`);
            await scrapeResultsPage(page, log);

            const nextButton = page.getByLabel('next');
            hasNextPage = (await nextButton.isVisible()) && (await nextButton.isEnabled());
            if (hasNextPage) {
                log.info('Navigating to the next page...');
                await nextButton.click();
                await page.waitForTimeout(5000); // Wait for the next page to load
                pageNumber += 1;
            }
        }
        log.info('No more pages available.');
    },
});

// Add the start URL with a label
await crawler.addRequests([{ url: START_URL, label: 'START' }]);

await crawler.run();
await Actor.exit();

src/routes.js

import { createPlaywrightRouter } from 'crawlee';

/**
 * Request router for Playwright-based crawling.
 *
 * Built with createPlaywrightRouter() because Crawlee's `Router` is a class
 * and cannot be invoked as a plain function. Pass it to a crawler as
 * `requestHandler: routes`.
 * Note: src/main.js currently defines its own inline requestHandler and does
 * not import this router.
 */
export const routes = createPlaywrightRouter();

// Default handler: logs the URL and the title of every page that has no
// label-specific handler registered.
routes.addDefaultHandler(async ({ page, request, log }) => {
    log.info(`Processing URL: ${request.url}`);

    // PlaywrightCrawler navigates `page` to request.url before invoking the
    // handler, so the page can be read directly without another goto().
    const title = await page.title();
    log.info(`Page title: ${title}`);
});

tests/example.spec.js

import { test, expect } from '@playwright/test';

// Smoke test: applying the "Austria" country filter on the Apple Ad Repository
// must render at least one result entry.
test('navigate and apply filters', async ({ page }) => {
  await page.goto('https://adrepository.apple.com/');
  await page.getByLabel('Country or Region').click();
  await page.getByLabel('Austria').click();
  await page.getByRole('button', { name: 'Apply' }).click();

  // NOTE(review): src/main.js waits on '.br-md-bottom' for the results list;
  // confirm that 'div.results-container' still matches the live page.
  await page.waitForSelector('div.results-container', { state: 'visible' });
  const resultsCount = await page.locator('div.results-container > div').count();
  console.log('Results count:', resultsCount);

  // Without an assertion the test passed regardless of what was rendered.
  expect(resultsCount).toBeGreaterThan(0);
});

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "extends": "@apify",
3    "root": true
4}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv

input.json

1{
2  "COUNTRY_OR_REGION": "Hungary",
3  "DATE_RANGE": "Last 90 days",
4  "DEVELOPER_OR_APP": "Example Developer"
5}

package.json

1{
2	"name": "apple-ads-library-scraper",
3	"version": "0.0.1",
4	"type": "module",
5	"description": "This is an example of an Apify actor.",
6	"dependencies": {
7		"apify": "^3.2.6",
8		"crawlee": "^3.11.5",
9		"playwright": "*"
10	},
11	"devDependencies": {
12		"@apify/eslint-config": "^0.4.0",
13		"@playwright/test": "^1.49.1",
14		"eslint": "^8.50.0"
15	},
16	"scripts": {
17		"start": "node src/main.js",
18		"lint": "eslint ./src --ext .js,.jsx",
19		"lint:fix": "eslint ./src --ext .js,.jsx --fix",
20		"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1",
21		"postinstall": "npx crawlee install-playwright-browsers"
22	},
23	"author": "It's not you it's me",
24	"license": "ISC"
25}
Developer
Maintained by Community

Actor Metrics

  • 1 monthly user

  • 0 No bookmarks yet

  • >99% runs succeeded

  • Created in Jan 2025

  • Modified a month ago