Example Selenium avatar
Example Selenium
Try for free

No credit card required

View all Actors
Example Selenium

Example Selenium

apify/example-selenium
Try for free

No credit card required

Example of loading a web page in headless Chrome using Selenium Webdriver.

Dockerfile

# Base image referenced by this actor (Node.js with Chrome; the RUN step below
# also expects a `chromedriver` binary and $APIFY_CHROME_EXECUTABLE_PATH in it).
FROM apify/actor-node-chrome:beta

# Copy only package.json first so the dependency layer below can be cached
# independently of source-code changes.
COPY package.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
# NOTE: `npm list || true` is wrapped in parentheses on purpose. `&&` and `||`
# have equal precedence in the shell and associate to the left, so without the
# grouping a failed `npm install` would also be swallowed by `|| true` and the
# build would continue with missing dependencies.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version \
 && echo "Google Chrome version:" \
 && bash -c "$APIFY_CHROME_EXECUTABLE_PATH --version" \
 && echo "ChromeDriver version:" \
 && chromedriver --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, rebuilds will be fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
# CMD npm start

INPUT_SCHEMA.json

1{
2    "title": "Input schema for Selenium example",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "url": {
7            "title": "Url",
8            "type": "string",
9            "description": "URL of the web page to open with Selenium WebDriver.",
10            "editor": "textfield",
11            "prefill": "https://www.example.com"
12        },
13        "userAgent": {
14            "title": "User agent",
15            "type": "string",
16            "description": "User-Agent string the browser should use. If left empty, a default Chrome user agent is used.",
17            "editor": "textfield",
18            "prefill": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
19        },
20        "proxy": {
21            "title": "Proxy configuration",
22            "type": "object",
23            "description": "Select proxies to be used by your crawler.",
24            "prefill": { "useApifyProxy": true },
25            "editor": "proxy"
26        }
27    },
28    "required": ["url"]
29}

main.js

1const Apify = require('apify');
2const { Capabilities, Builder, logging } = require('selenium-webdriver');
3const chrome = require('selenium-webdriver/chrome');
4const proxy = require('selenium-webdriver/proxy');
5const { anonymizeProxy } = require('proxy-chain');
6
/**
 * Builds a Selenium WebDriver instance that drives Chrome.
 *
 * @param {Object} [options] - Launch options; every field is optional.
 * @param {boolean} [options.headless] - When truthy, Chrome is started with
 *     `--headless` and `--no-sandbox`.
 * @param {string} [options.userAgent] - Passed to Chrome as `--user-agent=...`.
 * @param {string|string[]} [options.extraChromeArguments] - Extra Chrome
 *     command-line arguments, forwarded to `chromeOptions.addArguments()`.
 * @param {string} [options.proxyUrl] - Upstream proxy URL. It is run through
 *     `anonymizeProxy()` and the URL returned by that call is what Chrome
 *     receives as `--proxy-server=...`.
 * @returns {Promise<Object>} Whatever `Builder#build()` returns, i.e. the WebDriver.
 */
const launchChromeWebdriver = async (options) => {
    // Tolerate a missing options object instead of failing with a TypeError.
    const {
        headless,
        userAgent,
        extraChromeArguments,
        proxyUrl,
    } = options || {};

    // To debug the WebDriver HTTP traffic, enable Selenium's console logging:
    //   logging.installConsoleHandler();
    //   logging.getLogger('webdriver.http').setLevel(logging.Level.ALL);

    // See https://github.com/SeleniumHQ/selenium/wiki/DesiredCapabilities for reference.
    const capabilities = new Capabilities();
    capabilities.set('browserName', 'chrome');

    // Chrome-specific options
    // By default, Selenium already defines a long list of command-line options
    // to enable browser automation, here we add a few other ones
    // (inspired by Lighthouse, see lighthouse/lighthouse-cli/chrome-launcher)
    const chromeOptions = new chrome.Options();
    chromeOptions.addArguments('--disable-translate');
    chromeOptions.addArguments('--safebrowsing-disable-auto-update');

    if (headless) {
        chromeOptions.addArguments('--headless', '--no-sandbox');
    }

    if (userAgent) {
        chromeOptions.addArguments(`--user-agent=${userAgent}`);
    }

    if (extraChromeArguments) {
        chromeOptions.addArguments(extraChromeArguments);
    }

    // Chrome is pointed at the URL returned by anonymizeProxy() rather than at
    // the original proxy URL (presumably a local, credential-free forwarder for
    // proxies that require authentication - confirm in the proxy-chain docs).
    // NOTE: to view effective proxy settings in Chrome, open chrome://net-internals/#proxy
    if (proxyUrl) {
        const anonymizedProxyUrl = await anonymizeProxy(proxyUrl);
        chromeOptions.addArguments(`--proxy-server=${anonymizedProxyUrl}`);
    }

    return new Builder()
        .setChromeOptions(chromeOptions)
        .withCapabilities(capabilities)
        .build();
};
53
54
// Actor entry point: reads the input, opens `input.url` in headless Chrome via
// Selenium WebDriver and saves a screenshot, the page HTML and the page title
// using Apify.setValue().
Apify.main(async () => {
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Fail early with a readable message instead of a TypeError further down.
    if (!input || !input.url) {
        throw new Error('Input must be an object with the "url" field.');
    }

    // Prepare proxy URL
    let proxyUrl = '';
    if (input.proxy) {
        const { useApifyProxy, apifyProxyGroups, proxyUrls } = input.proxy;
        if (useApifyProxy) {
            proxyUrl = Apify.getApifyProxyUrl({ groups: apifyProxyGroups });
        } else if ((proxyUrls || []).length) {
            // Pick one of the custom proxy URLs at random. Math.random must be
            // *called*: multiplying by the function itself yields NaN and the
            // lookup would always produce undefined.
            proxyUrl = proxyUrls[Math.floor(Math.random() * proxyUrls.length)];
        }
    }

    // Prepare user agent
    let userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36';
    if (input.userAgent) userAgent = input.userAgent;

    const options = {
        proxyUrl,
        userAgent,
        headless: true,
    };

    console.log('Launching Selenium WebDriver...');
    const webDriver = await launchChromeWebdriver(options);

    try {
        console.log(`Opening ${input.url}...`);
        await webDriver.get(input.url);

        const currentUrl = await webDriver.getCurrentUrl();
        console.log(`Current url ${currentUrl}`);

        console.log('Getting the page title...');
        // The callback is executed by WebDriver in the page, not in Node.js.
        const pageTitle = await webDriver.executeScript(() => {
            return document.title;
        });
        console.log(`Page title: ${pageTitle}`);

        console.log('Taking screenshot...');
        // The screenshot is decoded from base64 before it is stored as PNG.
        const screenshot = await webDriver.takeScreenshot();
        await Apify.setValue('screenshot.png', Buffer.from(screenshot, 'base64'), { contentType: 'image/png' });

        console.log('Getting the page html code...');
        const source = await webDriver.getPageSource();
        await Apify.setValue('source.html', source, { contentType: 'text/html' });

        console.log('Saving output...');
        await Apify.setValue('OUTPUT', {
            url: input.url,
            pageTitle,
        });
    } finally {
        // Always end the WebDriver session so the browser is not left running,
        // even when one of the steps above throws.
        await webDriver.quit();
    }

    console.log('Done.');
});

package.json

1{
2    "name": "selenium-chrome-example",
3    "version": "0.0.1",
4    "dependencies": {
5        "apify": "^0.16.0",
6        "proxy-chain": "^0.3.2",
7        "selenium-webdriver": "^3.0.0"
8    },
9    "scripts": {
10        "start": "node main.js"
11    },
12    "author": "Apify"
13}
Developer
Maintained by Apify
Actor metrics
  • 6 monthly users
  • 98.6% runs succeeded
  • 21.1 days response time
  • Created in Nov 2017
  • Modified 6 months ago