Example Selenium avatar
Example Selenium

Pricing

Pay per usage

Go to Store
Example Selenium

Example Selenium

Developed by

Apify

Apify

Maintained by Apify

Example of loading a web page in headless Chrome using Selenium WebDriver.

4.7 (3)

Pricing

Pay per usage

3

Total users

281

Monthly users

6

Runs succeeded

>99%

Last modified

a year ago

Dockerfile

FROM apify/actor-node-chrome:beta

# Copy just package.json first, so that the NPM install step below is
# separated from the copy of the remaining source files further down.
COPY package.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
# NOTE: "npm list || true" is grouped in a subshell on purpose. In the shell,
# "&&" and "||" have equal precedence and are evaluated left to right, so an
# ungrouped "|| true" would also swallow a failure of "npm install" and let
# the build continue without the dependencies. Only a non-zero exit code of
# "npm list" itself is meant to be ignored here.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version \
 && echo "Google Chrome version:" \
 && bash -c "$APIFY_CHROME_EXECUTABLE_PATH --version" \
 && echo "ChromeDriver version:" \
 && chromedriver --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
# CMD npm start

INPUT_SCHEMA.json

{
"title": "Input schema for Selenium example",
"type": "object",
"schemaVersion": 1,
"properties": {
"url": {
"title": "Url",
"type": "string",
"description": "Url to open with the selenium webdriver",
"editor": "textfield",
"prefill": "https://www.example.com"
},
"userAgent": {
"title": "User agent",
"type": "string",
"description": "If you want to specify user agent to use, you can do it here",
"editor": "textfield",
"prefill": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
},
"proxy": {
"title": "Proxy configuration",
"type": "object",
"description": "Select proxies to be used by your crawler.",
"prefill": { "useApifyProxy": true },
"editor": "proxy"
}
},
"required": ["url"]
}

main.js

1const Apify = require('apify');
2const { Capabilities, Builder, logging } = require('selenium-webdriver');
3const chrome = require('selenium-webdriver/chrome');
4const proxy = require('selenium-webdriver/proxy');
5const { anonymizeProxy } = require('proxy-chain');
6
/**
 * Builds a Selenium WebDriver instance for Chrome from the given options.
 *
 * @param {Object} options
 * @param {boolean} [options.headless] - When truthy, Chrome is started with
 *   the `--headless` and `--no-sandbox` arguments.
 * @param {string} [options.userAgent] - When set, passed to Chrome via `--user-agent=`.
 * @param {string|string[]} [options.extraChromeArguments] - When set, handed
 *   as-is to `chromeOptions.addArguments()`.
 * @param {string} [options.proxyUrl] - When set, the URL is run through
 *   `anonymizeProxy()` and the result is passed to Chrome via `--proxy-server=`.
 * @returns {Promise<Object>} Resolves to whatever `Builder#build()` returns.
 */
const launchChromeWebdriver = async (options) => {
    // To debug the WebDriver HTTP traffic, uncomment the following two lines:
    // logging.installConsoleHandler();
    // logging.getLogger('webdriver.http').setLevel(logging.Level.ALL);

    // See https://github.com/SeleniumHQ/selenium/wiki/DesiredCapabilities for reference.
    const capabilities = new Capabilities();
    capabilities.set('browserName', 'chrome');

    // Chrome-specific options
    // By default, Selenium already defines a long list of command-line options
    // to enable browser automation, here we add a few other ones
    // (inspired by Lighthouse, see lighthouse/lighthouse-cli/chrome-launcher)
    const chromeOptions = new chrome.Options();
    chromeOptions.addArguments('--disable-translate');
    chromeOptions.addArguments('--safebrowsing-disable-auto-update');

    if (options.headless) {
        chromeOptions.addArguments('--headless', '--no-sandbox');
    }

    if (options.userAgent) {
        chromeOptions.addArguments(`--user-agent=${options.userAgent}`);
    }

    if (options.extraChromeArguments) {
        chromeOptions.addArguments(options.extraChromeArguments);
    }

    // The proxy URL is not handed to Chrome directly; anonymizeProxy() returns
    // the URL that is actually used for --proxy-server (per the original note,
    // it starts a local proxy server for upstream proxies with authentication).
    // NOTE: to view effective proxy settings in Chrome, open chrome://net-internals/#proxy
    if (options.proxyUrl) {
        const anonymizedProxyUrl = await anonymizeProxy(options.proxyUrl);
        chromeOptions.addArguments(`--proxy-server=${anonymizedProxyUrl}`);
    }

    return new Builder()
        .setChromeOptions(chromeOptions)
        .withCapabilities(capabilities)
        .build();
};
53
54
// Actor entry point: reads the input, opens input.url in headless Chrome via
// Selenium WebDriver and stores a screenshot ('screenshot.png'), the page
// source ('source.html') and a summary record ('OUTPUT') using Apify.setValue().
Apify.main(async () => {
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // "url" is marked as required in the input schema; fail early with a clear
    // message instead of a TypeError further down.
    if (!input || !input.url) {
        throw new Error('Input must contain the "url" field.');
    }

    // Prepare proxy URL
    let proxyUrl = '';
    if (input.proxy) {
        const { useApifyProxy, apifyProxyGroups, proxyUrls } = input.proxy;
        if (useApifyProxy) {
            proxyUrl = Apify.getApifyProxyUrl({ groups: apifyProxyGroups });
        } else if ((proxyUrls || []).length) {
            // Pick one of the custom proxy URLs at random. Math.random must be
            // called; multiplying the function itself yields NaN and thus an
            // undefined proxy URL.
            proxyUrl = proxyUrls[Math.floor(Math.random() * proxyUrls.length)];
        }
    }

    // Prepare user agent: the value from input wins over the built-in default.
    let userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36';
    if (input.userAgent) userAgent = input.userAgent;

    const options = {
        proxyUrl,
        userAgent,
        headless: true,
    };

    console.log('Launching Selenium WebDriver...');
    const webDriver = await launchChromeWebdriver(options);

    try {
        console.log(`Opening ${input.url}...`);
        await webDriver.get(input.url);

        const currentUrl = await webDriver.getCurrentUrl();
        console.log(`Current url ${currentUrl}`);

        console.log('Getting the page title...');
        const pageTitle = await webDriver.executeScript(() => {
            return document.title;
        });
        console.log(`Page title: ${pageTitle}`);

        console.log('Taking screenshot...');
        // The screenshot string is decoded from base64 before it is stored as PNG.
        const screenshot = await webDriver.takeScreenshot();
        await Apify.setValue('screenshot.png', Buffer.from(screenshot, 'base64'), { contentType: 'image/png' });

        console.log('Getting the page html code...');
        const source = await webDriver.getPageSource();
        await Apify.setValue('source.html', source, { contentType: 'text/html' });

        console.log('Saving output...');
        await Apify.setValue('OUTPUT', {
            url: input.url,
            pageTitle,
        });
    } finally {
        // Always end the WebDriver session, even when one of the steps above throws.
        await webDriver.quit();
    }

    console.log('Done.');
});

package.json

{
"name": "selenium-chrome-example",
"version": "0.0.1",
"dependencies": {
"apify": "^0.16.0",
"proxy-chain": "^0.3.2",
"selenium-webdriver": "^3.0.0"
},
"scripts": {
"start": "node main.js"
},
"author": "Apify"
}