Example Selenium
Try for free
No credit card required
View all Actors
Example Selenium
apify/example-selenium
Try for free
No credit card required
Example of loading a web page in headless Chrome using Selenium Webdriver.
Do you want to learn more about this Actor?
Get a demo
Dockerfile
# Base image maintained by Apify. It is expected to provide Node.js, NPM,
# Google Chrome and ChromeDriver, whose versions are printed during the build.
FROM apify/actor-node-chrome:beta

# Copy only package.json first, so that the dependency installation below
# does not have to be repeated when just the source files change.
COPY package.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree and the tool versions for debugging.
# NOTE: "npm list || true" is grouped in parentheses on purpose. The shell
# evaluates "&&" and "||" left to right with equal precedence, so without
# the grouping "|| true" would also hide a failed "npm install" and the
# build would continue with missing dependencies.
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version \
 && echo "Google Chrome version:" \
 && bash -c "$APIFY_CHROME_EXECUTABLE_PATH --version" \
 && echo "ChromeDriver version:" \
 && chromedriver --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
# CMD npm start
INPUT_SCHEMA.json
1{
2 "title": "Input schema for Selenium example",
3 "type": "object",
4 "schemaVersion": 1,
5 "properties": {
6 "url": {
7 "title": "URL",
8 "type": "string",
9 "description": "URL to open with the Selenium WebDriver",
10 "editor": "textfield",
11 "prefill": "https://www.example.com"
12 },
13 "userAgent": {
14 "title": "User agent",
15 "type": "string",
16 "description": "Optional user agent string for the browser to use instead of the default one",
17 "editor": "textfield",
18 "prefill": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36"
19 },
20 "proxy": {
21 "title": "Proxy configuration",
22 "type": "object",
23 "description": "Select proxies to be used by your crawler.",
24 "prefill": { "useApifyProxy": true },
25 "editor": "proxy"
26 }
27 },
28 "required": ["url"]
29}
main.js
1const Apify = require('apify');
2const { Capabilities, Builder, logging } = require('selenium-webdriver');
3const chrome = require('selenium-webdriver/chrome');
4const proxy = require('selenium-webdriver/proxy');
5const { anonymizeProxy } = require('proxy-chain');
6
/**
 * Builds a Selenium WebDriver instance that controls Google Chrome.
 *
 * @param {Object} [options] - Launch options; all fields are optional.
 * @param {boolean} [options.headless] - When truthy, Chrome is started with
 *   the `--headless` and `--no-sandbox` switches.
 * @param {string} [options.userAgent] - Value for Chrome's `--user-agent` switch.
 * @param {string|string[]} [options.extraChromeArguments] - Additional
 *   command-line switches, handed to `chromeOptions.addArguments()` as they are.
 * @param {string} [options.proxyUrl] - Proxy URL. It is passed through
 *   `anonymizeProxy()` and the returned URL is used for `--proxy-server`.
 * @returns {Promise<Object>} Resolves to the value returned by `Builder#build()`.
 */
const launchChromeWebdriver = async (options = {}) => {
    // To debug the WebDriver HTTP traffic, uncomment the following two lines:
    // logging.installConsoleHandler();
    // logging.getLogger('webdriver.http').setLevel(logging.Level.ALL);

    // See https://github.com/SeleniumHQ/selenium/wiki/DesiredCapabilities for reference.
    const capabilities = new Capabilities();
    capabilities.set('browserName', 'chrome');

    // Chrome-specific options
    // By default, Selenium already defines a long list of command-line options
    // to enable browser automation, here we add a few other ones
    // (inspired by Lighthouse, see lighthouse/lighthouse-cli/chrome-launcher)
    const chromeOptions = new chrome.Options();
    chromeOptions.addArguments('--disable-translate');
    chromeOptions.addArguments('--safebrowsing-disable-auto-update');

    if (options.headless) {
        chromeOptions.addArguments('--headless', '--no-sandbox');
    }

    if (options.userAgent) {
        chromeOptions.addArguments(`--user-agent=${options.userAgent}`);
    }

    if (options.extraChromeArguments) {
        chromeOptions.addArguments(options.extraChromeArguments);
    }

    // For proxy servers with authentication, anonymizeProxy() starts a local proxy server
    // and Chrome is pointed to that local server instead of the original URL.
    // NOTE: to view effective proxy settings in Chrome, open chrome://net-internals/#proxy
    if (options.proxyUrl) {
        const anonymizedProxyUrl = await anonymizeProxy(options.proxyUrl);
        chromeOptions.addArguments(`--proxy-server=${anonymizedProxyUrl}`);
    }

    return new Builder()
        .setChromeOptions(chromeOptions)
        .withCapabilities(capabilities)
        .build();
};
53
54
// Actor entry point: reads the input, opens the requested URL in headless
// Chrome through Selenium WebDriver and stores the page title, a screenshot
// and the HTML source using Apify.setValue().
Apify.main(async () => {
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Fail with a clear message rather than with a TypeError further below.
    if (!input || !input.url) {
        throw new Error('Input must be an object containing the "url" field.');
    }

    // Prepare proxy URL
    let proxyUrl = '';
    if (input.proxy) {
        const { useApifyProxy, apifyProxyGroups, proxyUrls } = input.proxy;
        if (useApifyProxy) {
            proxyUrl = Apify.getApifyProxyUrl({ groups: apifyProxyGroups });
        } else if (Array.isArray(proxyUrls) && proxyUrls.length > 0) {
            // Pick one of the custom proxy URLs at random. Math.random must be
            // called here; using the function itself gives a NaN index and
            // therefore no proxy at all.
            proxyUrl = proxyUrls[Math.floor(Math.random() * proxyUrls.length)];
        }
    }

    // Prepare user agent
    const defaultUserAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36';
    const userAgent = input.userAgent || defaultUserAgent;

    const options = {
        proxyUrl,
        userAgent,
        headless: true,
    };

    console.log('Launching Selenium WebDriver...');
    const webDriver = await launchChromeWebdriver(options);

    try {
        console.log(`Opening ${input.url}...`);
        await webDriver.get(input.url);

        const currentUrl = await webDriver.getCurrentUrl();
        console.log(`Current url ${currentUrl}`);

        console.log('Getting the page title...');
        // The function is executed in the context of the loaded page.
        const pageTitle = await webDriver.executeScript(() => {
            return document.title;
        });
        console.log(`Page title: ${pageTitle}`);

        console.log('Taking screenshot...');
        // takeScreenshot() result is decoded from base64 before it is stored.
        const screenshot = await webDriver.takeScreenshot();
        await Apify.setValue('screenshot.png', Buffer.from(screenshot, 'base64'), { contentType: 'image/png' });

        console.log('Getting the page html code...');
        const source = await webDriver.getPageSource();
        await Apify.setValue('source.html', source, { contentType: 'text/html' });

        console.log('Saving output...');
        await Apify.setValue('OUTPUT', {
            url: input.url,
            pageTitle,
        });
    } finally {
        // Always end the WebDriver session, even when one of the steps above fails,
        // so that the browser is not left running.
        await webDriver.quit();
    }

    console.log('Done.');
});
package.json
1{
2 "name": "selenium-chrome-example",
3 "version": "0.0.1",
4 "dependencies": {
5 "apify": "^0.16.0",
6 "proxy-chain": "^0.3.2",
7 "selenium-webdriver": "^3.0.0"
8 },
9 "scripts": {
10 "start": "node main.js"
11 },
12 "author": "Apify"
13}
Developer
Maintained by Apify
Actor Metrics
7 monthly users
-
2 stars
>99% runs succeeded
Created in Nov 2017
Modified 5 months ago
Categories