
Example Selenium
Pricing
Pay per usage
Go to Store

Example Selenium
Example of loading a web page in headless Chrome using Selenium Webdriver.
4.7 (3)
Pricing
Pay per usage
3
Total users
281
Monthly users
6
Runs succeeded
>99%
Last modified
a year ago
Dockerfile
# Base image: referenced here as apify/actor-node-chrome (beta tag).
FROM apify/actor-node-chrome:beta

# Copy only package.json first so that the NPM install layer below can be
# cached independently of changes to the rest of the source code.
COPY package.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && npm list || true \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version \
 && echo "Google Chrome version:" \
 && bash -c "$APIFY_CHROME_EXECUTABLE_PATH --version" \
 && echo "ChromeDriver version:" \
 && chromedriver --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
# CMD npm start
INPUT_SCHEMA.json
{ "title": "Input schema for Selenium example", "type": "object", "schemaVersion": 1, "properties": { "url": { "title": "URL", "type": "string", "description": "URL to open with the Selenium WebDriver", "editor": "textfield", "prefill": "https://www.example.com" }, "userAgent": { "title": "User agent", "type": "string", "description": "If you want to specify the user agent to use, you can do it here", "editor": "textfield", "prefill": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36" }, "proxy": { "title": "Proxy configuration", "type": "object", "description": "Select proxies to be used by your crawler.", "prefill": { "useApifyProxy": true }, "editor": "proxy" } }, "required": ["url"]}
main.js
1const Apify = require('apify');2const { Capabilities, Builder, logging } = require('selenium-webdriver');3const chrome = require('selenium-webdriver/chrome');4const proxy = require('selenium-webdriver/proxy');5const { anonymizeProxy } = require('proxy-chain');6
/**
 * Builds and launches a Chrome browser session driven by Selenium WebDriver.
 *
 * @param {Object} options
 * @param {boolean} [options.headless] When truthy, Chrome is started with
 *   `--headless` and `--no-sandbox`.
 * @param {string} [options.userAgent] When set, passed to Chrome via
 *   `--user-agent=`.
 * @param {string|string[]} [options.extraChromeArguments] When set, handed
 *   as-is to `chromeOptions.addArguments()` (accepted shapes depend on the
 *   installed selenium-webdriver version - TODO confirm).
 * @param {string} [options.proxyUrl] When set, the URL is passed through
 *   proxy-chain's `anonymizeProxy()` and the result is given to Chrome via
 *   `--proxy-server=`.
 * @returns {Promise<Object>} The WebDriver instance produced by `Builder#build()`.
 */
const launchChromeWebdriver = async (options) => {
    // Uncomment to get verbose logging of the WebDriver HTTP traffic:
    // logging.installConsoleHandler();
    // logging.getLogger('webdriver.http').setLevel(logging.Level.ALL);

    // See https://github.com/SeleniumHQ/selenium/wiki/DesiredCapabilities for reference.
    const capabilities = new Capabilities();
    capabilities.set('browserName', 'chrome');

    // Chrome-specific options
    // By default, Selenium already defines a long list of command-line options
    // to enable browser automation, here we add a few other ones
    // (inspired by Lighthouse, see lighthouse/lighthouse-cli/chrome-launcher)
    const chromeOptions = new chrome.Options();
    chromeOptions.addArguments('--disable-translate');
    chromeOptions.addArguments('--safebrowsing-disable-auto-update');

    if (options.headless) {
        chromeOptions.addArguments('--headless', '--no-sandbox');
    }

    if (options.userAgent) {
        chromeOptions.addArguments(`--user-agent=${options.userAgent}`);
    }

    if (options.extraChromeArguments) {
        chromeOptions.addArguments(options.extraChromeArguments);
    }

    // Chrome's --proxy-server switch cannot carry credentials, so the URL is
    // routed through anonymizeProxy() first (per the original note, it starts
    // a local proxy server for upstream proxies that require authentication).
    // NOTE: to view effective proxy settings in Chrome, open chrome://net-internals/#proxy
    if (options.proxyUrl) {
        const anonymizedProxyUrl = await anonymizeProxy(options.proxyUrl);
        chromeOptions.addArguments(`--proxy-server=${anonymizedProxyUrl}`);
    }

    const webDriver = new Builder()
        .setChromeOptions(chromeOptions)
        .withCapabilities(capabilities)
        .build();

    return webDriver;
};

54
/**
 * Actor entry point.
 *
 * Reads the actor input, resolves the proxy URL and user agent, launches
 * headless Chrome through Selenium WebDriver, opens `input.url` and stores:
 *   - `screenshot.png` - PNG screenshot of the page,
 *   - `source.html`    - page source as returned by WebDriver,
 *   - `OUTPUT`         - object with the requested URL and the page title.
 */
Apify.main(async () => {
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Fail early with a readable message instead of a TypeError further down
    // (or after Chrome has already been started).
    if (!input || !input.url) {
        throw new Error('Input must be an object with the "url" field set.');
    }

    // Prepare proxy URL
    let proxyUrl = '';
    if (input.proxy) {
        const { useApifyProxy, apifyProxyGroups, proxyUrls } = input.proxy;
        if (useApifyProxy) {
            proxyUrl = Apify.getApifyProxyUrl({ groups: apifyProxyGroups });
        } else if ((proxyUrls || []).length) {
            // Pick one of the custom proxy URLs at random. Math.random must be
            // called: multiplying the function object itself yields NaN and
            // the lookup would always produce undefined.
            proxyUrl = proxyUrls[Math.floor(Math.random() * proxyUrls.length)];
        }
    }

    // Prepare user agent: the input value wins, otherwise use the default.
    const userAgent = input.userAgent
        || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36';

    const options = {
        proxyUrl,
        userAgent,
        headless: true,
    };

    console.log('Launching Selenium WebDriver...');
    const webDriver = await launchChromeWebdriver(options);

    try {
        console.log(`Opening ${input.url}...`);
        await webDriver.get(input.url);

        const currentUrl = await webDriver.getCurrentUrl();
        console.log(`Current url ${currentUrl}`);

        console.log('Getting the page title...');
        // The callback is executed by WebDriver in the browser page,
        // which is why it can reference `document`.
        const pageTitle = await webDriver.executeScript(() => {
            return document.title;
        });
        console.log(`Page title: ${pageTitle}`);

        console.log('Taking screenshot...');
        // The screenshot is decoded from base64 before being stored as a PNG.
        const screenshot = await webDriver.takeScreenshot();
        await Apify.setValue('screenshot.png', Buffer.from(screenshot, 'base64'), { contentType: 'image/png' });

        console.log('Getting the page html code...');
        const source = await webDriver.getPageSource();
        await Apify.setValue('source.html', source, { contentType: 'text/html' });

        console.log('Saving output...');
        await Apify.setValue('OUTPUT', {
            url: input.url,
            pageTitle,
        });
    } finally {
        // Always end the WebDriver session so the browser is shut down,
        // even when one of the steps above throws.
        await webDriver.quit();
    }

    console.log('Done.');
});
package.json
{ "name": "selenium-chrome-example", "version": "0.0.1", "dependencies": { "apify": "^0.16.0", "proxy-chain": "^0.3.2", "selenium-webdriver": "^3.0.0" }, "scripts": { "start": "node main.js" }, "author": "Apify"}