Softwareadvicescrapereviews

Developed by Matt
Maintained by Community

Rating: 0.0 (0)
Pricing: Pay per usage
Total users: 19
Monthly users: 2
Runs succeeded: >99%
Last modified: 2 years ago

Dockerfile

# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16
# Second, copy just package.json and package-lock.json since those are the only
# files that affect "npm install" in the next step, to speed up the build.
COPY package*.json ./
# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much, and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Next, copy the remaining files and directories with the source code.
# Since we do this after the NPM install, a quick build will be really fast
# for most source file changes.
COPY . ./
# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start

INPUT_SCHEMA.json

{
    "title": "Input Schema",
    "description": "To point the crawler at another site, change the startUrls option.",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "A static list of URLs to scrape. It is recommended to use only one start URL. Navigate to a company/product page and use its URL (e.g. https://www.softwareadvice.com/fleet-management/gps-insight-profile/) as the start URL.<br><br>For details, see <a href='https://apify.com/apify/web-scraper#start-urls' target='_blank' rel='noopener'>Start URLs</a> in README.",
            "prefill": [{ "url": "https://www.softwareadvice.com/fleet-management/gps-insight-profile/" }],
            "editor": "requestListSources"
        },
        "maxRequestRetries": {
            "title": "Max page retries",
            "type": "integer",
            "description": "The maximum number of times the scraper will retry loading each web page, in case of a page load error or an exception thrown by the <b>Page function</b>.<br><br>If set to <code>0</code>, the page will be considered failed right after the first error.",
            "minimum": 0,
            "prefill": 2,
            "default": 2
        }
    },
    "required": ["startUrls"]
}
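
For reference, a valid input matching this schema might look like the following sketch. The URL is the same example used in the prefill; any SoftwareAdvice company/product profile URL should work in its place.

// Example input for this actor (a sketch; startUrls is the only required field).
const exampleInput = {
    startUrls: [
        { url: 'https://www.softwareadvice.com/fleet-management/gps-insight-profile/' },
    ],
    maxRequestRetries: 2, // optional, defaults to 2
};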

main.js

// This is the main Node.js source code file of your actor.
// It is referenced from the "scripts" section of the package.json file.

const Apify = require('apify');

Apify.main(async () => {
    // Get the input of the actor. Input fields can be modified in the INPUT_SCHEMA.json file.
    // For more information, see https://docs.apify.com/actors/development/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Prepare the input for the apify/web-scraper actor. This input is based on the
    // actor task that was used as the starting point.
    const metamorphInput = {
        "breakpointLocation": "NONE",
        "browserLog": false,
        "debugLog": false,
        "downloadCss": true,
        "downloadMedia": true,
        "ignoreCorsAndCsp": false,
        "ignoreSslErrors": false,
        "injectJQuery": true,
        "keepUrlFragments": false,
        "maxRequestRetries": input.maxRequestRetries,
        // The page function accepts a single argument: the "context" object.
        // For a complete list of its properties and functions,
        // see https://apify.com/apify/web-scraper#page-function
        "pageFunction": async function pageFunction(context) {
            // This statement works as a breakpoint when you're trying to debug your code.
            // It only works with Run mode: DEVELOPMENT!
            // debugger;

            // jQuery is handy for finding DOM elements and extracting data from them.
            // To use it, make sure the "Inject jQuery" option is enabled.
            const $ = context.jQuery;

            // Give the page a moment to render.
            await context.waitFor(1000);

            // If this is not the reviews page, check whether the product has a dedicated
            // full reviews page, and if so navigate there to get all the reviews.
            if (!context.request.url.endsWith('/reviews/')) {
                context.log.info(context.request.url);

                // Look for a "Reviews" link among the children of the wrapper element.
                const node = document.querySelector('.wrapper');
                let href = '';
                for (let i = 0; i < node.childNodes.length; i++) {
                    if (node.childNodes[i].childNodes[0].innerText === 'Reviews') {
                        try {
                            href = node.childNodes[i].childNodes[0].href;
                        } catch {}
                        break;
                    }
                }
                context.log.info(href);

                // If the link has an href attribute, a full reviews page exists,
                // so enqueue it and finish with this page.
                if (href !== '') {
                    await context.enqueueRequest({ url: href });
                    return;
                }
            }

            await context.waitFor(1500);

            // Return the inner text of the first element matching the selector,
            // or an empty string when no such element exists.
            const textOf = (element, selector) => {
                const found = element.querySelector(selector);
                return found ? found.innerText : '';
            };

            // Scrape the reviews, paginating with the "next" button. The total is
            // capped at 250 results so the run does not time out.
            const results = [];
            let button;
            do {
                // Re-query the review cards on every pass so the rows of the
                // currently displayed page are scraped, not a stale collection.
                const fullReviewData = $("[data-testid='reviews-container']");
                for (let i = 0; i < fullReviewData.length; i++) {
                    const review = fullReviewData[i];
                    results.push({
                        score: review.querySelectorAll("[data-testid='reviewers-rating'] .OverallStarRatingComponent .fullStar").length,
                        companySize: textOf(review, 'div.review-company > p > strong'),
                        industry: textOf(review, 'div.review-gdm-industry > p > strong'),
                        timeUsed: textOf(review, 'div.review-profile-time-used > p > strong'),
                        reviewSource: textOf(review, 'div.tooltip > p'),
                        date: textOf(review, '#reviews-list .review-date'),
                        title: textOf(review, "[data-testid='review-title']"),
                        summary: textOf(review, "[data-testid='review-summary']"),
                        pros: textOf(review, "[data-testid='review-pros']"),
                        cons: textOf(review, "[data-testid='review-cons']"),
                    });
                }
                // Advance to the next page if there is exactly one "next" button.
                button = document.getElementsByClassName('next');
                if (button.length !== 1) {
                    button = null;
                } else {
                    button[0].click();
                    await context.waitFor(1500);
                }
            } while (button !== null && results.length <= 250);

            // Return an object with the data extracted from the page.
            // It will be stored in the resulting dataset.
            return {
                results: results,
            };
        },
        "postNavigationHooks": `// We need to return array of (possibly async) functions here.
            // The functions accept a single argument: the "crawlingContext" object.
            [
                async (crawlingContext) => {
                    // ...
                },
            ]`,
        "preNavigationHooks": `// We need to return array of (possibly async) functions here.
            // The functions accept two arguments: the "crawlingContext" object
            // and "gotoOptions".
            [
                async (crawlingContext, gotoOptions) => {
                    // ...
                },
            ]`,
        "proxyConfiguration": {
            "useApifyProxy": true,
            "apifyProxyCountry": "US"
        },
        "startUrls": input.startUrls,
        "runMode": "PRODUCTION",
        "useChrome": false,
        "waitUntil": [
            "networkidle2"
        ]
    };

    // Now metamorph into the apify/web-scraper actor using the input created above.
    await Apify.metamorph('apify/web-scraper', metamorphInput);
});
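
The pre/post-navigation hook strings above are passed through empty. Purely as an illustration (not part of the original actor), a filled-in preNavigationHooks value could adjust Puppeteer's navigation options through gotoOptions, for example:

// Hypothetical value for metamorphInput.preNavigationHooks. Web Scraper
// evaluates this string and calls each hook before every navigation.
metamorphInput.preNavigationHooks = `[
    async (crawlingContext, gotoOptions) => {
        // Give slow review pages more time to load before navigation fails
        // (the timeout value is an assumption, not taken from the actor).
        gotoOptions.timeout = 60000;
        gotoOptions.waitUntil = 'networkidle2';
    },
]`;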

package.json

{
    "name": "my-actor",
    "version": "0.0.1",
    "dependencies": {
        "apify": "^2.2.2"
    },
    "scripts": {
        "start": "node main.js"
    },
    "author": "Me!"
}
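
To run the actor from another Node.js program, a minimal sketch using the apify-client package might look like the following. The actor identifier and the APIFY_TOKEN environment variable are placeholders; because main.js metamorphs into apify/web-scraper, the scraped reviews end up in the run's default dataset under the "results" key returned by the page function.

const { ApifyClient } = require('apify-client');

const client = new ApifyClient({ token: process.env.APIFY_TOKEN });

(async () => {
    // 'matt/softwareadvicescrapereviews' is a placeholder actor identifier.
    const run = await client.actor('matt/softwareadvicescrapereviews').call({
        startUrls: [
            { url: 'https://www.softwareadvice.com/fleet-management/gps-insight-profile/' },
        ],
        maxRequestRetries: 2,
    });

    // The metamorphed web-scraper run stores its output in the default dataset.
    const { items } = await client.dataset(run.defaultDatasetId).listItems();
    console.dir(items, { depth: null });
})();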