Yellow-Pages-Scraper-withEmail avatar

Yellow-Pages-Scraper-withEmail

Under maintenance
Try for free

No credit card required

Go to Store
This Actor is under maintenance.

This Actor may be unreliable while under maintenance. Would you like to try a similar Actor instead?

See alternative Actors
Yellow-Pages-Scraper-withEmail

Yellow-Pages-Scraper-withEmail

krish_patel/yellow-pages-scraper-withemail
Try for free

No credit card required

Lets you gather leads from Yellow Pages along with their corresponding email addresses, if any.

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "extends": "@apify"
3}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.idea
4node_modules

Dockerfile

1# First, specify the base Docker image. You can read more about
2# the available images at https://sdk.apify.com/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node:16
5
6# Second, copy just package.json and package-lock.json since they should be
7# the only files that affect "npm install" in the next step, to speed up the build
8COPY package*.json ./
9
10# Install NPM packages, skip optional and development dependencies to
11# keep the image small. Avoid logging too much and print the dependency
12# tree for debugging
13RUN npm --quiet set progress=false \
14 && npm install --only=prod --no-optional \
15 && echo "Installed NPM packages:" \
16 && (npm list --only=prod --no-optional --all || true) \
17 && echo "Node.js version:" \
18 && node --version \
19 && echo "NPM version:" \
20 && npm --version
21
22# Next, copy the remaining files and directories with the source code.
23# Since we do this after NPM install, quick builds will be really fast
24# for most source file changes.
25COPY . ./
26
27# Optionally, specify how to launch the source code of your actor.
28# By default, Apify's base Docker images define the CMD instruction
29# that runs the Node.js source code using the command specified
30# in the "scripts.start" section of the package.json file.
31# In short, the instruction looks something like this:
32#
33# CMD npm start

INPUT_SCHEMA.json

1{
2    "title": "Yellowpages scraper input schema",
3    "type": "object",
4    "schemaVersion": 1,
5    "properties": {
6        "search": {
7            "title": "Search",
8            "type": "string",
9            "editor": "textfield",
10            "prefill": "Dentist",
11            "description": "Query to search on Yellow Pages"
12        },
13        "location": {
14            "title": "Location",
15            "type": "string",
16            "editor": "textfield",
17            "prefill": "Los Angeles",
18            "description": "Location to search entries in"
19        },
20        "startUrls": {
21            "title": "Start URLs",
22            "type": "array",
23            "editor": "requestListSources",
24            "description": "List of URLs that will be crawled."
25        },
26        "maxItems": {
27            "title": "Max items",
28            "type": "integer",
29            "minimum": 0,
30            "prefill": 200,
31            "description": "Maximum number of outputted results"
32        },
33        "extendOutputFunction": {
34            "title": "Extend output function",
35            "type": "string",
36            "editor": "javascript",
37            "description": "A function whose result will be merged with the default result",
38            "prefill": "($, record) => {\n    return {};\n}",
39            "sectionCaption": "Advanced options"
40        },
41        "proxyConfiguration": {
42            "title": "Proxy configuration",
43            "type": "object",
44            "editor": "proxy",
45            "description": "In this case the best option is usually no proxy. Otherwise US or UK proxies work best.",
46            "prefill": { "useApifyProxy": false }
47        }
48    }
49}

apify.json

1{
2    "env": { "npm_config_loglevel": "silent" }
3}

main.js

// This is the main Node.js source code file of your actor.
//
// It runs the `petr_cermak/yellow-pages-scraper` actor with this actor's own
// input, then fetches the page behind every resulting record's `url`, reads
// the `href` of the first `.email-business` element and pushes the record,
// extended with an `email` field, to this actor's default dataset.

// Import Apify SDK. For more information, see https://sdk.apify.com/
const Apify = require('apify');
// Loaded up front so a missing module fails before the sub-actor is started.
const axios = require('axios');
const cheerio = require('cheerio');

const { log } = Apify.utils;

// Value stored in `email` when a record has no usable e-mail link.
const NO_EMAIL = 'none found';

// Upper bound for a single detail-page request, so that one unresponsive
// page cannot stall the whole run.
const REQUEST_TIMEOUT_MILLIS = 30000;

/**
 * Turns the `href` of an e-mail link into a plain address.
 *
 * @param {string|undefined} href - Raw `href` attribute, e.g. `mailto:a@b.com?subject=Hi`.
 * @returns {string} The address without the `mailto:` scheme and without any
 *   `?query` part, or `NO_EMAIL` when `href` is missing or holds no address.
 */
function extractEmail(href) {
    if (typeof href !== 'string') return NO_EMAIL;
    const address = href.replace(/^mailto:/i, '').split('?')[0].trim();
    return address === '' ? NO_EMAIL : address;
}

/**
 * Downloads a detail page and extracts the e-mail from its `.email-business` link.
 *
 * Failures are logged and reported as `NO_EMAIL` instead of being thrown, so
 * that one bad record does not abort the run and discard the remaining ones.
 *
 * @param {string|undefined} url - Detail page URL taken from a scraped record.
 * @returns {Promise<string>} The extracted address or `NO_EMAIL`.
 */
async function fetchEmail(url) {
    if (typeof url !== 'string' || url === '') return NO_EMAIL;
    try {
        const response = await axios.get(url, { timeout: REQUEST_TIMEOUT_MILLIS });
        const $ = cheerio.load(response.data);
        return extractEmail($('.email-business').attr('href'));
    } catch (err) {
        log.warning(`Could not fetch e-mail from ${url}: ${err.message}`);
        return NO_EMAIL;
    }
}

Apify.main(async () => {
    const input = await Apify.getInput();
    const run = await Apify.call('petr_cermak/yellow-pages-scraper', input);

    // Read the records produced by the scraper run from its default dataset.
    const dataset = await Apify.openDataset(run.defaultDatasetId);
    const { items } = await dataset.getData();

    // Processed one at a time, in dataset order, as in the original loop.
    for (const record of items) {
        const email = await fetchEmail(record.url);
        await Apify.pushData({ ...record, email });
    }
});

package.json

1{
2    "name": "project-empty",
3    "version": "0.0.1",
4    "description": "This is a boilerplate of an Apify actor.",
5    "dependencies": {
6        "apify": "^2.3.2"
7    },
8    "devDependencies": {
9        "@apify/eslint-config": "^0.1.3",
10        "eslint": "^7.0.0"
11    },
12    "scripts": {
13        "start": "node main.js",
14        "lint": "./node_modules/.bin/eslint ./src --ext .js,.jsx",
15        "lint:fix": "./node_modules/.bin/eslint ./src --ext .js,.jsx --fix",
16        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
17    },
18    "author": "It's not you it's me",
19    "license": "ISC"
20}
Developer
Maintained by Community

Actor Metrics

  • 11 monthly users

  • 3 stars

  • 56% runs succeeded

  • Created in Aug 2022

  • Modified 2 years ago