Yellow-Pages-Scraper-withEmail avatar
Yellow-Pages-Scraper-withEmail

Under maintenance

Pricing

Pay per usage

Go to Store
Yellow-Pages-Scraper-withEmail

Yellow-Pages-Scraper-withEmail

Under maintenance

Developed by

Krish Patel

Krish Patel

Maintained by Community

Gathers leads from Yellow Pages along with each listing's email address, when one is available.

0.0 (0)

Pricing

Pay per usage

5

Total users

650

Monthly users

5

Runs succeeded

0.93%

Last modified

2 years ago

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"extends": "@apify"
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.idea
node_modules

Dockerfile

# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16
# Second, copy just package.json and package-lock.json, since they should be
# the only files that affect "npm install" in the next step. Copying them
# separately speeds up the build.
COPY package*.json ./
# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging. The "|| true" after "npm list" keeps the command chain
# going even if "npm list" exits with a non-zero status.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list --only=prod --no-optional --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick builds will be really fast
# for most source file changes.
COPY . ./
# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start
#
# No CMD is declared in this file, so the base image's default applies.

INPUT_SCHEMA.json

{
"title": "Yellowpages scraper input schema",
"type": "object",
"schemaVersion": 1,
"properties": {
"search": {
"title": "Search",
"type": "string",
"editor": "textfield",
"prefill": "Dentist",
"description": "Query to search on Yellow Pages"
},
"location": {
"title": "Location",
"type": "string",
"editor": "textfield",
"prefill": "Los Angeles",
"description": "Location to search entries in"
},
"startUrls": {
"title": "Start URLs",
"type": "array",
"editor": "requestListSources",
"description": "List of URLs that will be crawled."
},
"maxItems": {
"title": "Max items",
"type": "integer",
"minimum": 0,
"prefill": 200,
"description": "Maximum number of outputted results"
},
"extendOutputFunction": {
"title": "Extend output function",
"type": "string",
"editor": "javascript",
"description": "A function whose result will be merged with the default result",
"prefill": "($, record) => {\n return {};\n}",
"sectionCaption": "Advanced options"
},
"proxyConfiguration": {
"title": "Proxy configuration",
"type": "object",
"editor": "proxy",
"description": "In this case the best option is usually no proxy. Otherwise US or UK proxies work best.",
"prefill": { "useApifyProxy": false }
}
}
}

apify.json

{
"env": { "npm_config_loglevel": "silent" }
}

main.js

// This is the main Node.js source code file of your actor.

// Import Apify SDK. For more information, see https://sdk.apify.com/
const Apify = require('apify');
const axios = require('axios');
const cheerio = require('cheerio');

// Value stored in the `email` field when no address could be determined.
const NO_EMAIL = 'none found';

/**
 * Extracts the e-mail address from the `href` of a ".email-business" link.
 *
 * Only `mailto:` links are accepted. Anything after a `?` (e.g. a
 * `?subject=...` query) is dropped so that only the address is returned.
 *
 * @param {string|undefined} href - Raw `href` attribute value, if any.
 * @returns {string} The address, or "none found" when `href` is missing,
 *     is not a `mailto:` link, or carries an empty address.
 */
function extractEmail(href) {
    if (typeof href !== 'string') return NO_EMAIL;

    const match = /^\s*mailto:([^?]*)/i.exec(href);
    const address = match ? match[1].trim() : '';

    return address === '' ? NO_EMAIL : address;
}

/**
 * Downloads a listing page and looks up its e-mail link.
 *
 * A failed download is logged and reported as "none found" rather than
 * thrown, so one unreachable page cannot abort the whole run.
 *
 * @param {string|undefined} url - Detail page URL taken from the scraped record.
 * @returns {Promise<string>} The e-mail address, or "none found".
 */
async function findEmail(url) {
    if (!url) return NO_EMAIL;

    try {
        const response = await axios.get(url);
        const $ = cheerio.load(response.data);
        return extractEmail($('.email-business').attr('href'));
    } catch (err) {
        Apify.utils.log.warning(`Could not fetch ${url}: ${err.message}`);
        return NO_EMAIL;
    }
}

Apify.main(async () => {
    // Delegate the actual listing scrape to the upstream actor, forwarding
    // this actor's input unchanged.
    const input = await Apify.getInput();
    const run = await Apify.call('petr_cermak/yellow-pages-scraper', input);

    // Read the records the upstream run stored in its default dataset.
    const dataset = await Apify.openDataset(run.defaultDatasetId);
    const { items } = await dataset.getData();

    // Pages are fetched one at a time, as in the original implementation.
    for (const item of items) {
        const email = await findEmail(item.url);
        // Push a copy instead of mutating the record read from the dataset.
        await Apify.pushData({ ...item, email });
    }
});

package.json

{
"name": "project-empty",
"version": "0.0.1",
"description": "This is a boilerplate of an Apify actor.",
"dependencies": {
"apify": "^2.3.2"
},
"devDependencies": {
"@apify/eslint-config": "^0.1.3",
"eslint": "^7.0.0"
},
"scripts": {
"start": "node main.js",
"lint": "./node_modules/.bin/eslint ./src --ext .js,.jsx",
"lint:fix": "./node_modules/.bin/eslint ./src --ext .js,.jsx --fix",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
},
"author": "It's not you it's me",
"license": "ISC"
}