SoftwareAdviceProductNameSearch
undrtkr984/softwareadviceproductnamesearch
Dockerfile
# First, specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16

# Second, copy just package.json and package-lock.json since those are the only
# files that affect "npm install" in the next step, to speed up the build.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
    && npm install --only=prod --no-optional \
    && echo "Installed NPM packages:" \
    && (npm list || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick builds will be really fast
# for most source file changes.
COPY . ./

# Optionally, specify how to launch the source code of your actor.
# By default, Apify's base Docker images define the CMD instruction
# that runs the Node.js source code using the command specified
# in the "scripts.start" section of the package.json file.
# In short, the instruction looks something like this:
#
# CMD npm start
INPUT_SCHEMA.json
{
    "title": "Input Schema",
    "description": "To point the crawler at another site, change the startUrls option.",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "startUrls": {
            "title": "Start URLs",
            "type": "array",
            "description": "A static list of URLs to scrape. <br><br>For details, see <a href='https://apify.com/apify/web-scraper#start-urls' target='_blank' rel='noopener'>Start URLs</a> in README.",
            "prefill": [{ "url": "https://crawlee.dev" }],
            "editor": "requestListSources"
        },
        "maxRequestRetries": {
            "title": "Max request retries",
            "type": "integer",
            "description": "The maximum number of times a failed request is retried before it is marked as failed.",
            "prefill": 2,
            "default": 2,
            "editor": "number"
        }
    },
    "required": ["startUrls"]
}
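The schema only guarantees that startUrls is present; maxRequestRetries falls back to its default of 2 when omitted. A minimal sketch of how an actor could defensively read this input (the validation below is illustrative and is not part of this actor's source):

// A minimal sketch, assuming the input shape defined by INPUT_SCHEMA.json above.
const Apify = require('apify');

Apify.main(async () => {
    const { startUrls, maxRequestRetries = 2 } = (await Apify.getInput()) ?? {};
    if (!Array.isArray(startUrls) || startUrls.length === 0) {
        throw new Error('Input must contain at least one entry in "startUrls".');
    }
    console.log(`Scraping ${startUrls.length} start URL(s) with up to ${maxRequestRetries} retries.`);
});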
main.js
// This is the main Node.js source code file of your actor.
// It is referenced from the "scripts" section of the package.json file.

const Apify = require('apify');

Apify.main(async () => {
    // Get the input of the actor. Input fields can be modified in the INPUT_SCHEMA.json file.
    // For more information, see https://docs.apify.com/actors/development/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Here you can prepare your input for the actor apify/web-scraper. This input is based
    // on the actor task you used as the starting point.
    const metamorphInput = {
        "breakpointLocation": "NONE",
        "browserLog": false,
        "debugLog": false,
        "downloadCss": true,
        "downloadMedia": true,
        "ignoreCorsAndCsp": false,
        "ignoreSslErrors": false,
        "injectJQuery": true,
        "keepUrlFragments": false,
        "maxRequestRetries": input.maxRequestRetries,
        // The function accepts a single argument: the "context" object.
        // For a complete list of its properties and functions,
        // see https://apify.com/apify/web-scraper#page-function
        "pageFunction": async function pageFunction(context) {
            // This statement works as a breakpoint when you're trying to debug your code.
            // It works only with Run mode: DEVELOPMENT!
            // debugger;

            // Give the product cards 2 seconds to render before querying the DOM.
            await context.waitFor(2000);

            // Every product card on a Software Advice search results page is
            // rendered inside this component.
            const products = document.querySelectorAll('#SearchProductCardComponent');

            const results = [];
            for (let i = 0; i < products.length; i++) {
                let logoURL = '';
                let url = '';
                let name = '';
                let category = '';
                // Each field is wrapped in try/catch so one malformed card
                // does not abort the extraction of the rest.
                try { logoURL = products[i].getElementsByTagName('img')[0].src; } catch (error) {}
                try { url = products[i].childNodes[1].childNodes[0].href; } catch (error) {}
                try { name = products[i].childNodes[1].childNodes[0].text; } catch (error) {}
                try { category = products[i].childNodes[1].childNodes[1].text; } catch (error) {}
                results.push({ name, url, logoURL, category });
            }

            // Return an object with the data extracted from the page.
            // It will be stored to the resulting dataset.
            return { results };
        },
        "postNavigationHooks": `// We need to return array of (possibly async) functions here.
        // The functions accept a single argument: the "crawlingContext" object.
        [
            async (crawlingContext) => {
                // ...
            },
        ]`,
        "preNavigationHooks": `// We need to return array of (possibly async) functions here.
        // The functions accept two arguments: the "crawlingContext" object
        // and "gotoOptions".
        [
            async (crawlingContext, gotoOptions) => {
                // ...
            },
        ]`,
        "proxyConfiguration": {
            "useApifyProxy": true,
            "apifyProxyCountry": "US"
        },
        "startUrls": input.startUrls,
        "runMode": "PRODUCTION",
        "useChrome": false,
        "waitUntil": ["networkidle2"]
    };

    // Now let's metamorph into the actor apify/web-scraper using the created input.
    await Apify.metamorph('apify/web-scraper', metamorphInput);
});
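After Apify.metamorph runs, apify/web-scraper takes over the run, and the objects returned by pageFunction land in the run's default dataset. A minimal sketch of reading them afterwards with the apify-client package (the run ID below is a placeholder, not a value from this actor):

// A minimal sketch, assuming the apify-client NPM package and a finished run.
// APIFY_TOKEN and <RUN_ID> are placeholders.
const { ApifyClient } = require('apify-client');

(async () => {
    const client = new ApifyClient({ token: process.env.APIFY_TOKEN });
    const run = await client.run('<RUN_ID>').get();
    const { items } = await client.dataset(run.defaultDatasetId).listItems();
    for (const item of items) {
        // Each item has the shape returned by pageFunction:
        // { results: [{ name, url, logoURL, category }, ...] }
        console.dir(item.results);
    }
})();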
package.json
{
    "name": "my-actor",
    "version": "0.0.1",
    "dependencies": {
        "apify": "^2.2.2"
    },
    "scripts": {
        "start": "node main.js"
    },
    "author": "Me!"
}
Developer
Maintained by Community
Actor Metrics
1 monthly user
1 star
>99% runs succeeded
Created in Jan 2023
Modified 2 years ago