QPublic avatar

QPublic

Deprecated
View all Actors
This Actor is deprecated

This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?

See alternative Actors
QPublic

QPublic

zyberg/qpublic

Dockerfile

1# This is a template for a Dockerfile used to run acts in Actor system.
2# The base image name below is set during the act build, based on user settings.
3# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
4FROM apify/actor-node-chrome:v0.21.10
5
6# Second, copy just package.json and package-lock.json since they should be
7# the only files that affect "npm install" in the next step, to speed up the build
8COPY package*.json ./
9
10# Install NPM packages, skip optional and development dependencies to
11# keep the image small. Avoid logging too much and print the dependency
12# tree for debugging
13RUN npm --quiet set progress=false \
14 && npm install --only=prod --no-optional \
15 && echo "Installed NPM packages:" \
16 && (npm list --all || true) \
17 && echo "Node.js version:" \
18 && node --version \
19 && echo "NPM version:" \
20 && npm --version
21
22# Copy source code to container
23# Do this in the last step, to have fast build if only the source code changed
24COPY --chown=myuser:myuser . ./
25
26# NOTE: The CMD is already defined by the base image.
27# Uncomment this for local node inspector debugging:
28# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]

package.json

1{
2    "name": "apify-project",
3    "version": "0.0.1",
4    "description": "",
5    "author": "It's not you it's me",
6    "license": "ISC",
7    "dependencies": {
8        "apify": "0.21.10",
9        "lodash": "latest"
10    },
11    "scripts": {
12        "start": "node main.js"
13    }
14}

main.js

1const Apify = require('apify');
2var _ = require('lodash');
3
/** Search page that every queued request starts from. */
const BASE_URL_SEARCH = 'https://qpublic.schneidercorp.com/Application.aspx?App=FultonCountyGA&Layer=Parcels&PageType=Search';

/** Name used for both the dataset and the request queue opened by this file. */
const BASE_NAME = 'qPublic';

/** Lookup kinds a request can carry in `userData.type`. */
const TYPES = {
    BY_ADDRESS: 'BY_ADDRESS',
    BY_PARCEL: 'BY_PARCEL',
};
10
/**
 * Blank record skeleton: a `Summary` object of empty strings, an empty
 * `Owner` string, an empty `Land` object and empty lists for the remaining
 * sections.
 */
const TEMPLATE = {
    Summary: {
        'Parcel Number': '',
        'Location Address': '',
        'Legal Description': '',
        'Property Class': '',
        Neighborhood: '',
        'Tax District': '',
        Zoning: '',
        Acres: '',
        Homestead: '',
        Exemptions: '',
    },
    Owner: '',
    Land: {},
    'Residential Improvement Information': [],
    'Accessory Information': [],
    Sales: [],
    Valuation: [],
};
31
/**
 * Formats the current moment as a `YYYY-MM-DD` string built from the UTC
 * year, month and day, zero-padding month and day to two digits.
 *
 * @returns {string} Today's UTC date, e.g. `2021-03-07`.
 */
const getCurrentDate = () => {
    const now = new Date();
    const pad = (value) => (value < 10 ? '0' + value : value);

    // getUTCMonth() is zero-based, hence the + 1.
    const parts = [now.getUTCFullYear(), pad(now.getUTCMonth() + 1), pad(now.getUTCDate())];
    return parts.join('-');
};
40
41Apify.main(async () => {
42    const input = await Apify.getInput();
43    if (!input.addresses) input.addresses = [];
44    if (!input.parcels) input.parcels = [];
45
46    const dataset = await Apify.openDataset(BASE_NAME);
47    const requestQueue = await Apify.openRequestQueue(BASE_NAME);
48    
49    let proxyConfiguration = undefined;
50    try {
51        proxyConfiguration = await Apify.createProxyConfiguration();
52    } catch (error) {
53        console.log('%cWARNING: Set up Proxies for this Apify account!', 'color:Red');
54        console.log('%cWARNING: This run will not use any proxies!', 'color:Red');
55    }
56
57    for (let address of input.addresses) {
58        console.log(address)
59        await requestQueue.addRequest({
60            url: BASE_URL_SEARCH,
61            userData: {
62                address,
63                type: TYPES.BY_ADDRESS
64            },
65            uniqueKey: address + '-' + (new Date()).getTime()
66        });
67    }
68
69    for (let parcel of input.parcels) {
70        await requestQueue.addRequest({
71            url: BASE_URL_SEARCH,
72            userData: {
73                parcel,
74                type: TYPES.BY_PARCEL
75            },
76            uniqueKey: parcel + '-' + (new Date()).getTime()
77        })
78    }
79
80    const crawler = new Apify.PuppeteerCrawler({
81        requestQueue,
82        maxRequestsPerCrawl: 0,
83        maxConcurrency: 4,
84        handlePageTimeoutSecs: 60 * 60,
85        proxyConfiguration: proxyConfiguration,
86        useSessionPool: true,
87        sessionPoolOptions: {
88            sessionOptions: { maxPoolSize: 1 },
89        },
90        launchPuppeteerOptions: {
91            args: [
92                '--headless',
93                '--disable-background-timer-throttling',
94                '--disable-backgrounding-occluded-windows',
95                '--disable-renderer-backgrounding',
96            ],
97        },
98        handlePageFunction: async ({ request, page }) => {
99            const waitIfNone = async (selector) => {
100                if (await page.$(selector) === null)
101                    await page.waitForSelector(selector);
102            }
103
104            const run = {
105                scrapeByAddress: async (query) => {
106                    const selectors = {
107                        inputSearchField: '#ctlBodyPane_ctl01_ctl01_txtAddress',
108                        inputSearchButton: '#ctlBodyPane_ctl01_ctl01_btnSearch',
109                        noData: '#ctlBodyPane_noDataList_pnlNoResults'
110                    };
111
112                    waitIfNone(selectors.inputSearchField); 
113                    await run.checkForTermsAndConditions();
114
115                    /* Enter query */
116                    await page.focus(selectors.inputSearchField);
117                    await page.keyboard.type(query);
118
119                    await page.click(selectors.inputSearchButton)
120                    await new Promise(res => setTimeout(res, 3 * 1000));
121
122                    /* Check: is found? */
123                    let result = null
124                    if (await page.$(selectors.noData) !== null) {
125                        result = await run.fillEmpty(query);
126                        
127                        console.log(`No data found for address "${query}"`);
128                    } else {
129                        /* Scrape data */
130                        result = await run.scrapeData();
131                        console.log(`Successfully scraped data for address "${query}"`);
132                    }
133
134                    /* Save data */
135                    dataset.pushData(result);
136                },
137                scrapeByParcel: async (query) => {
138                    const selectors = {
139                        inputSearchField: '#ctlBodyPane_ctl02_ctl01_txtParcelID',
140                        inputSearchButton: '#ctlBodyPane_ctl02_ctl01_btnSearch',
141                        noData: '#ctlBodyPane_noDataList_pnlNoResults'
142                    };
143
144                    waitIfNone(selectors.inputSearchField); 
145                    await run.checkForTermsAndConditions();
146
147                    /* Enter query */
148                    await page.focus(selectors.inputSearchField);
149                    await page.keyboard.type(query);
150
151                    await page.click(selectors.inputSearchButton)
152                    await new Promise(res => setTimeout(res, 3 * 1000));
153
154                    /* Check: is found? */
155                    let result = null
156                    if (await page.$(selectors.noData) !== null) {
157                        result = await run.fillEmpty(query);
158                        
159                        console.log(`No data found for parcel "${query}"`);
160                    } else {
161                        /* Scrape data */
162                        result = await run.scrapeData();
163                        console.log(`Successfully scraped data for parcel "${query}"`);
164                    }
165
166                    /* Save data */
167                    dataset.pushData(result);
168                },
169                checkForTermsAndConditions: async () => {
170                    const selectors = {
171                        modal: '[aria-label="Terms and Conditions"]',
172                        buttonAgree: '[aria-label="Terms and Conditions"] .button-1'
173                    };
174
175                    if (await page.$(selectors.modal) !== null) {
176                        waitIfNone(selectors.buttonAgree);
177                        await page.click(selectors.buttonAgree);
178                    }
179                },
180                fillEmpty: async (query) => {
181                    const templateFilled = TEMPLATE;
182
183                    templateFilled['Summary']['Location Address'] = query
184
185                    return templateFilled
186                },
187                scrapeData: async () => {
188                    const selectors = {
189                        tableRowSummary: '#ctlBodyPane_ctl00_mSection .module-content table tr',
190                        
191                        sectionOwner: '#ctlBodyPane_ctl01_mSection .module-content .four-column-blocks:first-child',
192                        
193                        tableHeadingLand: '#ctlBodyPane_ctl02_mSection table thead tr th',
194                        tableRowLand: '#ctlBodyPane_ctl02_mSection table tbody tr',
195                        totalAcres: '#ctlBodyPane_ctl02_mSection .block-row > .four-column-blocks:nth-child(2)',
196                        totalLandValue: '#ctlBodyPane_ctl02_mSection .block-row div .four-column-blocks:nth-child(2)',
197
198                        rowResidentialInformation: '#ctlBodyPane_ctl03_mSection .block-row',
199                        tableResidentialInformation: 'table tr',
200
201                        rowCommercialInformation: '#ctlBodyPane_ctl04_mSection .block-row',
202                        tableCommercialInformation: 'table tr',
203
204                        tableHeadingAccessory: '#ctlBodyPane_ctl05_mSection table table thead tr th',
205                        tableRowAccessory: '#ctlBodyPane_ctl05_mSection table table tbody tr',
206
207                        tableHeadingSales: '#ctlBodyPane_ctl06_mSection table thead tr th',
208                        tableRowSales: '#ctlBodyPane_ctl06_mSection table tbody tr',
209
210                        tableHeadingValuation: '#ctlBodyPane_ctl07_mSection table tbody tr th',
211                        tableRowValuation: '#ctlBodyPane_ctl07_mSection table tr'
212                    };
213
214                    waitIfNone(selectors.tableRowSummary);
215
216                    /* Get Summary */
217                    let tableSummary = null;
218                    if (await page.$(selectors.tableRowSummary) !== null) {
219                        tableSummary = await page.$$eval(selectors.tableRowSummary, (trs) => trs.reduce(
220                            (acc, el) => {
221                                const row = Array.from(el.children).map(e => e.innerText);
222                                acc[row[0].trim()] = row[1].trim();
223                                return acc
224                            }, {}));
225    
226                        delete tableSummary['View Map'];
227                    }
228
229
230                    /* Get Owner */
231                    let infoOwner = null;
232                    if (await page.$(selectors.sectionOwner) !== null)
233                        infoOwner = await page.$eval(selectors.sectionOwner, el => el.innerText);
234
235
236                    /* Get Land */
237                    let tableLand = null;
238                    let totalAcres = null;
239                    let totalLandValue = null;
240                    if (await page.$(selectors.tableHeadingLand) !== null) {
241                        const headingsLand = await page.$$eval(selectors.tableHeadingLand, ths => ths.map(th => th.innerText))
242                        tableLand = await page.$$eval(selectors.tableRowLand, (trs, headings) => trs.map(tr => {
243                            return Array.from(tr.children).reduce((acc, c, index) => {
244                                 acc[headings[index]] = c.innerText.trim();
245                                 return acc;
246                            }, {});
247                        }), headingsLand);
248    
249                        totalAcres = await page.$eval(selectors.totalAcres, el => el.innerText);
250                        totalLandValue = await page.$eval(selectors.totalLandValue, el => el.innerText);
251                    }
252
253
254                    /* Get Residential Improvement information */
255                    let cardsResidentialImprovementInformation = null
256                    if (await page.$(selectors.tableResidentialInformation) !== null) {
257                        cardsResidentialImprovementInformation = await page.$$eval(selectors.rowResidentialInformation, (rows, selectorTrs) => rows.map(row => {
258                            return Array.from(row.querySelectorAll(selectorTrs)).reduce(
259                                (acc, el) => {
260                                    const row = Array.from(el.children).map(e => e.innerText);
261                                    acc[row[0].trim()] = row[1].trim();
262                                    return acc
263                                }, {})
264                        }), selectors.tableResidentialInformation)
265                    }
266
267
268                    /* Get Commercial Improvement information */
269                    let cardsCommercialImprovementInformation = null
270                    if (await page.$(selectors.tableCommercialInformation) !== null) {
271                        cardsCommercialImprovementInformation = await page.$$eval(selectors.rowCommercialInformation, (rows, selectorTrs) => rows.map(row => {
272                            return Array.from(row.querySelectorAll(selectorTrs)).reduce(
273                                (acc, el) => {
274                                    const row = Array.from(el.children).map(e => e.innerText);
275                                    acc[row[0].trim()] = row[1].trim();
276                                    return acc
277                                }, {})
278                        }), selectors.tableCommercialInformation)
279                    }
280
281
282                    /* Get Accessory Information */
283                    let tableAccessory = null;
284                    if (await page.$(selectors.tableHeadingAccessory) !== null) {
285                        const headingsAccessory = await page.$$eval(selectors.tableHeadingAccessory, ths => ths.map(th => th.innerText))
286                        tableAccessory = await page.$$eval(selectors.tableRowAccessory, (trs, headings) => trs.map(tr => {
287                            return Array.from(tr.children).reduce((acc, c, index) => {
288                                 acc[headings[index]] = c.innerText.trim();
289                                 return acc;
290                            }, {});
291                        }), headingsAccessory);
292                    }
293
294
295                    /* Get Sales */
296                    let tableSales = null;
297                    if (await page.$(selectors.tableHeadingSales) !== null) {
298                        const headingsSales = await page.$$eval(selectors.tableHeadingSales, ths => ths.map(th => th.innerText))
299                        tableSales = await page.$$eval(selectors.tableRowSales, (trs, headings) => trs.map(tr => {
300                            return Array.from(tr.children).reduce((acc, c, index) => {
301                                 acc[headings[index]] = c.innerText.trim();
302                                 return acc;
303                            }, {});
304                        }), headingsSales);
305                    }
306                    
307
308                    /* Get Valuation */
309                    let tableValuation = null;
310                    if (await page.$(selectors.tableHeadingValuation)) {
311                        const headingsValuation = await page.$$eval(
312                            selectors.tableHeadingValuation,
313                            ths => ths.map(th => th.innerText)
314                        );
315                        headingsValuation.unshift('Year');
316    
317                        const rows = await page.$$eval(selectors.tableRowValuation, trs => trs.map(tr => {
318                            const tds = [...tr.children];
319                            return tds.map(td => td.textContent);
320                        }));
321    
322                        const templateTableValuation = (new Array(rows[1].length - 2)).fill(null).map(i => ({}));
323                        tableValuation = rows.reduce((acc, row, index) => {
324                            for(let i = 2; i < row.length; i++) {
325                                if (headingsValuation[index].trim() !== '')
326                                    acc[i - 2][headingsValuation[index]] = row[i];
327                            }
328                            return acc;
329                        }, templateTableValuation);
330                    }
331
332                    return [
333                        {
334                            'Scraped At': getCurrentDate(),
335                            'Summary': tableSummary,
336                            'Owner': infoOwner,
337                            'Land': {
338                                'table': tableLand,
339                                'totalAcres': totalAcres,
340                                'totalLandValue': totalLandValue
341                            },
342                            'Residential Improvement Information': cardsResidentialImprovementInformation,
343                            'Commercial Improvement Information': cardsCommercialImprovementInformation,
344                            'Accessory Information': tableAccessory,
345                            'Sales': tableSales,
346                            'Valuation': tableValuation,
347                        }
348                    ]
349                }
350            };
351
352            if (request.userData.type === TYPES.BY_ADDRESS)
353                await run.scrapeByAddress(request.userData.address);
354            else if (request.userData.type === TYPES.BY_PARCEL)
355                await run.scrapeByParcel(request.userData.parcel)
356        }
357    });
358    await crawler.run();
359});
Developer
Maintained by Community
Categories