QPublic
Go to Store
This Actor is unavailable because the developer has decided to deprecate it. Would you like to try a similar Actor instead?
See alternative Actors
QPublic
zyberg/qpublic
Dockerfile
# Dockerfile template used to run acts in the Actor system.
# The base image named below is filled in during the act build from the
# user's settings.
# IMPORTANT: the base image must set a correct working directory,
# for example /usr/src/app or /home/user.
FROM apify/actor-node-chrome:v0.21.10

# Copy only package.json and package-lock.json first: they should be the only
# files that affect "npm install" below, which speeds up the build.
COPY package*.json ./

# Install NPM packages without optional and development dependencies to keep
# the image small. Keep npm quiet, then print the dependency tree and the
# Node.js / NPM versions for debugging.
RUN npm --quiet set progress=false && \
    npm install --only=prod --no-optional && \
    echo "Installed NPM packages:" && \
    (npm list --all || true) && \
    echo "Node.js version:" && \
    node --version && \
    echo "NPM version:" && \
    npm --version

# Copy the source code into the container as the last step, so a build is
# fast when only the source code changed.
COPY . ./

# NOTE: the CMD is already defined by the base image.
# Uncomment the following line for local debugging with the Node inspector:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
1{
2 "name": "apify-project",
3 "version": "0.0.1",
4 "description": "",
5 "author": "It's not you it's me",
6 "license": "ISC",
7 "dependencies": {
8 "apify": "0.21.10",
9 "lodash": "latest"
10 },
11 "scripts": {
12 "start": "node main.js"
13 }
14}
main.js
1const Apify = require('apify');
2var _ = require('lodash');
3
/** Search page that every request starts from (App=FultonCountyGA, Layer=Parcels). */
const BASE_URL_SEARCH = 'https://qpublic.schneidercorp.com/Application.aspx?App=FultonCountyGA&Layer=Parcels&PageType=Search';

/** Name shared by the dataset and the request queue opened by this actor. */
const BASE_NAME = 'qPublic';

/**
 * Kinds of search a request can perform; stored in `request.userData.type`.
 * Frozen so the enum cannot be altered by accident at runtime.
 */
const TYPES = Object.freeze({
    BY_ADDRESS: 'BY_ADDRESS',
    BY_PARCEL: 'BY_PARCEL',
});
10
/**
 * Skeleton of the record that is stored when a search yields no results.
 * Every field starts out empty; the caller fills in what it knows.
 */
const TEMPLATE = {
    Summary: {
        'Parcel Number': '',
        'Location Address': '',
        'Legal Description': '',
        'Property Class': '',
        Neighborhood: '',
        'Tax District': '',
        Zoning: '',
        Acres: '',
        Homestead: '',
        Exemptions: '',
    },
    Owner: '',
    Land: {},
    'Residential Improvement Information': [],
    'Accessory Information': [],
    Sales: [],
    Valuation: [],
};
31
/**
 * Returns today's date in UTC, formatted as `YYYY-MM-DD`.
 *
 * @returns {string} The current UTC date with zero-padded month and day.
 */
function getCurrentDate() {
    const now = new Date();
    const twoDigits = (value) => String(value).padStart(2, '0');

    return [
        now.getUTCFullYear(),
        twoDigits(now.getUTCMonth() + 1),
        twoDigits(now.getUTCDate()),
    ].join('-');
}
40
/**
 * The two search forms on the search page, keyed by request type.
 * `label` is the word used in log messages and `userDataKey` is the
 * `request.userData` property that carries the query.
 */
const SEARCH_FORMS = {
    [TYPES.BY_ADDRESS]: {
        label: 'address',
        userDataKey: 'address',
        inputSearchField: '#ctlBodyPane_ctl01_ctl01_txtAddress',
        inputSearchButton: '#ctlBodyPane_ctl01_ctl01_btnSearch',
    },
    [TYPES.BY_PARCEL]: {
        label: 'parcel',
        userDataKey: 'parcel',
        inputSearchField: '#ctlBodyPane_ctl02_ctl01_txtParcelID',
        inputSearchButton: '#ctlBodyPane_ctl02_ctl01_btnSearch',
    },
};

/** Panel that is present on the page when a search produced no results. */
const NO_DATA_SELECTOR = '#ctlBodyPane_noDataList_pnlNoResults';

/** Selectors of the "Terms and Conditions" modal and its agree button. */
const TERMS_SELECTORS = {
    modal: '[aria-label="Terms and Conditions"]',
    buttonAgree: '[aria-label="Terms and Conditions"] .button-1',
};

/** Selectors of the sections read from a result page. */
const DETAIL_SELECTORS = {
    tableRowSummary: '#ctlBodyPane_ctl00_mSection .module-content table tr',

    sectionOwner: '#ctlBodyPane_ctl01_mSection .module-content .four-column-blocks:first-child',

    tableHeadingLand: '#ctlBodyPane_ctl02_mSection table thead tr th',
    tableRowLand: '#ctlBodyPane_ctl02_mSection table tbody tr',
    totalAcres: '#ctlBodyPane_ctl02_mSection .block-row > .four-column-blocks:nth-child(2)',
    totalLandValue: '#ctlBodyPane_ctl02_mSection .block-row div .four-column-blocks:nth-child(2)',

    rowResidentialInformation: '#ctlBodyPane_ctl03_mSection .block-row',
    tableResidentialInformation: 'table tr',

    rowCommercialInformation: '#ctlBodyPane_ctl04_mSection .block-row',
    tableCommercialInformation: 'table tr',

    tableHeadingAccessory: '#ctlBodyPane_ctl05_mSection table table thead tr th',
    tableRowAccessory: '#ctlBodyPane_ctl05_mSection table table tbody tr',

    tableHeadingSales: '#ctlBodyPane_ctl06_mSection table thead tr th',
    tableRowSales: '#ctlBodyPane_ctl06_mSection table tbody tr',

    tableHeadingValuation: '#ctlBodyPane_ctl07_mSection table tbody tr th',
    tableRowValuation: '#ctlBodyPane_ctl07_mSection table tr',
};

/*
 * The functions below are handed to page.$eval / page.$$eval and therefore run
 * inside the browser page. They must be self-contained: they cannot use any
 * variable from this file.
 */

/** Turns "label | value" rows into one object; a row without a value cell maps to ''. */
const rowsToKeyValue = (trs) => trs.reduce((acc, tr) => {
    const cells = Array.from(tr.children).map((cell) => cell.innerText);
    if (cells.length > 0) {
        acc[cells[0].trim()] = (cells[1] || '').trim();
    }
    return acc;
}, {});

/** Same as rowsToKeyValue, applied to the rows matching `rowSelector` inside each card. */
const cardsToKeyValue = (cards, rowSelector) => cards.map(
    (card) => Array.from(card.querySelectorAll(rowSelector)).reduce((acc, tr) => {
        const cells = Array.from(tr.children).map((cell) => cell.innerText);
        if (cells.length > 0) {
            acc[cells[0].trim()] = (cells[1] || '').trim();
        }
        return acc;
    }, {}),
);

/** Turns table body rows into objects keyed by the column headings. */
const rowsToRecords = (trs, headings) => trs.map(
    (tr) => Array.from(tr.children).reduce((acc, cell, index) => {
        acc[headings[index]] = cell.innerText.trim();
        return acc;
    }, {}),
);

/** Rendered text of every matched element. */
const elementsToText = (elements) => elements.map((element) => element.innerText);

/** Rendered text of a single element. */
const elementToText = (element) => element.innerText;

/** Raw text content of every cell, row by row. */
const rowsToCellText = (trs) => trs.map(
    (tr) => Array.from(tr.children).map((cell) => cell.textContent),
);

/**
 * Pivots the valuation table, whose rows are attributes and whose columns
 * (from the third cell on) are individual valuations, into one object per column.
 *
 * @param {string[]} headings Row labels of the body rows; 'Year' is prepended for the first row.
 * @param {string[][]} rows Cell texts of every table row.
 * @returns {Object[]} One object per value column, keyed by row label. Rows
 *     whose label is missing or blank are skipped.
 */
const pivotValuation = (headings, rows) => {
    const labels = ['Year', ...headings];
    // Size by the widest row so a ragged table cannot index past the end.
    const columnCount = rows.reduce((widest, row) => Math.max(widest, row.length - 2), 0);
    const columns = Array.from({ length: columnCount }, () => ({}));

    rows.forEach((row, rowIndex) => {
        const label = labels[rowIndex];
        if (label === undefined || label.trim() === '') return;
        for (let i = 2; i < row.length; i++) {
            columns[i - 2][label] = row[i];
        }
    });

    return columns;
};

/**
 * Builds the record stored when a search finds nothing.
 * Works on a deep copy so the shared TEMPLATE is never modified.
 *
 * NOTE(review): the query is stored under 'Location Address' for parcel
 * searches too, as before — confirm whether 'Parcel Number' is wanted there.
 *
 * @param {string} query The address or parcel that was searched for.
 * @returns {Object} A fresh copy of TEMPLATE with the query filled in.
 */
const fillEmpty = (query) => {
    const record = _.cloneDeep(TEMPLATE);
    record.Summary['Location Address'] = query;
    return record;
};

/**
 * Actor entry point: enqueues one search request per input address and parcel,
 * runs each search in a Puppeteer-driven browser and pushes one record per
 * search into the dataset.
 */
Apify.main(async () => {
    // Without any input getInput() yields nothing usable; treat it as "no searches".
    const input = (await Apify.getInput()) || {};
    const addresses = input.addresses || [];
    const parcels = input.parcels || [];

    const dataset = await Apify.openDataset(BASE_NAME);
    const requestQueue = await Apify.openRequestQueue(BASE_NAME);

    let proxyConfiguration;
    try {
        proxyConfiguration = await Apify.createProxyConfiguration();
    } catch (error) {
        console.log('%cWARNING: Set up Proxies for this Apify account!', 'color:Red');
        console.log('%cWARNING: This run will not use any proxies!', 'color:Red');
        console.log(`Proxy configuration failed: ${error.message}`);
    }

    // The timestamp in uniqueKey keeps a repeated query from being deduplicated
    // against an earlier request in the same named queue.
    const enqueueSearch = (type, query) => requestQueue.addRequest({
        url: BASE_URL_SEARCH,
        userData: {
            [SEARCH_FORMS[type].userDataKey]: query,
            type,
        },
        uniqueKey: `${query}-${Date.now()}`,
    });

    for (const address of addresses) {
        console.log(address);
        await enqueueSearch(TYPES.BY_ADDRESS, address);
    }

    for (const parcel of parcels) {
        await enqueueSearch(TYPES.BY_PARCEL, parcel);
    }

    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        maxRequestsPerCrawl: 0,
        maxConcurrency: 4,
        handlePageTimeoutSecs: 60 * 60,
        proxyConfiguration,
        useSessionPool: true,
        sessionPoolOptions: {
            // NOTE(review): `maxPoolSize` looks like a session *pool* option, so
            // nested under `sessionOptions` it is probably ignored — confirm the
            // intent before moving it, since that would change runtime behavior.
            sessionOptions: { maxPoolSize: 1 },
        },
        launchPuppeteerOptions: {
            args: [
                '--headless',
                '--disable-background-timer-throttling',
                '--disable-backgrounding-occluded-windows',
                '--disable-renderer-backgrounding',
            ],
        },
        handlePageFunction: async ({ request, page }) => {
            /** True when at least one element matches `selector` right now. */
            const exists = async (selector) => (await page.$(selector)) !== null;

            /** Waits for `selector` unless it is already on the page. */
            const waitIfNone = async (selector) => {
                if (!(await exists(selector))) {
                    await page.waitForSelector(selector);
                }
            };

            /** Rendered text of the first match, or null when nothing matches. */
            const readText = async (selector) => {
                if (!(await exists(selector))) return null;
                return page.$eval(selector, elementToText);
            };

            /** Reads a headed table into row objects, or null when it has no headings. */
            const readTable = async (headingSelector, rowSelector) => {
                if (!(await exists(headingSelector))) return null;
                const headings = await page.$$eval(headingSelector, elementsToText);
                return page.$$eval(rowSelector, rowsToRecords, headings);
            };

            /**
             * Reads the cards of an improvement section.
             * NOTE(review): as before, the guard only checks that the page has
             * any table row at all, not that this section exists; a missing
             * section therefore yields [] rather than null.
             */
            const readCards = async (cardSelector, rowSelector) => {
                if (!(await exists(rowSelector))) return null;
                return page.$$eval(cardSelector, cardsToKeyValue, rowSelector);
            };

            /** Dismisses the "Terms and Conditions" modal when it is shown. */
            const checkForTermsAndConditions = async () => {
                if (await exists(TERMS_SELECTORS.modal)) {
                    await waitIfNone(TERMS_SELECTORS.buttonAgree);
                    await page.click(TERMS_SELECTORS.buttonAgree);
                }
            };

            /**
             * Reads every known section of the result page.
             *
             * @returns {Promise<Object[]>} A one-element array holding the
             *     scraped record; sections that are not on the page are null.
             */
            const scrapeData = async () => {
                const selectors = DETAIL_SELECTORS;

                // Give the page a chance to render. If the summary never shows
                // up, keep going so the record is still produced with nulls.
                try {
                    await waitIfNone(selectors.tableRowSummary);
                } catch (error) {
                    console.log(`Summary section did not appear: ${error.message}`);
                }

                /* Get Summary */
                let tableSummary = null;
                if (await exists(selectors.tableRowSummary)) {
                    tableSummary = await page.$$eval(selectors.tableRowSummary, rowsToKeyValue);
                    delete tableSummary['View Map'];
                }

                /* Get Owner */
                const infoOwner = await readText(selectors.sectionOwner);

                /* Get Land */
                const tableLand = await readTable(selectors.tableHeadingLand, selectors.tableRowLand);
                let totalAcres = null;
                let totalLandValue = null;
                if (tableLand !== null) {
                    totalAcres = await readText(selectors.totalAcres);
                    totalLandValue = await readText(selectors.totalLandValue);
                }

                /* Get Residential / Commercial Improvement information */
                const cardsResidentialImprovementInformation = await readCards(
                    selectors.rowResidentialInformation,
                    selectors.tableResidentialInformation,
                );
                const cardsCommercialImprovementInformation = await readCards(
                    selectors.rowCommercialInformation,
                    selectors.tableCommercialInformation,
                );

                /* Get Accessory Information */
                const tableAccessory = await readTable(
                    selectors.tableHeadingAccessory,
                    selectors.tableRowAccessory,
                );

                /* Get Sales */
                const tableSales = await readTable(selectors.tableHeadingSales, selectors.tableRowSales);

                /* Get Valuation */
                let tableValuation = null;
                if (await exists(selectors.tableHeadingValuation)) {
                    const headingsValuation = await page.$$eval(
                        selectors.tableHeadingValuation,
                        elementsToText,
                    );
                    const rows = await page.$$eval(selectors.tableRowValuation, rowsToCellText);
                    tableValuation = pivotValuation(headingsValuation, rows);
                }

                return [
                    {
                        'Scraped At': getCurrentDate(),
                        'Summary': tableSummary,
                        'Owner': infoOwner,
                        'Land': {
                            'table': tableLand,
                            'totalAcres': totalAcres,
                            'totalLandValue': totalLandValue,
                        },
                        'Residential Improvement Information': cardsResidentialImprovementInformation,
                        'Commercial Improvement Information': cardsCommercialImprovementInformation,
                        'Accessory Information': tableAccessory,
                        'Sales': tableSales,
                        'Valuation': tableValuation,
                    },
                ];
            };

            /**
             * Submits `query` through the given search form and stores either
             * the scraped record or an empty placeholder in the dataset.
             */
            const runSearch = async (form, query) => {
                await waitIfNone(form.inputSearchField);
                await checkForTermsAndConditions();

                /* Enter query */
                await page.focus(form.inputSearchField);
                // keyboard.type needs a string; input values may arrive as numbers.
                await page.keyboard.type(String(query));

                await page.click(form.inputSearchButton);
                // Fixed pause to let the result page load before inspecting it.
                await new Promise((resolve) => setTimeout(resolve, 3 * 1000));

                /* Check: is found? */
                let result;
                if (await exists(NO_DATA_SELECTOR)) {
                    result = fillEmpty(query);
                    console.log(`No data found for ${form.label} "${query}"`);
                } else {
                    /* Scrape data */
                    result = await scrapeData();
                    console.log(`Successfully scraped data for ${form.label} "${query}"`);
                }

                /* Save data */
                await dataset.pushData(result);
            };

            // Requests of an unknown type are ignored, as before.
            const form = SEARCH_FORMS[request.userData.type];
            if (form) {
                await runSearch(form, request.userData[form.userDataKey]);
            }
        },
    });

    await crawler.run();
});
Developer
Maintained by Community
Categories