QPublic
Deprecated
Pricing
Pay per usage
Go to Store
QPublic
Deprecated
0.0 (0)
Pricing
Pay per usage
1
Total users
9
Monthly users
2
Last modified
3 years ago
Dockerfile
# This is a template for a Dockerfile used to run acts in Actor system.
# The base image name below is set during the act build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-chrome:v0.21.10

# Second, copy just package.json and package-lock.json since it should be
# the only file that affects "npm install" in the next step, to speed up the build
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
 && npm install --only=prod --no-optional \
 && echo "Installed NPM packages:" \
 && (npm list --all || true) \
 && echo "Node.js version:" \
 && node --version \
 && echo "NPM version:" \
 && npm --version

# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./

# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{ "name": "apify-project", "version": "0.0.1", "description": "", "author": "It's not you it's me", "license": "ISC", "dependencies": { "apify": "0.21.10", "lodash": "latest" }, "scripts": { "start": "node main.js" }}
main.js
1const Apify = require('apify');2var _ = require('lodash');3
// Search page URL that every request is enqueued with (Fulton County, GA parcel layer).
const BASE_URL_SEARCH = 'https://qpublic.schneidercorp.com/Application.aspx?App=FultonCountyGA&Layer=Parcels&PageType=Search';

// Name used for both the dataset and the request queue opened by this actor.
const BASE_NAME = 'qPublic';

// Kinds of search a request can ask for; stored in `request.userData.type`.
const TYPES = {
    BY_ADDRESS: 'BY_ADDRESS',
    BY_PARCEL: 'BY_PARCEL'
};

// Skeleton record returned for a query that produced no results: every field is
// present but empty, so the caller only has to fill in the value that was searched.
const TEMPLATE = {
    "Summary": {
        "Parcel Number": "",
        "Location Address": "",
        "Legal Description": "",
        "Property Class": "",
        "Neighborhood": "",
        "Tax District": "",
        "Zoning": "",
        "Acres": "",
        "Homestead": "",
        "Exemptions": ""
    },
    "Owner": "",
    "Land": {},
    "Residential Improvement Information": [],
    "Accessory Information": [],
    "Sales": [],
    "Valuation": []
};

/**
 * Formats the current moment as a UTC calendar date.
 *
 * @returns {string} Today's UTC date as `YYYY-MM-DD`, with month and day zero-padded.
 */
const getCurrentDate = () => {
    const now = new Date();
    const year = now.getUTCFullYear();
    // getUTCMonth() is zero-based, hence the +1.
    const month = String(now.getUTCMonth() + 1).padStart(2, '0');
    const day = String(now.getUTCDate()).padStart(2, '0');

    return `${year}-${month}-${day}`;
};

/**
 * Actor entry point.
 *
 * Reads `addresses` and `parcels` from the actor input, enqueues one search
 * request per entry, and runs a Puppeteer crawler that types each query into
 * the matching search form and pushes the scraped record (or an empty
 * template when the site reports no results) to the dataset.
 */
Apify.main(async () => {
    // getInput() can yield nothing when the run has no input; fall back to empty lists.
    const input = (await Apify.getInput()) || {};
    const addresses = input.addresses || [];
    const parcels = input.parcels || [];

    const dataset = await Apify.openDataset(BASE_NAME);
    const requestQueue = await Apify.openRequestQueue(BASE_NAME);

    // Proxies are optional: when they cannot be configured the run continues without them.
    let proxyConfiguration = undefined;
    try {
        proxyConfiguration = await Apify.createProxyConfiguration();
    } catch (error) {
        console.log('%cWARNING: Set up Proxies for this Apify account!', 'color:Red');
        console.log('%cWARNING: This run will not use any proxies!', 'color:Red');
    }

    // Every request targets the same search URL, so the timestamp suffix keeps
    // the unique keys distinct from those of earlier enqueues of the same query.
    for (const address of addresses) {
        console.log(address);
        await requestQueue.addRequest({
            url: BASE_URL_SEARCH,
            userData: {
                address,
                type: TYPES.BY_ADDRESS
            },
            uniqueKey: address + '-' + (new Date()).getTime()
        });
    }

    for (const parcel of parcels) {
        await requestQueue.addRequest({
            url: BASE_URL_SEARCH,
            userData: {
                parcel,
                type: TYPES.BY_PARCEL
            },
            uniqueKey: parcel + '-' + (new Date()).getTime()
        });
    }

    // Per-search-type form selectors, log label, and the Summary field that
    // receives the query when nothing is found.
    const NO_DATA_SELECTOR = '#ctlBodyPane_noDataList_pnlNoResults';
    const searchForms = {
        [TYPES.BY_ADDRESS]: {
            inputSearchField: '#ctlBodyPane_ctl01_ctl01_txtAddress',
            inputSearchButton: '#ctlBodyPane_ctl01_ctl01_btnSearch',
            noData: NO_DATA_SELECTOR,
            label: 'address',
            emptyField: 'Location Address'
        },
        [TYPES.BY_PARCEL]: {
            inputSearchField: '#ctlBodyPane_ctl02_ctl01_txtParcelID',
            inputSearchButton: '#ctlBodyPane_ctl02_ctl01_btnSearch',
            noData: NO_DATA_SELECTOR,
            label: 'parcel',
            emptyField: 'Parcel Number'
        }
    };

    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        maxRequestsPerCrawl: 0,
        maxConcurrency: 4,
        handlePageTimeoutSecs: 60 * 60,
        proxyConfiguration: proxyConfiguration,
        useSessionPool: true,
        sessionPoolOptions: {
            // NOTE(review): `maxPoolSize` looks like a pool-level option rather than a
            // per-session one; left where the original had it — confirm the intent.
            sessionOptions: { maxPoolSize: 1 },
        },
        launchPuppeteerOptions: {
            args: [
                '--headless',
                '--disable-background-timer-throttling',
                '--disable-backgrounding-occluded-windows',
                '--disable-renderer-backgrounding',
            ],
        },
        handlePageFunction: async ({ request, page }) => {
            /** Waits for `selector` to appear unless it is already in the DOM. */
            const waitIfNone = async (selector) => {
                if (await page.$(selector) === null) {
                    await page.waitForSelector(selector);
                }
            };

            /** Returns the innerText of the first match of `selector`, or null when absent. */
            const textOrNull = async (selector) => {
                if (await page.$(selector) === null) {
                    return null;
                }
                return page.$eval(selector, (el) => el.innerText);
            };

            /**
             * Reads a table with a heading row into an array of objects keyed by
             * heading text. Returns null when no heading cell is present.
             */
            const scrapeTable = async (headingSelector, rowSelector) => {
                if (await page.$(headingSelector) === null) {
                    return null;
                }
                const headings = await page.$$eval(headingSelector, (ths) => ths.map((th) => th.innerText));
                return page.$$eval(rowSelector, (trs, columns) => trs.map((tr) => {
                    return Array.from(tr.children).reduce((acc, cell, index) => {
                        acc[columns[index]] = cell.innerText.trim();
                        return acc;
                    }, {});
                }), headings);
            };

            /**
             * Reads every card matched by `cardSelector` into an object built from
             * the two-cell rows (`rowSelector`) inside it.
             */
            const scrapeCards = async (cardSelector, rowSelector) => {
                // NOTE(review): this guard probes `rowSelector` page-wide (as the original
                // did), so it passes whenever any table exists; an absent section then
                // yields [] rather than null. Kept to preserve the output shape.
                if (await page.$(rowSelector) === null) {
                    return null;
                }
                return page.$$eval(cardSelector, (cards, innerRows) => cards.map((card) => {
                    return Array.from(card.querySelectorAll(innerRows)).reduce((acc, tr) => {
                        const cells = Array.from(tr.children).map((cell) => cell.innerText);
                        // Skip rows without a label/value pair instead of throwing on them.
                        if (cells.length >= 2) {
                            acc[cells[0].trim()] = cells[1].trim();
                        }
                        return acc;
                    }, {});
                }), rowSelector);
            };

            const run = {
                scrapeByAddress: async (query) => run.search(query, searchForms[TYPES.BY_ADDRESS]),
                scrapeByParcel: async (query) => run.search(query, searchForms[TYPES.BY_PARCEL]),

                /** Submits `query` through the given search form and stores the outcome. */
                search: async (query, form) => {
                    await waitIfNone(form.inputSearchField);
                    await run.checkForTermsAndConditions();

                    /* Enter query */
                    await page.focus(form.inputSearchField);
                    // keyboard.type needs a string; input JSON may carry numeric ids.
                    await page.keyboard.type(String(query));

                    await page.click(form.inputSearchButton);
                    // Fixed pause to let the result page load after the click.
                    await new Promise((resolve) => setTimeout(resolve, 3 * 1000));

                    /* Check: is found? */
                    let result = null;
                    if (await page.$(form.noData) !== null) {
                        result = await run.fillEmpty(query, form.emptyField);
                        console.log(`No data found for ${form.label} "${query}"`);
                    } else {
                        /* Scrape data */
                        result = await run.scrapeData();
                        console.log(`Successfully scraped data for ${form.label} "${query}"`);
                    }

                    /* Save data */
                    await dataset.pushData(result);
                },

                /** Accepts the "Terms and Conditions" modal when it is showing. */
                checkForTermsAndConditions: async () => {
                    const selectors = {
                        modal: '[aria-label="Terms and Conditions"]',
                        buttonAgree: '[aria-label="Terms and Conditions"] .button-1'
                    };

                    if (await page.$(selectors.modal) !== null) {
                        await waitIfNone(selectors.buttonAgree);
                        await page.click(selectors.buttonAgree);
                    }
                },

                /**
                 * Builds an empty record for a query with no results.
                 *
                 * @param {string} query - The value that was searched for.
                 * @param {string} [field='Location Address'] - Summary key that receives the query.
                 * @returns {Promise<Object>} A fresh copy of TEMPLATE with the query filled in.
                 */
                fillEmpty: async (query, field = 'Location Address') => {
                    // Deep copy so concurrent pages never share (and overwrite) one record.
                    const templateFilled = _.cloneDeep(TEMPLATE);

                    templateFilled['Summary'][field] = query;

                    return templateFilled;
                },

                /** Scrapes every section of the parcel report page currently loaded. */
                scrapeData: async () => {
                    const selectors = {
                        tableRowSummary: '#ctlBodyPane_ctl00_mSection .module-content table tr',

                        sectionOwner: '#ctlBodyPane_ctl01_mSection .module-content .four-column-blocks:first-child',

                        tableHeadingLand: '#ctlBodyPane_ctl02_mSection table thead tr th',
                        tableRowLand: '#ctlBodyPane_ctl02_mSection table tbody tr',
                        totalAcres: '#ctlBodyPane_ctl02_mSection .block-row > .four-column-blocks:nth-child(2)',
                        totalLandValue: '#ctlBodyPane_ctl02_mSection .block-row div .four-column-blocks:nth-child(2)',

                        rowResidentialInformation: '#ctlBodyPane_ctl03_mSection .block-row',
                        tableResidentialInformation: 'table tr',

                        rowCommercialInformation: '#ctlBodyPane_ctl04_mSection .block-row',
                        tableCommercialInformation: 'table tr',

                        tableHeadingAccessory: '#ctlBodyPane_ctl05_mSection table table thead tr th',
                        tableRowAccessory: '#ctlBodyPane_ctl05_mSection table table tbody tr',

                        tableHeadingSales: '#ctlBodyPane_ctl06_mSection table thead tr th',
                        tableRowSales: '#ctlBodyPane_ctl06_mSection table tbody tr',

                        tableHeadingValuation: '#ctlBodyPane_ctl07_mSection table tbody tr th',
                        tableRowValuation: '#ctlBodyPane_ctl07_mSection table tr'
                    };

                    // Give the report a chance to render; if the summary never shows up,
                    // carry on and record whichever sections are present (nulls otherwise).
                    try {
                        await waitIfNone(selectors.tableRowSummary);
                    } catch (error) {
                        console.log(`WARNING: Summary table did not appear: ${error.message}`);
                    }

                    /* Get Summary */
                    let tableSummary = null;
                    if (await page.$(selectors.tableRowSummary) !== null) {
                        tableSummary = await page.$$eval(selectors.tableRowSummary, (trs) => trs.reduce(
                            (acc, tr) => {
                                const cells = Array.from(tr.children).map((cell) => cell.innerText);
                                // Skip rows without a label/value pair instead of throwing on them.
                                if (cells.length >= 2) {
                                    acc[cells[0].trim()] = cells[1].trim();
                                }
                                return acc;
                            }, {}));

                        delete tableSummary['View Map'];
                    }

                    /* Get Owner */
                    const infoOwner = await textOrNull(selectors.sectionOwner);

                    /* Get Land */
                    const tableLand = await scrapeTable(selectors.tableHeadingLand, selectors.tableRowLand);
                    let totalAcres = null;
                    let totalLandValue = null;
                    if (tableLand !== null) {
                        totalAcres = await textOrNull(selectors.totalAcres);
                        totalLandValue = await textOrNull(selectors.totalLandValue);
                    }

                    /* Get Residential Improvement information */
                    const cardsResidentialImprovementInformation = await scrapeCards(
                        selectors.rowResidentialInformation,
                        selectors.tableResidentialInformation
                    );

                    /* Get Commercial Improvement information */
                    const cardsCommercialImprovementInformation = await scrapeCards(
                        selectors.rowCommercialInformation,
                        selectors.tableCommercialInformation
                    );

                    /* Get Accessory Information */
                    const tableAccessory = await scrapeTable(
                        selectors.tableHeadingAccessory,
                        selectors.tableRowAccessory
                    );

                    /* Get Sales */
                    const tableSales = await scrapeTable(selectors.tableHeadingSales, selectors.tableRowSales);

                    /* Get Valuation */
                    // The valuation table is transposed: each row is one attribute and each
                    // column from index 2 onwards becomes one output record.
                    let tableValuation = null;
                    if (await page.$(selectors.tableHeadingValuation) !== null) {
                        const headingsValuation = await page.$$eval(
                            selectors.tableHeadingValuation,
                            (ths) => ths.map((th) => th.innerText)
                        );
                        // The first row has no row heading of its own; label it 'Year'.
                        headingsValuation.unshift('Year');

                        const rows = await page.$$eval(selectors.tableRowValuation, (trs) => trs.map((tr) => {
                            return Array.from(tr.children).map((cell) => cell.textContent);
                        }));

                        // Size by the widest row so short or missing rows cannot cause a crash.
                        const recordCount = Math.max(0, ...rows.map((row) => row.length - 2));
                        tableValuation = Array.from({ length: recordCount }, () => ({}));
                        rows.forEach((row, index) => {
                            const heading = headingsValuation[index];
                            if (heading === undefined || heading.trim() === '') {
                                return;
                            }
                            for (let i = 2; i < row.length; i++) {
                                tableValuation[i - 2][heading] = row[i];
                            }
                        });
                    }

                    return [
                        {
                            'Scraped At': getCurrentDate(),
                            'Summary': tableSummary,
                            'Owner': infoOwner,
                            'Land': {
                                'table': tableLand,
                                'totalAcres': totalAcres,
                                'totalLandValue': totalLandValue
                            },
                            'Residential Improvement Information': cardsResidentialImprovementInformation,
                            'Commercial Improvement Information': cardsCommercialImprovementInformation,
                            'Accessory Information': tableAccessory,
                            'Sales': tableSales,
                            'Valuation': tableValuation,
                        }
                    ];
                }
            };

            if (request.userData.type === TYPES.BY_ADDRESS) {
                await run.scrapeByAddress(request.userData.address);
            } else if (request.userData.type === TYPES.BY_PARCEL) {
                await run.scrapeByParcel(request.userData.parcel);
            }
        }
    });
    await crawler.run();
});