
Bike24 (bike24.de) scraper
Deprecated
Scrapes product titles, prices, images and availability. Does NOT scrape product details.

Rating: 0.0 (0)
Pricing: Pay per usage
Total users: 4
Monthly users: 1
Runs succeeded: 0%
Last modified: 3 years ago
Dockerfile
FROM apify/actor-node-playwright-firefox:16
COPY package.json ./
RUN npm --quiet set progress=false \
 && npm install aws-crt \
 && npm install --only=prod --no-optional
COPY . ./
INPUT_SCHEMA.json
{ "title": "Bike24 (bike24.de) scraper", "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.", "type": "object", "schemaVersion": 1, "properties": { "mode": { "title": "Mode", "description": "", "type": "string", "editor": "select", "default": "TEST", "prefill": "TEST", "enum": [ "TEST", "FULL" ], "enumTitles": [ "TEST mode (scrapes only few categories)", "FULL" ] }, "proxyConfiguration": { "title": "Proxy configuration", "description": "Select proxies to be used by your actor.", "type": "object", "editor": "proxy", "default": { "useApifyProxy": true, "apifyProxyGroups": [ "RESIDENTIAL" ] }, "prefill": { "useApifyProxy": true, "apifyProxyGroups": [ "RESIDENTIAL" ] } }, "debug": { "title": "Debug", "description": "Debug mode prints more logs, disables concurrency and other optimizations.", "type": "boolean", "editor": "checkbox", "default": false }, "APIFY_USE_MEMORY_REQUEST_QUEUE": { "sectionCaption": "Advanced", "sectionDescription": "Advanced options, use only if you know what you're doing.", "title": "Use in-memory request queue instead of the native one", "description": "In-memory request queue can reduce costs, but it may case issues with longer runs due to non-persistence.", "type": "boolean", "default": false, "editor": "checkbox" }, "APIFY_DONT_STORE_IN_DATASET": { "title": "Don't store in dataset", "description": "If set to true, the actor will not store the results in the default dataset. Useful when using alternative storage, like own database", "type": "boolean", "default": false, "editor": "checkbox" }, "PG_CONNECTION_STRING_NORMALIZED": { "title": "Postgres connection string for normalized data", "description": "If set, actor will store normalized data in Postgres database in PG_DATA_TABLE and PG_DATA_PRICE_TABLE tables", "type": "string", "editor": "textfield" }, "PG_DATA_TABLE": { "title": "Postgres table name for product data", "description": "Table name for storing product name, url, image, ...", "type": "string", "editor": "textfield" }, "PG_DATA_PRICE_TABLE": { "title": "Postgres table name for price data", "description": "Table name for storing price, original price, stock status, ...", "type": "string", "editor": "textfield" } }, "required": [ "mode", "proxyConfiguration" ]}
apify.json
{ "name": "bike24-bike24-de-scraper", "version": "0.1", "buildTag": "latest", "env": null, "defaultRunOptions": { "build": "latest", "timeoutSecs": 3600, "memoryMbytes": 4096 }}
main.js
import { URL } from "node:url";
import { Actor } from "apify3";
import {
  CheerioCrawler,
  createCheerioRouter,
  utils as crawleeUtils,
} from "crawlee";
import playwright from "playwright";
import { Session } from "@crawlee/core";
import { init, save } from "./_utils/common.js";

const LABELS = {
  INDEX: `INDEX`,
  PRODUCTS: `PRODUCTS`,
};

var MODE;

(function (MODE) {
  MODE["TEST"] = "TEST";
  MODE["FULL"] = "FULL";
})(MODE || (MODE = {}));

const BASE_URL = `https://www.bike24.com`;

async function enqueueInitial(mode, crawler) {
  if (mode === MODE.FULL) {
    await crawler.addRequests([
      {
        userData: { label: LABELS.INDEX },
        url: `https://www.bike24.com/brands`,
      },
    ]);
  } else if (mode === MODE.TEST) {
    await crawler.addRequests([
      {
        userData: { label: LABELS.PRODUCTS },
        url: `https://www.bike24.com/brands/100percent`,
      },
    ]);
  }
}

const router = createCheerioRouter();

router.addHandler(LABELS.INDEX, async ({ crawler, $ }) => {
  $(`.list-brands-sitemap__section-item a`).each((i, el) => {
    const url = $(el).attr(`href`); // urls are relative
    const fullUrl = `${BASE_URL}${url}`;
    const name = $(el).text().trim(); // there's extra space at the beginning and end
    void crawler.addRequests([
      {
        userData: { label: LABELS.PRODUCTS, category: name },
        url: fullUrl,
      },
    ]);
  });
});

router.addHandler(LABELS.PRODUCTS, async ({ crawler, $, request }) => {
  if (!request.url.includes(`page=`)) {
    // on first page
    const totalPages = Number($(`.page-pagination-item`).last().text()); // e.g. `12`
    // FIXME: pagination is currently capped at 3 pages
    for (let i = 2; i <= Math.min(totalPages, 3); i++) {
      // skip first page, that is already handled
      const url = new URL(request.url);
      url.searchParams.set(`page`, i.toString());
      void crawler.addRequests([
        {
          url: url.toString(),
          userData: {
            label: LABELS.PRODUCTS,
            category: request.userData.category, // pass category name
          },
        },
      ]);
    }
  }

  const TAX_RATE = 1.21; // 21% VAT, matches the countryTax cookie set in unblock()

  const products = [];
  const $products = $(`.product-tile`);
  $products.each((i, el) => {
    const pid = $(el)
      .find(`.product-tile__anchor`)
      .attr(`href`)
      .replace(/\D/g, ``); // e.g. `p2421335.html` -> `2421335`
    const relUrl = $(el).find(`.product-tile__anchor`).attr(`href`); // relative url
    const url = `${BASE_URL}${relUrl}`;
    const name = $(el).find(`.product-tile__title`)?.text()?.trim();
    const prices = JSON.parse($(`.productPrice`, el).attr(`data-props`));
    const img = $(el).find(`.product-tile__picture img`).attr(`src`);
    const inStock = !!$(`.delivery-message--success`, el).length; // availability badge within this tile
    const product = {
      pid,
      name,
      url,
      img,
      inStock,
      currentPrice: prices.price * TAX_RATE,
      originalPrice: prices.oldPrice
        ? prices.oldPrice * TAX_RATE
        : prices.price * TAX_RATE,
      currency: `EUR`,
    };
    products.push(product);
  });
  await save(products);
});

void Actor.main(async () => {
  const input = await Actor.getInput();
  const {
    mode = MODE.FULL,
    proxyConfiguration: inputProxyConfiguration,
    ...rest
  } = input ?? {};

  // TODO: Better pattern to handle both proxy and no proxy
  const proxyConfiguration = inputProxyConfiguration
    ? await Actor.createProxyConfiguration(inputProxyConfiguration)
    : undefined;

  await init({ actorNameOverride: `bike-24` }, rest);
  const crawler = new CheerioCrawler({
    proxyConfiguration,
    maxConcurrency: 1,
    maxRequestRetries: 0,
    sessionPoolOptions: {
      maxPoolSize: 1, // not brave enough for concurrency
      sessionOptions: {
        maxAgeSecs: 60 * 60 * 2, // 2 hours, default is 50m
        maxUsageCount: 1000, // default is 50, let's use as much as possible, until we get blocked
        // TODO: Investigate why so many Firefox sessions are created
      },
      createSessionFunction: async (sessionPool) => {
        console.log(
          `[SESSION] Creating new session, will use Firefox to unblock (should take ~10s)`
        );
        const session = new Session({ sessionPool });
        await unblock(session, proxyConfiguration);
        return session;
      },
    },
    persistCookiesPerSession: true,
    preNavigationHooks: [
      async ({ session }, gotOptions) => {
        const userData = session.userData;
        gotOptions.headers = userData.headers; // real-like headers obtained from Firefox
        gotOptions.headers.Cookie = userData.cookies
          .map((c) => `${c.name}=${c.value}`)
          .join(`; `); // real cookies obtained from Firefox
        // gotOptions.proxyUrl = `http://127.0.0.1:9090` // NOTE: uncomment for debugging with MITM
      },
    ],
    requestHandler: router,
  });
  await enqueueInitial(mode, crawler);
  await crawler.run();
});

async function unblock(session, proxyConfiguration) {
  const browser = await playwright.firefox.launch({
    headless: true, // NOTE: set to false for debugging
    // TODO: Better pattern to handle both proxy and no proxy
    proxy: proxyConfiguration
      ? { server: await proxyConfiguration.newUrl(session.id) }
      : undefined,
    // proxy: { server: `http://127.0.0.1:9090` }, // NOTE: uncomment for debugging with MITM
  });
  const browserContext = await browser.newContext({ ignoreHTTPSErrors: true });

  const countryCode = `29`;
  await browserContext.addCookies([
    {
      name: `countryTax`,
      value: `{"shippingCountry":${countryCode},"taxRates":[{"value":21,"name":"Normaler Mehrwertsteuersatz","taxGroup":1},{"value":15,"name":"Lebensmittel mit red. MwSt.","taxGroup":2},{"value":15,"name":"Druckerzeugnisse","taxGroup":3}],"validUntil":"Wednesday, 16-Nov-2022 00:00:00 UTC"}`, // FIXME
      domain: `www.bike24.com`,
      path: `/`,
    },
    {
      name: `deliveryLocation`,
      value: `{"country":${countryCode},"zipCode":null}`,
      domain: `www.bike24.com`,
      path: `/`,
    },
  ]);

  const page = await browserContext.newPage();
  // page.on(`console`, msg => console.log(`⚪️ Playwright log (${msg.type()}) ${msg.text()}`))

  let headersToSet;

  await page.route(`**/*`, (route) => {
    const request = route.request();
    const url = request.url();
    const method = request.method(); // GET, POST, etc.
    const resourceType = request.resourceType(); // document, stylesheet, image, ...
    // console.log(`🔵 Playwright route: ${method} ${url} (${resourceType})`)

    // use the first main request to store the sent headers
    if (!headersToSet) headersToSet = pickHeaders(request.headers());

    route.continue();
  });

  await page.goto(`https://www.bike24.com/brands/shimano`);
  // Wait for some time to pass basic Cloudflare Javascript checks
  await crawleeUtils.sleep(5000); // TODO: Be smarter, 3000 ms is enough for r2-bike.com, but not for g2.com
  // Get all cookies and store them for subsequent requests
  const cookies = await page.context().cookies();
  const cfCookie = cookies.find((c) => c.name === `__cf_bm`)?.value;
  console.log(
    `[SESSION] Cloudflare cookie "__cf_bm": ${cfCookie ?? `😱😱😱 not found`}`
  );
  session.userData = { headers: headersToSet, cookies };
  await browser.close();
}

function pickHeaders(headers) {
  // Pick just the headers that gotScraping can correctly handle (= order)
  // This seems to be needed mainly to avoid setting the Host header explicitly; when set, it ended up at the end of the headers list, which Cloudflare did not like
  // If we skip the Host header, gotScraping will set it automatically, and in the correct order

  // taken from https://github.com/apify/header-generator/blob/1b0fd217b6fa0beaf42b9de321e47ac5f1d4cebf/src/data_files/headers-order.json#L62
  const headersList = [
    `sec-ch-ua`,
    `sec-ch-ua-mobile`,
    `user-agent`,
    `User-Agent`,
    `accept`,
    `Accept`,
    `accept-language`,
    `Accept-Language`,
    `accept-encoding`,
    `Accept-Encoding`,
    `dnt`,
    `DNT`,
    `referer`,
    `Referer`,

    // Handling cookies explicitly
    // `cookie`,
    // `Cookie`,

    `Connection`,
    `upgrade-insecure-requests`,
    `Upgrade-Insecure-Requests`,
    `te`,
    `sec-fetch-site`,
    `sec-fetch-mode`,
    `sec-fetch-user`,
    `sec-fetch-dest`,
    `Sec-Fetch-Mode`,
    `Sec-Fetch-Dest`,
    `Sec-Fetch-Site`,
    `Sec-Fetch-User`,
  ];
  return headersList.reduce((acc, header) => {
    if (headers[header]) acc[header] = headers[header];
    return acc;
  }, {});
}
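To make the tax handling in the PRODUCTS handler concrete, here is a small standalone sketch of how currentPrice and originalPrice are derived from the tile's data-props payload. The price and oldPrice field names are the ones the handler reads; the numeric values are hypothetical.

// Illustrative only: field names mirror what the PRODUCTS handler reads,
// the numbers are made up.
const TAX_RATE = 1.21; // prices in data-props are net; 21% VAT is added on top

const prices = { price: 99.99, oldPrice: 129.99 }; // parsed from the data-props attribute
const currentPrice = prices.price * TAX_RATE; // 120.9879
const originalPrice = (prices.oldPrice ?? prices.price) * TAX_RATE; // 157.2879

console.log(currentPrice.toFixed(2), originalPrice.toFixed(2)); // "120.99" "157.29"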
package.json
{ "name": "bike24-bike24-de-scraper", "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.", "type": "module", "scripts": { "start": "node ./main.js", "push-to-apify-platform": "npx apify push" }, "dependencies": { "apify3": "npm:apify@^3.0.2", "crawlee": "*", "playwright": "*", "@crawlee/core": "*", "pg": "*", "pg-connection-string": "*", "dotenv": "*", "find-config": "*", "@elastic/elasticsearch": "*", "filenamify": "*", "@crawlee/memory-storage": "*" }, "apify": { "title": "Bike24 (bike24.de) scraper", "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.", "isPublic": true, "isDeprecated": false, "isAnonymouslyRunnable": true, "notice": "", "pictureUrl": "", "seoTitle": "", "seoDescription": "", "categories": [ "ECOMMERCE" ] }}
.actor/actor.json
{ "actorSpecification": 1, "name": "bike24-bike24-de-scraper", "title": "Bike24 (bike24.de) scraper", "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.", "version": "0.1.0", "storages": { "dataset": { "actorSpecification": 1, "title": "Bike24 (bike24.de) scraper", "description": "Scrapes products titles, prices, images and availability. Does NOT scrape product details.", "views": { "overview": { "title": "Overview", "description": "Overview of the most important fields", "transformation": { "fields": [ "pid", "name", "url", "img", "inStock", "currentPrice", "originalPrice", "currency" ] }, "display": { "component": "table", "columns": [ { "label": "Pid", "field": "pid", "format": "text" }, { "label": "Name", "field": "name", "format": "text" }, { "label": "Url", "field": "url", "format": "link" }, { "label": "Img", "field": "img", "format": "image" }, { "label": "In Stock", "field": "inStock", "format": "boolean" }, { "label": "Current Price", "field": "currentPrice", "format": "number" }, { "label": "Original Price", "field": "originalPrice", "format": "number" }, { "label": "Currency", "field": "currency", "format": "text" } ] } } } } }}
.actor/logo.png
_utils/common.js
import { createHash } from 'crypto'
import os from "os"
import path from "path"
// eslint-disable-next-line @apify/apify-actor/no-forbidden-node-internals
import fs from "fs"
import pg from "pg"
import pgConnectionString from 'pg-connection-string'
import { config } from 'dotenv'
import findConfig from "find-config"
import { Client as ElasticClient } from "@elastic/elasticsearch"
import filenamify from 'filenamify'
import { Configuration, Dataset } from 'crawlee'
import { MemoryStorage } from '@crawlee/memory-storage'

config({ path: findConfig(`.env`) })

const elasticIndexName = `actors-monorepo-shops`

const globalLogsProps = {
  __NODE_STARTED: new Date().toISOString(),
}

let actorName
let pgClient
let pgClientNormalized
let elasticClient
export async function init ({ actorNameOverride }, restInput) {
  parseEnvFromInput(restInput)

  if (os.platform() === `darwin`) {
    const filePath = process.argv[1] // ~/Projects/apify-actors-monorepo/actors/foo.ts
    const basename = path.basename(filePath) // foo.ts
    actorName = actorNameOverride ?? basename.split(`.`)[0] // foo
    const gitBranch = fs.readFileSync(path.join(process.cwd(), `..`, `.git/HEAD`), `utf8`)
      .split(` `)[1]
      .trim()
      .replace(`refs/heads/`, ``)
    const gitCommit = fs.readFileSync(path.join(process.cwd(), `..`, `.git/refs/heads/${gitBranch}`), `utf8`)
    const gitCommitShort = gitCommit.substring(0, 7)
    globalLogsProps.__GIT_COMMIT = gitCommitShort
  }

  if (process.env.APIFY_USE_MEMORY_REQUEST_QUEUE === `true`) { // dotenv -> bool-like vars are strings
    Configuration.getGlobalConfig().useStorageClient(new MemoryStorage())
  }

  if (process.env.APIFY_IS_AT_HOME) {
    actorName = actorNameOverride ?? process.env.APIFY_ACTOR_ID // Name would be better, but it's not in ENV
  }

  /* ELASTIC */
  /* ======= */
  if (process.env.ELASTIC_CLOUD_ID) {
    elasticClient = new ElasticClient({
      cloud: { id: process.env.ELASTIC_CLOUD_ID },
      auth: { apiKey: process.env.ELASTIC_CLOUD_API_KEY },
    })

    // const mapping = await elasticClient.indices.getMapping({ index: actorName })

    // eslint-disable-next-line no-inner-declarations
    async function enforceIndexMapping () {
      const doesIndexExist = await elasticClient.indices.exists({ index: elasticIndexName })
      if (!doesIndexExist) await elasticClient.indices.create({ index: elasticIndexName })
      await elasticClient.indices.putMapping({
        index: elasticIndexName,
        body: {
          properties: {
            _discount: { type: `float` },
            originalPrice: { type: `float` },
            currentPrice: { type: `float` },
          },
        },
      })
    }

    try {
      await enforceIndexMapping()
    } catch (err) {
      if (err.message.includes(`cannot be changed from type`)) {
        console.log(`Elastic index ${elasticIndexName} already exists with incorrect mappings. As the existing mapping cannot be changed, the index will be deleted and recreated.`)
        await elasticClient.indices.delete({ index: elasticIndexName })
        await enforceIndexMapping()
      }
    }
  }

  /* POSTGRESQL */
  /* ========== */
  if (process.env.PG_CONNECTION_STRING) {
    const pgConfig = pgConnectionString(process.env.PG_CONNECTION_STRING)
    // const pgPool = new pg.Pool(pgConfig)

    pgClient = new pg.Client(pgConfig)
    await pgClient.connect()

    // Check if the table exists and has the proper columns
    const { rows: tables } = await pgClient.query(`
      SELECT table_name
      FROM information_schema.tables
      WHERE table_schema = 'public'
    `)

    // eslint-disable-next-line camelcase
    const tableExists = tables.some(({ table_name }) => table_name === process.env.PG_DATA_TABLE)
    if (!tableExists) {
      throw new Error(`Table ${process.env.PG_DATA_TABLE} does not exist in database ${pgConfig.database}`)
    }

    // TODO: Handle pgClient closing
  }

  if (process.env.PG_CONNECTION_STRING_NORMALIZED) {
    const pgConfig = pgConnectionString(process.env.PG_CONNECTION_STRING_NORMALIZED)

    pgClientNormalized = new pg.Client(pgConfig)
    await pgClientNormalized.connect()

    // Check if the tables exist and have the proper columns
    const { rows: tables } = await pgClientNormalized.query(`
      SELECT table_name
      FROM information_schema.tables
      WHERE table_schema = 'public'
    `)

    // eslint-disable-next-line camelcase
    const tableMainExists = tables.some(({ table_name }) => table_name === process.env.PG_DATA_TABLE)
    // eslint-disable-next-line camelcase
    const tablePricesExists = tables.some(({ table_name }) => table_name === process.env.PG_DATA_PRICE_TABLE)
    if (!tableMainExists) throw new Error(`Table ${process.env.PG_DATA_TABLE} does not exist in database ${pgConfig.database}`)
    if (!tablePricesExists) throw new Error(`Table ${process.env.PG_DATA_PRICE_TABLE} does not exist in database ${pgConfig.database}`)

    // TODO: Handle pgClient closing
  }
}

// inspired by @drobnikj
// TODO: Similar, but less obfuscated for easier debugging
export const createUniqueKeyFromUrl = (url) => {
  const hash = createHash(`sha256`)
  const cleanUrl = url.split(`://`)[1] // Remove protocol
  hash.update(cleanUrl)
  return hash.digest(`hex`)
}

/**
 *
 * @param {Date} datetime
 * @return {Promise<void>}
 */
export const sleepUntil = async (datetime) => {
  const now = new Date()
  const difference = datetime - now
  if (difference > 0) {
    return new Promise((resolve) => {
      setTimeout(resolve, difference)
    })
  }
  return Promise.resolve()
}

// TODO: Uff, nicer! But at least it's tested
export function parsePrice (string) {
  let amount, currency
  const noText = string.replace(/[^\d,.]/g, ``)
  const decimals = noText.match(/([,.])(\d{2})$/)
  if (decimals) {
    const decimalSeparator = decimals[1]
    const decimalAmount = decimals[2]
    const mainAmount = noText.split(decimalSeparator)[0].replace(/\D/g, ``)
    amount = parseFloat(mainAmount + `.` + decimalAmount)
  } else {
    const justNumbers = noText.replace(/[,.]/g, ``)
    amount = parseInt(justNumbers)
  }
  return { amount, currency }
}

export function toNumberOrNull (str) {
  // TODO: Handle better, but only after adding test
  if (str === undefined) return null
  if (str === null) return null
  if (str === ``) return null
  const num = Number(str)
  if (Number.isNaN(num)) return null
  return num
}

export async function save (objs) {
  if (!Array.isArray(objs)) objs = [objs]
  if (objs.length === 0) return console.log(`No data to save.`)

  // Promise.all is needed because the mapper is async (it may push to the dataset)
  const objsExtended = await Promise.all(objs.map(async (obj) => {
    const objExtended = {
      ...obj,
      actorName,
      ...globalLogsProps,
      // __NODE_VERSION: global.process.versions.node,
      // __NODE_UPTIME: global.process.uptime().toFixed(2), // seconds, 2 decimals
    }
    // if run on Apify
    if (process.env.APIFY_IS_AT_HOME) {
      objExtended.__APIFY_ACTOR_ID = process.env.APIFY_ACTOR_ID
      objExtended.__APIFY_ACTOR_RUN_ID = process.env.APIFY_ACTOR_RUN_ID
      objExtended.__APIFY_ACTOR_BUILD_ID = process.env.APIFY_ACTOR_BUILD_ID
      objExtended.__APIFY_ACTOR_BUILD_NUMBER = process.env.APIFY_ACTOR_BUILD_NUMBER
      objExtended.__APIFY_ACTOR_TASK_ID = process.env.APIFY_ACTOR_TASK_ID
      if (process.env.APIFY_DONT_STORE_IN_DATASET !== `true`) { // Note: dotenv is not casting vars, so they are strings
        await Dataset.pushData(obj)
      }
    }
    return objExtended
  }))
  // if run on a local machine (macOS)
  if (os.platform() === `darwin`) {
    const cwd = process.cwd() // ~/Projects/apify-actors-monorepo/actors
    const storageDir = path.join(cwd, `${actorName}.storage`) // ~/Projects/apify-actors-monorepo/actors/foo.storage
    if (!fs.existsSync(storageDir)) fs.mkdirSync(storageDir)
    const dataDir = path.join(storageDir, `data`) // ~/Projects/apify-actors-monorepo/actors/foo.storage/data
    if (!fs.existsSync(dataDir)) fs.mkdirSync(dataDir)
    for (const objExtended of objsExtended) {
      const id = String(objExtended.id ?? objExtended.pid) // ?? uuidv4()
      const fileName = `${filenamify(id)}.json`
      const dataFilePath = path.join(dataDir, fileName) // ~/Projects/apify-actors-monorepo/actors/foo.storage/data/foo.json
      fs.writeFileSync(dataFilePath, JSON.stringify(objExtended, null, 2))
    }
  }

  if (pgClient) {
    const objsPg = objs.map((obj) => ({
      ...obj,
      // TODO: This is becoming not nice, and not clear
      shop: actorName,
      scrapedAt: new Date().toISOString().split(`T`)[0],
    }))

    const columns = getColumns(objsPg)
    const values = getValues(objsPg)
    const queryString = `
      INSERT INTO public."${process.env.PG_DATA_TABLE}" (${columns})
      VALUES (${values})
    `
    try {
      const { rowCount } = await pgClient.query(queryString)
      console.log(`[save] saved to database: ${JSON.stringify(rowCount)}`)
    } catch (err) {
      if (err.message.includes(`violates unique constraint`)) console.warn(`PostgreSQL: violates unique constraint`)
      else throw err
    }
  }

  // Only makes sense for HlidacShopu
  if (pgClientNormalized) {
    const objsPgData = objs.map((obj) => ({
      shop: actorName,
      pid: obj.pid,
      name: obj.name,
      url: obj.url,
      img: obj.img,
    }))

    const objsPgDataPrice = objs.map((obj) => ({
      shop: actorName,
      pid: obj.pid,
      scrapedAt: new Date().toISOString().split(`T`)[0],
      currentPrice: obj.currentPrice,
      originalPrice: obj.originalPrice,
      inStock: obj.inStock,
    }))

    const queryString = `
      INSERT INTO public."${process.env.PG_DATA_TABLE}" (${getColumns(objsPgData)})
      VALUES (${getValues(objsPgData)})
      ON CONFLICT DO NOTHING
    `
    try {
      const { rowCount } = await pgClientNormalized.query(queryString)
      console.log(`[save] saved to database (data): ${JSON.stringify(rowCount)}`)
    } catch (err) {
      if (err.message.includes(`violates unique constraint`)) console.warn(`PostgreSQL: violates unique constraint`)
      else throw err
    }

    const queryStringPrice = `
      INSERT INTO public."${process.env.PG_DATA_PRICE_TABLE}" (${getColumns(objsPgDataPrice)})
      VALUES (${getValues(objsPgDataPrice)})
      ON CONFLICT DO NOTHING
    `
    try {
      const { rowCount } = await pgClientNormalized.query(queryStringPrice)
      console.log(`[save] saved to database (price): ${JSON.stringify(rowCount)}`)
    } catch (err) {
      if (err.message.includes(`violates unique constraint`)) console.warn(`PostgreSQL: violates unique constraint`)
      else throw err
    }
  }

  if (elasticClient) {
    // .index creates or updates the document
    // .create creates a new document if it doesn't exist, 409 if it does
    // try {
    //   const res = await elasticClient.index({
    //     index: `actors-monorepo-shops`, // TODO: Consider using actorName
    //     id, // foo-bar
    //     document: objExtended, // {...}
    //   })
    // } catch (err) {
    //   // https://discuss.elastic.co/t/elasticsearch-503-ok-false-message-the-requested-deployment-is-currently-unavailable/200583
    //   if (err.message.includes(`requested resource is currently unavailable`)) console.log(`Elasticsearch is unavailable, skipping, but not aborting`)
    //   else throw err
    // }
  }
}

function getColumns (objs) {
  return Object.keys(objs[0]).map((key) => `"${key}"`).join(`, `)
}

function getValues (objs) {
  return objs.map(objPg => Object.values(objPg).map((value) => {
    // escape strings to prevent SQL injection
    if (typeof value === `string`) return `'${value.replace(/'/g, `''`)}'`
    // convert to DB-specific null
    if (typeof value === `undefined` || value === null) return `NULL`
    return value
  }).join(`, `)).join(`), (`)
}

export function parseEnvFromInput (input) {
  const env = {}
  for (const key in input) {
    if (key === key.toUpperCase()) env[key] = input[key]
  }
  console.log(`[parseEnvFromInput] ${JSON.stringify(env)}`)
  Object.assign(process.env, env)
}

export const isInspect =
  process.execArgv.join().includes(`--inspect`) ||
  // @ts-ignore
  process?._preload_modules?.join(`|`)?.includes(`debug`)
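As a quick sanity check of the helpers above, the following sketch shows what parsePrice and toNumberOrNull return for typical inputs (assuming it is run from the actor root so the relative import resolves). Note that parsePrice never detects the currency, so it always comes back undefined.

import { parsePrice, toNumberOrNull } from "./_utils/common.js";

console.log(parsePrice(`1.299,95 €`)); // { amount: 1299.95, currency: undefined }
console.log(parsePrice(`53 €`));       // { amount: 53, currency: undefined }
console.log(toNumberOrNull(``));       // null
console.log(toNumberOrNull(`12.5`));   // 12.5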