
Google Search By Image
Deprecated
Pricing
Pay per usage
Go to Store

Google Search By Image
Deprecated
Search Google results by providing image URLs. It can be used for: reverse search, finding people by photo, detecting objects on images, getting product names, finding social media related to the person in the photo, finding related images and terms, and identifying objects on photos
0.0 (0)
Pricing
Pay per usage
1
Total users
45
Monthly users
1
Runs succeeded
>99%
Last modified
2 years ago
src/main.ts
1import { Actor } from 'apify';2import { BasicCrawler, log } from 'crawlee';3import { gotScraping } from 'got-scraping';4import FormData from 'form-data';5import { JSDOM } from 'jsdom';6import { createHash } from 'crypto';7
8const createValidKVNameFromUrl = (url: string) => createHash('sha256').update(url).digest('hex').slice(0, -2);9
10const headers: Record<string, string> = {11 'Alt-Used': 'www.google.com',12 'Origin': 'https://www.google.com',13 'Referer': 'https://www.google.com/',14 'Cache-Control': 'no-cache',15 'Pragma': "no-cache",16};17
18await Actor.init();19
20interface InputSchema {21 startUrls: string[];22 debug?: boolean;23}24
25const { startUrls = [], debug, } = await Actor.getInput<InputSchema>() ?? {};26
27if (debug) {28 log.setLevel(log.LEVELS.DEBUG);29}30
31const proxyConfiguration = await Actor.createProxyConfiguration({32 groups: ['RESIDENTIAL'],33 countryCode: 'US'34});35
36const newUrl = (imageUrl: string) => {37 const nUrl = new URL('https://www.google.com.br/searchbyimage');38
39 nUrl.searchParams.set('image_url', imageUrl);40 nUrl.searchParams.set('btnG', 'Search by image');41 nUrl.searchParams.set('encoded_image', '');42 nUrl.searchParams.set('image_content', '');43 nUrl.searchParams.set('filename', '');44 nUrl.searchParams.set('hl', 'en');45
46 return nUrl.toString();47}48
49const { defaultKeyValueStoreId } = Actor.getEnv();50
51const crawler = new BasicCrawler({52 maxConcurrency: 3,53 useSessionPool: true,54 async requestHandler({ session, request }) {55 const { userData } = request;56
57 const response = await gotScraping({58 url: newUrl(request.url),59 method: 'GET',60 proxyUrl: await proxyConfiguration!.newUrl(session!.id),61 headers,62 responseType: 'text',63 } as any);64
65 const { window } = new JSDOM(response.body, {66 url: 'https://www.google.com/search/',67 runScripts: 'dangerously',68 pretendToBeVisual: true,69 });70
71 const { document } = window;72
73 if (!document.querySelectorAll('#rso').length) {74 throw new Error(`No results found`);75 }76
77 const relatedSearch = document.querySelector<HTMLAnchorElement>('#topstuff a.fKDtNb[href^="/search"]')?.href;78
79 const matches = Array.from<HTMLDivElement>(document.querySelectorAll<HTMLDivElement>('#search .normal-header ~ .g')).map((div) => {80 const [info, snippet] = Array.from(div.querySelectorAll('[data-content-feature="1"] > div > span'));81 const date = info?.querySelector?.('span:nth-child(3)')?.textContent ?? null;82 83 return {84 title: div?.querySelector?.('h3')?.textContent,85 url: div?.querySelector?.('a')?.href,86 date: /\d, \d/.test(date) ? date : null,87 text: snippet?.textContent || null,88 };89 });90
91 const results = Array.from<HTMLDivElement>(document.querySelectorAll('#rso > div:first-child .g')).map((div) => {92 const [info, snippet] = Array.from(div.querySelectorAll('[data-content-feature="1"] > div > span'));93
94 return {95 title: div?.querySelector?.('h3')?.textContent,96 url: div?.querySelector?.('a')?.href,97 date: /\d, \d/.test(info?.textContent) ? info.querySelector('span')?.textContent : null,98 text: snippet?.textContent || null,99 };100 }); 101
102 const images = Array.from<HTMLDivElement>(document.querySelectorAll('[data-lpage]')).map((div) => {103 return {104 image: '',105 url: div?.dataset?.['lpage'],106 imageData: div?.querySelector?.('img')?.src,107 };108 }); 109
110 for (const image of images) {111 if (!image.imageData || !image.url) {112 continue;113 }114
115 const hash = createValidKVNameFromUrl(image.url);116 const [, contentType, imageData] = image.imageData.split(/data:|;base64,/);117
118 await Actor.setValue(119 hash,120 Buffer.from(imageData, 'base64'),121 { contentType }122 );123
124 delete image.imageData;125 image.image = `https://api.apify.com/v2/key-value-stores/${defaultKeyValueStoreId}/records/${hash}`;126 } 127
128 await Actor.pushData({129 relatedSearch: relatedSearch || null,130 matches,131 results,132 images,133 userData,134 });135
136 // await Actor.setValue('OUTPUT', dom.serialize(), { contentType: 'text/plain' });137 },138});139
140await crawler.addRequests(startUrls);141
142log.info('Starting the crawl.');143await crawler.run();144log.info('Crawl finished.');145
146await Actor.exit();
crawlee_storage/key_value_stores/default/INPUT.json
{ "startUrls": [ "https://apify.com" ]}
.gitignore
# This file tells Git which files shouldn't be added to source control
.idea
dist
node_modules
apify_storage
crawlee_storage
Dockerfile
# Using a multistage build: dev dependencies are needed only to compile the
# TypeScript sources, so they never reach the final image.
FROM apify/actor-node:18-beta AS builder

# Copy all files, install all dependencies (including dev deps) and build.
COPY . ./
RUN npm install --include=dev \
    && npm run build

# Create the final image.
FROM apify/actor-node:18-beta

# Copy only the necessary files from the builder stage.
# NOTE(review): the original COPY lines lacked `--from=builder`; without it,
# COPY reads from the build context, where /usr/src/app paths do not exist.
COPY --from=builder /usr/src/app/package.json ./
COPY --from=builder /usr/src/app/README.md ./
COPY --from=builder /usr/src/app/dist ./dist
COPY --from=builder /usr/src/app/apify.json ./apify.json
COPY --from=builder /usr/src/app/INPUT_SCHEMA.json ./INPUT_SCHEMA.json

# Install only production dependencies and log tool versions for debugging.
RUN npm --quiet set progress=false \
    && npm install --only=prod --no-optional \
    && echo "Installed NPM packages:" \
    && (npm list --only=prod --no-optional --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version

# Run the compiled code.
CMD npm run start:prod
INPUT_SCHEMA.json
{ "title": "@crawlee/cheerio typescript template", "type": "object", "schemaVersion": 1, "properties": { "startUrls": { "title": "Image URLs", "type": "array", "description": "Image URLs to search for", "editor": "requestListSources", "prefill": [ { "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dc/Steve_Jobs_Headshot_2010-CROP_%28cropped_2%29.jpg/1024px-Steve_Jobs_Headshot_2010-CROP_%28cropped_2%29.jpg", "userData": { "name": "Steve Jobs" } } ] }, "debug": { "title": "Debug mode", "type": "boolean", "description": "Enable additional logging", "editor": "checkbox", "default": false } }, "required": [ "startUrls" ]}
apify.json
{ "name": "crawlee-cheerio-typescript", "title": "Crawlee + Cheerio + Typescript", "version": "0.0", "buildTag": "latest"}
package.json
{ "name": "crawlee-cheerio-typescript", "version": "0.0.1", "type": "module", "description": "This is an example of an Apify actor.", "dependencies": { "apify": "^3.0.2", "crawlee": "^3.0.3", "got-scraping": "^3.2.10", "form-data": "^4.0.0" }, "devDependencies": { "@apify/tsconfig": "^0.1.0", "ts-node": "^10.9.1", "typescript": "^4.7.4" }, "scripts": { "start": "npm run start:dev", "start:prod": "node dist/main.js", "start:dev": "ts-node-esm -T src/main.ts", "build": "tsc", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1" }, "author": "It's not you it's me", "license": "ISC"}
payload
-----------------------------324172699818326226173603017879Content-Disposition: form-data; name="image_url"
-----------------------------324172699818326226173603017879Content-Disposition: form-data; name="encoded_image"; filename=""Content-Type: application/octet-stream
-----------------------------324172699818326226173603017879Content-Disposition: form-data; name="image_content"
base64-----------------------------324172699818326226173603017879Content-Disposition: form-data; name="filename"
x.jpg-----------------------------324172699818326226173603017879Content-Disposition: form-data; name="hl"
en-BR-----------------------------324172699818326226173603017879--
tsconfig.json
{ "extends": "@apify/tsconfig", "compilerOptions": { "module": "ES2022", "target": "ES2022", "outDir": "dist", "noUnusedLocals": false, "lib": ["DOM"] }, "include": [ "./src/**/*" ]}