
UK VAT number checker
Pricing
Pay per usage
Go to Store

UK VAT number checker
This actor uses https://www.tax.service.gov.uk/ to check whether a UK VAT number is valid. It can check multiple VAT numbers in one run. If a VAT number is valid, the registered business name and address are fetched as well.
0.0 (0)
Pricing
Pay per usage
2
Total users
10
Monthly users
2
Runs succeeded
>99%
Last modified
a month ago
.actor/Dockerfile
# Specify the base Docker image. You can read more about# the available images at https://sdk.apify.com/docs/guides/docker-images# You can also use any other image from Docker Hub.FROM apify/actor-node:16
# Copy just package.json and package-lock.json# to speed up the build using Docker layer cache.COPY package*.json ./
# Install NPM packages, skip optional and development dependencies to# keep the image small. Avoid logging too much and print the dependency# tree for debuggingRUN npm --quiet set progress=false \ && npm install --omit=dev --omit=optional \ && echo "Installed NPM packages:" \ && (npm list --omit=dev --all || true) \ && echo "Node.js version:" \ && node --version \ && echo "NPM version:" \ && npm --version \ && rm -r ~/.npm
# Next, copy the remaining files and directories with the source code.# Since we do this after NPM install, quick build will be really fast# for most source file changes.COPY . ./
# Run the image.CMD npm start --silent
.actor/INPUT_SCHEMA.json
{ "title": "UK VAT number checker input", "type": "object", "schemaVersion": 1, "properties": { "vatIds": { "title": "VAT numbers", "type": "array", "description": "UK VAT numbers to check.", "editor": "stringList", "prefill": ["GB123456789"], "example": ["GB123456789"] } }, "required": ["vatIds"]}
.actor/README.md
1# UK VAT number checker2
3Checks if given UK VAT number(s) is valid. Uses `https://www.tax.service.gov.uk/check-vat-number/` to perform the check.4
5# Input6
7Array of UK VAT numbers.8
9# Output10
11Array of check results.12
13```14[15 {16 "vatId": "GB220430231",17 "isValid": true,18 "businessName": "TESCO PLC",19 "checkedAt": "2025-03-28T13:33:56.267Z",20 "address": "CIRRUS A SHIRE PARK KESTREL WAY WELWYN GARDEN CITY AL7 1GA GB"21 }22]23```
.actor/actor.json
{ "actorSpecification": 1, "name": "uk-vat-id-check", "title": "UK VAT number checker", "description": "", "version": "0.0", "input": "./INPUT_SCHEMA.json", "readme": "./README.md", "dockerfile": "./Dockerfile", "minMemoryMbytes": 256, "maxMemoryMbytes": 4096, "storages": { "dataset": { "actorSpecification": 1, "title": "UK VAT number check result", "views": { "results": { "title": "UK VAT number check result", "transformation": { "fields": [ "vatId", "isValid", "businessName", "checkedAt", "address" ] }, "display": { "component": "table", "properties": { "vatId": { "label": "VAT number", "format": "string" }, "isValid": { "label": "Is valid", "format": "boolean" }, "businessName": { "label": "Business name", "format": "string" }, "address": { "label": "Address", "format": "string" }, "checkedAt": { "label": "Checked at", "format": "datetime" } } } } } } }}
src/main.js
/**
 * UK VAT number checker.
 * This actor was bootstrapped from the `CheerioCrawler` boilerplate template, but it drives a
 * `BasicCrawler` that sends its requests with `got-scraping` and parses the responses with `cheerio`.
 * If you're looking for examples or want to learn more, see README.
 */

7// For more information, see https://sdk.apify.com8import { Actor } from 'apify';9// For more information, see https://crawlee.dev10import { BasicCrawler } from 'crawlee';11import { gotScraping } from 'got-scraping';12import cheerio from 'cheerio';13import { FingerprintGenerator } from 'fingerprint-generator';14
// Accepted UK VAT number shapes. Each may be prefixed with "GB" or with "XI"
// (the prefix used for Northern Ireland); the prefix is optional.
//   - Standard:         9 digits
//   - Branches:         12 digits
//   - Government:       "GD" followed by 3 digits
//   - Health authority: "HA" followed by 3 digits
const UK_VAT_NUMBER_REGEX = /^(GB|XI)?(\d{9}(\d{3})?$|(GD|HA)\d{3}$)/;

// Initialize the Apify SDK and read the actor input.
await Actor.init();
const input = await Actor.getInput();

// Fail with a readable message when the run has no input or `vatIds` is not a list,
// instead of crashing with a TypeError on the property access below.
if (!Array.isArray(input?.vatIds)) {
    throw new Error('Invalid input: "vatIds" must be an array of UK VAT numbers.');
}

// Split the input into VAT numbers worth sending to gov.uk and ones that can be
// rejected right away because they do not match the expected format.
const vatIdsToValidate = [];
for (const vatId of input.vatIds) {
    // Non-string entries (e.g. numbers passed through the API) cannot match the
    // format; they are reported as invalid rather than throwing.
    if (typeof vatId === 'string' && UK_VAT_NUMBER_REGEX.test(vatId)) {
        vatIdsToValidate.push(vatId);
    } else {
        await Actor.pushData({ vatId, isValid: false, checkedAt: new Date(), address: null, businessName: null, status: 'error' });
    }
}

// One crawler request per VAT number that passed the format check. Every request
// targets the same form URL, so the VAT number is supplied as the unique key
// (presumably so that the requests are not treated as duplicates of each other).
const startUrls = [];
for (const vatId of vatIdsToValidate) {
    startUrls.push({
        url: 'https://www.tax.service.gov.uk/check-vat-number/enter-vat-details',
        uniqueKey: vatId,
        userData: { vatId, label: 'start' },
    });
}

// Source of the browser fingerprints whose headers are attached to the HTTP requests.
const fingerprintGenerator = new FingerprintGenerator();

// Source of the proxy URLs the HTTP requests are routed through.
const proxyConfiguration = await Actor.createProxyConfiguration();

// Origin of the VAT checking service and the URL of the form a VAT number is submitted to.
const GOV_UK_ORIGIN = 'https://www.tax.service.gov.uk';
const ENTER_VAT_DETAILS_URL = `${GOV_UK_ORIGIN}/check-vat-number/enter-vat-details`;

/**
 * Crawler that checks one UK VAT number per request (taken from `request.userData.vatId`).
 *
 * For each request the handler:
 *   1. GETs the form page to obtain the CSRF token and the initial cookies,
 *   2. POSTs the VAT number and captures the redirect response,
 *   3. GETs the redirect target with the collected cookies,
 *   4. parses the result page and pushes one record to the dataset.
 *
 * Requests that keep failing are reported to the dataset by `failedRequestHandler`
 * with `status: 'error'` and the error message.
 */
const crawler = new BasicCrawler({
    useSessionPool: true,
    async requestHandler({ request, session, log }) {
        const { vatId } = request.userData;
        const { headers: fingerprintHeaders } = fingerprintGenerator.getFingerprint({
            devices: ['desktop'],
            browsers: ['chrome'],
        });

        // The same proxy URL is used for all three HTTP calls of a single check.
        const proxyUrl = await proxyConfiguration.newUrl();

        // Step 1: load the form to get the CSRF token and the cookies set with it.
        const formResponse = await gotScraping({
            url: ENTER_VAT_DETAILS_URL,
            method: 'GET',
            proxyUrl,
            http2: true,
            headers: {
                ...fingerprintHeaders,
            },
        });

        const csrfToken = cheerio.load(formResponse.body)('input[name="csrfToken"]').val();
        // Without the token the POST below cannot succeed; fail early with a clear reason.
        if (!csrfToken) throw new Error('VAT number check failed: CSRF token not found in the form');
        const payload = {
            csrfToken,
            target: vatId,
            requester: '',
        };
        await session.setCookiesFromResponse(formResponse);

        // Step 2: submit the form.
        // We have to catch the redirect as the response contains set-cookie headers
        // And gotScraping won't use these cookies in redirect request
        // That's why this hack with maxRedirects: 0
        let postResponse;
        try {
            await gotScraping({
                url: ENTER_VAT_DETAILS_URL,
                form: payload,
                method: 'POST',
                proxyUrl,
                http2: true,
                maxRedirects: 0,
                headers: {
                    ...fingerprintHeaders,
                    Cookie: session.getCookieString(GOV_UK_ORIGIN),
                    'Content-Type': 'application/x-www-form-urlencoded',
                    Referer: ENTER_VAT_DETAILS_URL,
                },
            });
        } catch (err) {
            // Errors without a response (e.g. network or proxy failures) and responses
            // without a Location header are not the redirect we are waiting for:
            // rethrow the original error instead of failing on a missing property.
            if (!err.response?.headers?.location) throw err;
            postResponse = err.response;
        }

        if (!postResponse) throw new Error('VAT number check failed: Post to gov.uk failed');
        await session.setCookiesFromResponse(postResponse);

        // Step 3: follow the redirect manually, now with the cookies from the POST response.
        const { location } = postResponse.headers;
        log.info('Fetching result', { location });

        // Resolve against the service origin so both relative and absolute Location
        // values work, and never send the session cookies to a different origin.
        const resultUrl = new URL(location, GOV_UK_ORIGIN);
        if (resultUrl.origin !== GOV_UK_ORIGIN) {
            throw new Error(`VAT number check failed: Unexpected redirect to ${resultUrl.origin}`);
        }

        const checkResponse = await gotScraping({
            url: resultUrl.href,
            proxyUrl,
            http2: true,
            maxRedirects: 1,
            headers: {
                ...fingerprintHeaders,
                Cookie: session.getCookieString(GOV_UK_ORIGIN),
            },
        });

        // Step 4: read the verdict from the result page headings.
        const $ = cheerio.load(checkResponse.body);
        const isInvalid = $('h1.govuk-heading-xl').text().toLowerCase().includes('invalid uk vat number');
        const isValid = $('h1.govuk-panel__title').text().toLowerCase().includes('valid uk vat number');
        if (!isInvalid && !isValid) {
            // Keep the unrecognised page in the key-value store so it can be inspected later.
            await Actor.setValue(`debug-${vatId}`, checkResponse.body, { contentType: 'text/html' });
            throw new Error('Could not check VAT number: Unknown response');
        }

        const result = {
            isValid,
            checkedAt: new Date(),
            vatId,
            businessName: null,
            address: null,
            status: 'success',
        };

        // Business details are rendered as a sub-heading followed by the element holding the value.
        $('h3.govuk-heading-s').each((i, el) => {
            const heading = $(el).text();
            if (heading.includes('Registered business name')) {
                result.businessName = $(el).next().text().trim();
            }
            if (heading.includes('Registered business address')) {
                // The address spans several lines; collapse all whitespace into single spaces.
                result.address = $(el).next().text().trim().replace(/\s+/g, ' ');
            }
        });

        await Actor.pushData(result);
    },
    async failedRequestHandler({ request }, err) {
        // Report the failure in the dataset; `isValid: null` marks the result as unknown.
        await Actor.pushData({
            isValid: null,
            status: 'error',
            checkedAt: new Date(),
            vatId: request.userData.vatId,
            businessName: null,
            address: null,
            error: err.message,
        });
    },
});

// Run the crawler over the prepared start requests (one per VAT number that passed the format check).
await crawler.run(startUrls);

// Exit successfully
await Actor.exit();
src/routes.js
import { createCheerioRouter, Dataset } from 'crawlee';

// NOTE(review): this module looks like a leftover from the project template; the
// crawler in src/main.js defines its own request handler and does not appear to
// import this router. Confirm whether the file is still needed.
export const router = createCheerioRouter();

// Default route: enqueue links matching the glob below, labelled 'detail'.
router.addDefaultHandler(async ({ enqueueLinks, log }) => {
    log.info(`enqueueing new URLs`);
    await enqueueLinks({
        globs: ['https://apify.com/*'],
        label: 'detail',
    });
});

// Route for requests labelled 'vatId': store the page URL and title in the dataset.
// NOTE(review): the default handler enqueues links with label 'detail', while this
// handler is registered for 'vatId' — verify which label is intended.
router.addHandler('vatId', async ({ request, $, log }) => {
    const title = $('title').text();
    log.info(`${title}`, { url: request.loadedUrl });

    await Dataset.pushData({
        url: request.loadedUrl,
        title,
    });
});
.dockerignore
# configurations.idea
# crawlee and apify storage foldersapify_storagecrawlee_storagestorage
# installed filesnode_modules
# git folder.git
.editorconfig
root = true
[*]indent_style = spaceindent_size = 4charset = utf-8trim_trailing_whitespace = trueinsert_final_newline = trueend_of_line = lf
.eslintrc
{ "extends": "@apify", "root": true}
.gitignore
# This file tells Git which files shouldn't be added to source control
.DS_Store.ideadistnode_modulesapify_storagestorage
package.json
{ "name": "crawlee-cheerio-javascript", "version": "0.0.1", "type": "module", "description": "This is a boilerplate of an Apify actor.", "engines": { "node": ">=16.0.0" }, "dependencies": { "apify": "^3.0.0", "crawlee": "^3.0.0", "got-scraping": "^3.2.0", "cheerio": "1.0.0-rc.12", "fingerprint-generator": "^2.1.10" }, "devDependencies": { "@apify/eslint-config": "^0.3.1", "eslint": "^8.20.0" }, "scripts": { "start": "node src/main.js", "lint": "eslint ./src --ext .js,.jsx", "lint:fix": "eslint ./src --ext .js,.jsx --fix", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1" }, "author": "It's not you it's me", "license": "ISC"}