UK VAT number checker avatar
UK VAT number checker

Pricing

Pay per usage

Go to Store
UK VAT number checker

UK VAT number checker

Developed by

Jan Novotny

Jan Novotny

Maintained by Community

This actor uses https://www.tax.service.gov.uk/ to check whether a UK VAT number is valid. It can check multiple VAT numbers in one run. If a VAT number is valid, the registered business name and address are fetched as well.

0.0 (0)

Pricing

Pay per usage

2

Total users

10

Monthly users

2

Runs succeeded

>99%

Last modified

a month ago

.actor/Dockerfile

# Specify the base Docker image. You can read more about
# the available images at https://sdk.apify.com/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:16
# Copy just package.json and package-lock.json first, so that the
# dependency-install layer below can be reused from the Docker layer cache
# as long as these two files do not change.
COPY package*.json ./
# Install NPM packages, skipping optional and development dependencies to
# keep the image small. The progress output is disabled to avoid logging too
# much; the dependency tree and the Node.js/NPM versions are printed for
# debugging. `npm list` is followed by `|| true` so that a non-zero exit code
# from it does not abort the build. Finally, ~/.npm is removed in the same
# RUN step so it does not end up in the image layer.
RUN npm --quiet set progress=false \
&& npm install --omit=dev --omit=optional \
&& echo "Installed NPM packages:" \
&& (npm list --omit=dev --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version \
&& rm -r ~/.npm
# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./
# Start the actor through the `start` script from package.json;
# `--silent` suppresses npm's own output.
CMD npm start --silent

.actor/INPUT_SCHEMA.json

{
"title": "CheerioCrawler Template",
"type": "object",
"schemaVersion": 1,
"properties": {
"vatIds": {
"title": "VAT numbers",
"type": "array",
"description": "UK VAT numbers to check.",
"editor": "stringList",
"prefill": ["GB123456789"],
"example": ["GB123456789"]
}
},
"required": ["vatIds"]
}

.actor/README.md

1# UK VAT number checker
2
3Checks whether the given UK VAT numbers are valid. Uses `https://www.tax.service.gov.uk/check-vat-number/` to perform the check.
4
5# Input
6
7Array of UK VAT numbers.
8
9# Output
10
11Array of check results.
12
13```
14[
15 {
16 "vatId": "GB220430231",
17 "isValid": true,
18 "businessName": "TESCO PLC",
19 "checkedAt": "2025-03-28T13:33:56.267Z",
20 "address": "CIRRUS A SHIRE PARK KESTREL WAY WELWYN GARDEN CITY AL7 1GA GB"
21 }
22]
23```

.actor/actor.json

{
"actorSpecification": 1,
"name": "uk-vat-id-check",
"title": "UK VAT number checker",
"description": "",
"version": "0.0",
"input": "./INPUT_SCHEMA.json",
"readme": "./README.md",
"dockerfile": "./Dockerfile",
"minMemoryMbytes": 256,
"maxMemoryMbytes": 4096,
"storages": {
"dataset": {
"actorSpecification": 1,
"title": "UK VAT number check result",
"views": {
"results": {
"title": "UK VAT number check result",
"transformation": {
"fields": [
"vatId",
"isValid",
"businessName",
"checkedAt",
"address"
]
},
"display": {
"component": "table",
"properties": {
"vatId": {
"label": "VAT number",
"format": "string"
},
"isValid": {
"label": "Is valid",
"format": "boolean"
},
"businessName": {
"label": "Business name",
"format": "string"
},
"address": {
"label": "Address",
"format": "string"
},
"checkedAt": {
"label": "Checked at",
"format": "datetime"
}
}
}
}
}
}
}
}

src/main.js

1/**
2 * This template is a production ready boilerplate for developing with `CheerioCrawler`.
3 * Use this to bootstrap your projects using the most up-to-date code.
4 * If you're looking for examples or want to learn more, see README.
5 */
6
7// For more information, see https://sdk.apify.com
8import { Actor } from 'apify';
9// For more information, see https://crawlee.dev
10import { BasicCrawler } from 'crawlee';
11import { gotScraping } from 'got-scraping';
12import cheerio from 'cheerio';
13import { FingerprintGenerator } from 'fingerprint-generator';
14
// Accepted UK VAT number formats, each optionally prefixed with "GB" or "XI"
// ("XI" is used for Northern Ireland):
//   - standard:          9 digits
//   - branches:          12 digits
//   - government:        "GD" + 3 digits
//   - health authority:  "HA" + 3 digits
const UK_VAT_NUMBER_REGEX = /^(GB|XI)?([0-9]{9}([0-9]{3})?$|(GD|HA)[0-9]{3}$)/;

// Initialize the Apify SDK
await Actor.init();

// `Actor.getInput()` resolves to null when the run has no input, so fall back
// to an empty object instead of crashing on the property access below.
const input = (await Actor.getInput()) ?? {};
const vatIds = input.vatIds ?? [];
if (!Array.isArray(vatIds)) {
    throw new Error('Input field "vatIds" must be an array of UK VAT numbers');
}

// Split the input: well-formed numbers are queued for the online check,
// everything else (including non-string entries) is reported right away
// as invalid without contacting gov.uk.
const vatIdsToValidate = [];
for (const vatId of vatIds) {
    if (typeof vatId === 'string' && UK_VAT_NUMBER_REGEX.test(vatId)) {
        vatIdsToValidate.push(vatId);
    } else {
        await Actor.pushData({ vatId, isValid: false, checkedAt: new Date(), address: null, businessName: null, status: 'error' });
    }
}

// One request per VAT number. Every request targets the same form URL, so the
// VAT number itself is used as the uniqueKey to tell the requests apart.
const startUrls = vatIdsToValidate.map((vatId) => ({
    url: 'https://www.tax.service.gov.uk/check-vat-number/enter-vat-details',
    uniqueKey: vatId,
    userData: { vatId, label: 'start' },
}));

// Generates the browser-like headers used by the request handler.
const fingerprintGenerator = new FingerprintGenerator();

const proxyConfiguration = await Actor.createProxyConfiguration();
41
const VAT_CHECK_ORIGIN = 'https://www.tax.service.gov.uk';
const VAT_CHECK_FORM_URL = `${VAT_CHECK_ORIGIN}/check-vat-number/enter-vat-details`;

const crawler = new BasicCrawler({
    useSessionPool: true,
    /**
     * Checks the single VAT number stored in `request.userData.vatId`:
     *   1. GET the form page to obtain the CSRF token and cookies,
     *   2. POST the form and capture the redirect it answers with,
     *   3. GET the redirect target and parse the verdict from its HTML.
     * Pushes one result item to the default dataset.
     *
     * @throws {Error} when the CSRF token is missing, the POST does not end in
     *   a redirect, or the result page contains neither known heading.
     */
    async requestHandler({ request, session, log }) {
        const { vatId } = request.userData;
        const { headers: fingerprintHeaders } = fingerprintGenerator.getFingerprint({
            devices: ['desktop'],
            browsers: ['chrome'],
        });

        // The same proxy URL is reused for all three HTTP calls of this check.
        const proxyUrl = await proxyConfiguration.newUrl();
        const formResponse = await gotScraping({
            url: VAT_CHECK_FORM_URL,
            method: 'GET',
            proxyUrl,
            http2: true,
            headers: {
                ...fingerprintHeaders,
            },
        });

        let $ = cheerio.load(formResponse.body);
        const csrfToken = $('input[name="csrfToken"]').val();
        // Without the token the POST below cannot succeed, so fail early with a clear message.
        if (!csrfToken) throw new Error('VAT number check failed: CSRF token not found on the form page');
        const payload = {
            csrfToken,
            target: vatId,
            requester: '',
        };
        await session.setCookiesFromResponse(formResponse);

        let postResponse;
        // We have to catch the redirect as the response contains set-cookie headers
        // And gotScraping won't use these cookies in redirect request
        // That's why this hack with maxRedirects: 0
        try {
            await gotScraping({
                url: VAT_CHECK_FORM_URL,
                form: payload,
                method: 'POST',
                proxyUrl,
                http2: true,
                maxRedirects: 0,
                headers: {
                    ...fingerprintHeaders,
                    Cookie: session.getCookieString(VAT_CHECK_ORIGIN),
                    'Content-Type': 'application/x-www-form-urlencoded',
                    Referer: VAT_CHECK_FORM_URL,
                },
            });
        } catch (err) {
            // Errors raised before any response arrived (network failure, timeout, ...)
            // have no `response`; guard the lookup so the original error is rethrown
            // instead of a TypeError.
            if (!err.response?.headers?.location) throw err;
            postResponse = err.response;
        }

        // The POST completing without a redirect means the form was not accepted.
        if (!postResponse) throw new Error('VAT number check failed: Post to gov.uk failed');
        await session.setCookiesFromResponse(postResponse);

        const { location } = postResponse.headers;
        log.info('Fetching result', { location });
        const checkResponse = await gotScraping({
            // Resolving against the origin handles both relative and absolute Location values.
            url: new URL(location, VAT_CHECK_ORIGIN).href,
            proxyUrl,
            http2: true,
            maxRedirects: 1,
            headers: {
                ...fingerprintHeaders,
                Cookie: session.getCookieString(VAT_CHECK_ORIGIN),
            },
        });
        $ = cheerio.load(checkResponse.body);

        // The two verdicts are rendered in different heading elements, so the
        // "valid" substring check cannot be triggered by the "invalid" heading.
        const isInvalid = $('h1.govuk-heading-xl').text().toLowerCase().includes('invalid uk vat number');
        const isValid = $('h1.govuk-panel__title').text().toLowerCase().includes('valid uk vat number');
        if (!isInvalid && !isValid) {
            // Keep the unexpected page in the key-value store for debugging.
            await Actor.setValue(`debug-${vatId}`, checkResponse.body, { contentType: 'text/html' });
            throw new Error('Could not check VAT number: Unknown response');
        }

        const result = {
            isValid,
            checkedAt: new Date(),
            vatId,
            businessName: null,
            address: null,
            status: 'success',
        };

        // Business details follow their respective sub-headings on the result page.
        $('h3.govuk-heading-s').each((_, el) => {
            const heading = $(el).text();
            if (heading.includes('Registered business name')) {
                result.businessName = $(el).next().text().trim();
            }
            if (heading.includes('Registered business address')) {
                result.address = $(el).next().text().trim().replace(/\s+/g, ' ');
            }
        });

        await Actor.pushData(result);
    },
    /**
     * Records an error item for a VAT number whose check ultimately failed,
     * so that every queued VAT number produces a dataset row.
     */
    async failedRequestHandler({ request }, err) {
        await Actor.pushData({
            isValid: null,
            status: 'error',
            checkedAt: new Date(),
            vatId: request.userData.vatId,
            businessName: null,
            address: null,
            error: err.message,
        });
    },
});

await crawler.run(startUrls);

// Exit successfully
await Actor.exit();

src/routes.js

// `createCheerioRouter` must be imported alongside `Dataset`; without it this
// module throws a ReferenceError as soon as it is loaded.
import { createCheerioRouter, Dataset } from 'crawlee';

// Router with a default handler and a 'vatId' handler.
// NOTE(review): src/main.js as shown does not import this module, and the
// apify.com glob below looks like a template leftover - confirm whether this
// file is still needed.
export const router = createCheerioRouter();

// Default handler: enqueues links matching the glob under the 'detail' label.
// NOTE(review): no handler is registered for 'detail' (only 'vatId' below), so
// the two labels look mismatched - confirm which one is intended.
router.addDefaultHandler(async ({ enqueueLinks, log }) => {
    log.info(`enqueueing new URLs`);
    await enqueueLinks({
        globs: ['https://apify.com/*'],
        label: 'detail',
    });
});

// 'vatId' handler: stores the loaded URL and the page <title> in the default dataset.
router.addHandler('vatId', async ({ request, $, log }) => {
    const title = $('title').text();
    log.info(`${title}`, { url: request.loadedUrl });

    await Dataset.pushData({
        url: request.loadedUrl,
        title,
    });
});

.dockerignore

# configurations
.idea
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.eslintrc

{
"extends": "@apify",
"root": true
}

.gitignore

# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage

package.json

{
"name": "crawlee-cheerio-javascript",
"version": "0.0.1",
"type": "module",
"description": "This is a boilerplate of an Apify actor.",
"engines": {
"node": ">=16.0.0"
},
"dependencies": {
"apify": "^3.0.0",
"crawlee": "^3.0.0",
"got-scraping": "^3.2.0",
"cheerio": "1.0.0-rc.12",
"fingerprint-generator": "^2.1.10"
},
"devDependencies": {
"@apify/eslint-config": "^0.3.1",
"eslint": "^8.20.0"
},
"scripts": {
"start": "node src/main.js",
"lint": "eslint ./src --ext .js,.jsx",
"lint:fix": "eslint ./src --ext .js,.jsx --fix",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
},
"author": "It's not you it's me",
"license": "ISC"
}