Expired Domains Scraper
Try for free
No credit card required
View all Actors
Expired Domains Scraper
martin1080p/expired-domains-scraper
Try for free
No credit card required
The Expired Domains Scraper automates finding valuable expired domains from expireddomains.com, offering filters and sorting by SEO metrics and auction details for efficient domain acquisition.
.actor/Dockerfile
# Build stage.
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:20 AS builder

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install all dependencies, including the dev dependencies used by the
# build step below. Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false

# Next, copy the source files.
COPY . ./

# Build the project (runs the "build" script from package.json).
RUN npm run build

# Create final image
FROM apify/actor-node:20

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Copy built JS files from the builder stage. The --from=builder flag is
# required: without it COPY reads the source path from the local build
# context instead of the filesystem of the builder stage.
COPY --from=builder /usr/src/app/dist ./dist

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./


# Run the image.
CMD npm run start:prod --silent
.actor/actor.json
1{
2 "actorSpecification": 1,
3 "name": "expired-domains-scraper",
4 "title": "Expired Domains Scraper",
5 "description": "Expired Domains Scraper is a robust tool designed to automate the process of identifying and extracting information about expired domain names. This tool is ideal for digital marketers, domain investors, SEO specialists, and businesses looking to acquire valuable domains that have lapsed in registration. By leveraging this scraper, users can gain a competitive edge in acquiring high-quality domains that may still have significant SEO value, backlinks, or brand potential.",
6 "version": "0.0",
7 "meta": {
8 "templateId": "ts-crawlee-cheerio"
9 },
10 "input": "./input_schema.json",
11 "dockerfile": "./Dockerfile",
12 "storages": {
13 "dataset": {
14 "actorSpecification": 1,
15 "fields": {},
16 "views": {
17 "overview": {
18 "title": "Overview",
19 "transformation": {
20 "fields": [
21 "name",
22 "link",
23 "enddate",
24 "tld",
25 "godaddyPrice",
26 "godaddyBid",
27 "auctionType",
28 "rdapRegistration",
29 "waybackFirstYear",
30 "waybackCount",
31 "searchVolumeVolume",
32 "searchVolumeCpc",
33 "googleStatusIsBlocked",
34 "googleStatusIsFaillisted",
35 "googleStatusIsAdult",
36 "mozDomainAuthority",
37 "mozPageAuthority",
38 "majesticCitationflow",
39 "majesticTrustflow",
40 "majesticExtbacklinks",
41 "ownranksBrandrank",
42 "ownranksTldsaturation",
43 "ownranksRadiotest"
44 ]
45 },
46 "display": {
47 "component": "table",
48 "properties": {
49 "name": {
50 "label": "Name",
51 "format": "text"
52 },
53 "link": {
54 "label": "Link",
55 "format": "link"
56 },
57 "enddate": {
58 "label": "End date",
59 "format": "date"
60 },
61 "tld": {
62 "label": "TLD",
63 "format": "text"
64 },
65 "godaddyPrice": {
66 "label": "GoDaddy price",
67 "format": "number"
68 },
69 "godaddyBid": {
70 "label": "Number of bids",
71 "format": "number"
72 },
73 "auctionType": {
74 "label": "Auction type",
75 "format": "text"
76 },
77 "rdapRegistration": {
78 "label": "Whois first registration",
79 "format": "number"
80 },
81 "waybackFirstYear": {
82 "label": "Wayback first year",
83 "format": "number"
84 },
85 "waybackCount": {
86 "label": "Wayback count",
87 "format": "number"
88 },
89 "searchVolumeVolume": {
90 "label": "Search volume",
91 "format": "number"
92 },
93 "searchVolumeCpc": {
94 "label": "Search volume CPC",
95 "format": "number"
96 },
97 "googleStatusIsBlocked": {
98 "label": "Google status is blocked",
99 "format": "boolean"
100 },
101 "googleStatusIsFaillisted": {
102 "label": "Google status is faillisted",
103 "format": "boolean"
104 },
105 "googleStatusIsAdult": {
106 "label": "Google status is adult",
107 "format": "boolean"
108 },
109 "mozDomainAuthority": {
110 "label": "Moz domain authority",
111 "format": "number"
112 },
113 "mozPageAuthority": {
114 "label": "Moz page authority",
115 "format": "number"
116 },
117 "majesticCitationflow": {
118 "label": "Majestic citation flow",
119 "format": "number"
120 },
121 "majesticTrustflow": {
122 "label": "Majestic trust flow",
123 "format": "number"
124 },
125 "majesticExtbacklinks": {
126 "label": "Majestic external backlinks",
127 "format": "number"
128 },
129 "ownranksBrandrank": {
130 "label": "Ownranks brand rank",
131 "format": "number"
132 },
133 "ownranksTldsaturation": {
134 "label": "Ownranks TLD saturation",
135 "format": "number"
136 },
137 "ownranksRadiotest": {
138 "label": "Ownranks radio test",
139 "format": "number"
140 }
141 }
142 }
143 }
144 }
145 }
146 }
147}
.actor/input_schema.json
1{
2 "title": "Expired Domains Scraper",
3 "type": "object",
4 "schemaVersion": 1,
5 "required": ["numDomains", "sortBy", "sortDir", "filterLengthMin", "filterLengthMax", "filterExpDate"],
6 "properties": {
7 "numDomains": {
8 "title": "Number of domains",
9 "description": "Total number of domains to be fetched.",
10 "type": "integer",
11 "editor": "number",
12 "default": 50,
13 "minimum": 25
14 },
15 "sortBy": {
16 "sectionCaption": "Sort settings",
17 "sectionDescription": "Domains to be fetched in specific order.",
18 "title": "Sort by",
19 "description": "Select one of the options.",
20 "type": "string",
21 "enum": ["name", "link", "enddate", "tld", "godaddy_price", "auction_type", "godaddy_bid", "ai_price_pred_price", "rdap_registration", "wayback_first_year", "wayback_count", "search_volume_volume", "search_volume_cpc", "google_status_is_blocked", "google_status_is_faillisted", "google_status_is_adult", "fb_shares", "fb_react", "majestic_extbacklinks", "majestic_citationflow", "majestic_trustflow", "moz_domain_authority", "moz_page_authority", "ownranks_brandrank", "ownranks_tldsaturation", "ownranks_radiotest"],
22 "enumTitles": ["Name", "Link", "Expiration date", "TLD", "Current price", "Auction type", "Godaddy bid count", "Est. auction price", "WHOIS birth year", "Internet archive birth year", "Internet archive count", "Search volume", "Cost per click", "Google blocked", "Google Faillisted", "Google adult", "Facebook shares", "Facebook react", "External backlinks", "Citation flow", "Trust flow", "MOZ domain authority", "MOZ page authority", "BrandRank score", "TLD Saturation", "RadioTest"],
23 "default": "name"
24 },
25 "sortDir": {
26 "title": "Sort direction",
27 "description": "Select one of the options.",
28 "type": "string",
29 "enum": ["asc", "desc"],
30 "enumTitles": ["ASC", "DESC"],
31 "default": "asc"
32 },
33 "filterTldCom": {
34 "sectionCaption": "Filter settings",
35 "sectionDescription": "Filter fetched domains by specific properties.",
36 "groupCaption": "Filter by TLD",
37 "groupDescription": "Specific TLDs to be fetched.",
38 "title": "com",
39 "description": "Domains ending with .com.",
40 "type": "boolean",
41 "default": true
42 },
43 "filterTldNet": {
44 "title": "net",
45 "description": "Domains ending with .net.",
46 "type": "boolean",
47 "default": true
48 },
49 "filterTldOrg": {
50 "title": "org",
51 "description": "Domains ending with .org.",
52 "type": "boolean",
53 "default": true
54 },
55 "filterTldIo": {
56 "title": "io",
57 "description": "Domains ending with .io.",
58 "type": "boolean",
59 "default": true
60 },
61 "filterTldUk": {
62 "title": "uk",
63 "description": "Domains ending with .uk.",
64 "type": "boolean",
65 "default": true
66 },
67 "filterTldN": {
68 "title": "nTLD",
69 "description": "Domains ending with .agency, .bio, etc.",
70 "type": "boolean",
71 "default": true
72 },
73 "filterTldOther": {
74 "title": "other TLDs",
75 "description": "Domains ending with all other extensions.",
76 "type": "boolean",
77 "default": true
78 },
79 "filterLengthMin": {
80 "title": "Length min",
81 "description": "Domains with minimum length of specified number.",
82 "type": "integer",
83 "default": 1,
84 "minimum": 1,
85 "maximum": 255,
86 "nullable": true
87 },
88 "filterLengthMax": {
89 "title": "Length max",
90 "description": "Domains with maximum length of specified number.",
91 "type": "integer",
92 "default": 255,
93 "minimum": 1,
94 "maximum": 255,
95 "nullable": true
96 },
97 "filterCharsHyp": {
98 "groupCaption": "Filter by used characters",
99 "groupDescription": "Domains excluding certain characters.",
100 "title": "No hyphens",
101 "description": "Domains excluding hyphens in their name.",
102 "type": "boolean",
103 "default": false
104 },
105 "filterCharsNum": {
106 "title": "No numbers",
107 "description": "Domains excluding numbers in their name.",
108 "type": "boolean",
109 "default": false
110 },
111 "filterCharsLet": {
112 "title": "No letters",
113 "description": "Domains excluding letters in their name.",
114 "type": "boolean",
115 "default": false
116 },
117 "filterGglNA": {
118 "groupCaption": "Filter by Google status",
119 "groupDescription": "Domains listed in certain Google lists.",
120 "title": "Not adult",
121 "description": "Domains not marked as adult content.",
122 "type": "boolean",
123 "default": true
124 },
125 "filterGglNB": {
126 "title": "Not blocked",
127 "description": "Domains not marked as blocked.",
128 "type": "boolean",
129 "default": true
130 },
131 "filterGglA": {
132 "title": "Adult",
133 "description": "Domains marked as adult content.",
134 "type": "boolean",
135 "default": false
136 },
137 "filterGglB": {
138 "title": "Blocked",
139 "description": "Domains marked as blocked.",
140 "type": "boolean",
141 "default": false
142 },
143 "filterAucTypAuc": {
144 "groupCaption": "Filter by auction type",
145 "groupDescription": "Domains in certain auction processes.",
146 "title": "Godaddy auctions",
147 "description": "Domains with listed Godaddy auction.",
148 "type": "boolean",
149 "default": true
150 },
151 "filterAucTypClo": {
152 "title": "Godaddy closeouts",
153 "description": "Domains in the middle of closeout process.",
154 "type": "boolean",
155 "default": true
156 },
157 "filterAucTypDel": {
158 "title": "Pending delete",
159 "description": "Domains awaiting pending deletion.",
160 "type": "boolean",
161 "default": true
162 },
163 "filterExpDate": {
164 "title": "Expiration date",
165 "description": "Domains with selected date of expiration. Allowed date range is from today to 4 days after today. For example, if today is 2024-06-24, the allowed range is 2024-06-24 to 2024-06-28.",
166 "type": "string",
167 "editor": "datepicker",
168 "default": "2024-01-01",
169 "prefill": "2024-01-01"
170 }
171 }
172}
.vscode/launch.json
1{
2 "version": "0.2.0",
3 "configurations": [
4 {
5 "type": "node",
6 "request": "launch",
7 "name": "Launch via apify",
8 "runtimeExecutable": "apify",
9 "runtimeArgs": [
10 "run",
11 "--purge"
12 ],
13 "skipFiles": [
14 "<node_internals>/**"
15 ]
16 }
17 ]
18}
src/builders/domainBuilder.ts
1import { DomainParser } from '../parsers/domainParser.js';
2import { Domain } from '../types/domain.js';
3
/**
 * Step-by-step builder for a `Domain` record.
 *
 * Every `addX()` method reads the matching getter from the wrapped
 * `DomainParser`, stores the value on the record under construction and
 * returns the builder so calls can be chained. `build()` returns the record.
 */
export class DomainBuilder {
    domainParser: DomainParser;
    domain: Domain = {} as Domain;

    /**
     * @param domainParser Parser that supplies the field values.
     */
    constructor(domainParser: DomainParser) {
        this.domainParser = domainParser;
    }

    /** Copies `name` from the parser. */
    addName(): DomainBuilder {
        const { name } = this.domainParser;
        this.domain.name = name;
        return this;
    }

    /** Copies `link` from the parser. */
    addLink(): DomainBuilder {
        const { link } = this.domainParser;
        this.domain.link = link;
        return this;
    }

    /** Copies `enddate` from the parser. */
    addEnddate(): DomainBuilder {
        const { enddate } = this.domainParser;
        this.domain.enddate = enddate;
        return this;
    }

    /** Copies `tld` from the parser. */
    addTld(): DomainBuilder {
        const { tld } = this.domainParser;
        this.domain.tld = tld;
        return this;
    }

    /** Copies `godaddyPrice` from the parser. */
    addGodaddyPrice(): DomainBuilder {
        const { godaddyPrice } = this.domainParser;
        this.domain.godaddyPrice = godaddyPrice;
        return this;
    }

    /** Copies `godaddyBid` from the parser. */
    addGodaddyBid(): DomainBuilder {
        const { godaddyBid } = this.domainParser;
        this.domain.godaddyBid = godaddyBid;
        return this;
    }

    /** Copies `auctionType` from the parser. */
    addAuctionType(): DomainBuilder {
        const { auctionType } = this.domainParser;
        this.domain.auctionType = auctionType;
        return this;
    }

    /** Copies `aiPricePredPrice` from the parser. */
    addAiPricePredPrice(): DomainBuilder {
        const { aiPricePredPrice } = this.domainParser;
        this.domain.aiPricePredPrice = aiPricePredPrice;
        return this;
    }

    /** Copies `rdapRegistration` from the parser. */
    addRdapRegistration(): DomainBuilder {
        const { rdapRegistration } = this.domainParser;
        this.domain.rdapRegistration = rdapRegistration;
        return this;
    }

    /** Copies `waybackFirstYear` from the parser. */
    addWaybackFirstYear(): DomainBuilder {
        const { waybackFirstYear } = this.domainParser;
        this.domain.waybackFirstYear = waybackFirstYear;
        return this;
    }

    /** Copies `waybackCount` from the parser. */
    addWaybackCount(): DomainBuilder {
        const { waybackCount } = this.domainParser;
        this.domain.waybackCount = waybackCount;
        return this;
    }

    /** Copies `searchVolumeVolume` from the parser. */
    addSearchVolumeVolume(): DomainBuilder {
        const { searchVolumeVolume } = this.domainParser;
        this.domain.searchVolumeVolume = searchVolumeVolume;
        return this;
    }

    /** Copies `searchVolumeCpc` from the parser. */
    addSearchVolumeCpc(): DomainBuilder {
        const { searchVolumeCpc } = this.domainParser;
        this.domain.searchVolumeCpc = searchVolumeCpc;
        return this;
    }

    /** Copies `googleStatusIsBlocked` from the parser. */
    addGoogleStatusIsBlocked(): DomainBuilder {
        const { googleStatusIsBlocked } = this.domainParser;
        this.domain.googleStatusIsBlocked = googleStatusIsBlocked;
        return this;
    }

    /** Copies `googleStatusIsFaillisted` from the parser. */
    addGoogleStatusIsFaillisted(): DomainBuilder {
        const { googleStatusIsFaillisted } = this.domainParser;
        this.domain.googleStatusIsFaillisted = googleStatusIsFaillisted;
        return this;
    }

    /** Copies `googleStatusIsAdult` from the parser. */
    addGoogleStatusIsAdult(): DomainBuilder {
        const { googleStatusIsAdult } = this.domainParser;
        this.domain.googleStatusIsAdult = googleStatusIsAdult;
        return this;
    }

    /** Copies `fbShares` from the parser. */
    addFbShares(): DomainBuilder {
        const { fbShares } = this.domainParser;
        this.domain.fbShares = fbShares;
        return this;
    }

    /** Copies `fbReact` from the parser. */
    addFbReact(): DomainBuilder {
        const { fbReact } = this.domainParser;
        this.domain.fbReact = fbReact;
        return this;
    }

    /** Copies `majesticExtbacklinks` from the parser. */
    addMajesticExtbacklinks(): DomainBuilder {
        const { majesticExtbacklinks } = this.domainParser;
        this.domain.majesticExtbacklinks = majesticExtbacklinks;
        return this;
    }

    /** Copies `majesticCitationflow` from the parser. */
    addMajesticCitationflow(): DomainBuilder {
        const { majesticCitationflow } = this.domainParser;
        this.domain.majesticCitationflow = majesticCitationflow;
        return this;
    }

    /** Copies `majesticTrustflow` from the parser. */
    addMajesticTrustflow(): DomainBuilder {
        const { majesticTrustflow } = this.domainParser;
        this.domain.majesticTrustflow = majesticTrustflow;
        return this;
    }

    /** Copies `mozDomainAuthority` from the parser. */
    addMozDomainAuthority(): DomainBuilder {
        const { mozDomainAuthority } = this.domainParser;
        this.domain.mozDomainAuthority = mozDomainAuthority;
        return this;
    }

    /** Copies `mozPageAuthority` from the parser. */
    addMozPageAuthority(): DomainBuilder {
        const { mozPageAuthority } = this.domainParser;
        this.domain.mozPageAuthority = mozPageAuthority;
        return this;
    }

    /** Copies `ownranksBrandrank` from the parser. */
    addOwnranksBrandrank(): DomainBuilder {
        const { ownranksBrandrank } = this.domainParser;
        this.domain.ownranksBrandrank = ownranksBrandrank;
        return this;
    }

    /** Copies `ownranksTldsaturation` from the parser. */
    addOwnranksTldsaturation(): DomainBuilder {
        const { ownranksTldsaturation } = this.domainParser;
        this.domain.ownranksTldsaturation = ownranksTldsaturation;
        return this;
    }

    /** Copies `ownranksRadiotest` from the parser. */
    addOwnranksRadiotest(): DomainBuilder {
        const { ownranksRadiotest } = this.domainParser;
        this.domain.ownranksRadiotest = ownranksRadiotest;
        return this;
    }

    /**
     * @returns The record holding every field added so far.
     */
    build(): Domain {
        return this.domain;
    }
}
src/builders/requestBuilder.ts
1import { log, RequestOptions } from 'crawlee';
2import { config } from '../config.js';
3import { Input } from '../types/input.js';
4
/**
 * Fluent builder for one POST request against `config.fetchUrl`.
 *
 * Each `add*()` method translates part of the actor input into the
 * form-encoded payload (comma-terminated filter tokens, the sort key and the
 * length bounds) and returns the builder for chaining. `build()` assembles
 * the final `RequestOptions`.
 */
export class RequestBuilder {
    private filters: string;
    private sort: string;
    private length: string;
    private page: number;
    private id: number;
    private input: Input;

    // Columns requested from the endpoint, sent as repeated `columns[]` fields.
    private includedColumns: string[] = [
        'link',
        'enddate',
        'tld',
        'godaddy_price',
        'godaddy_bid',
        'auction_type',
        'ai_price_pred_price',
        'rdap_registration',
        'wayback_first_year',
        'wayback_count',
        'search_volume_volume',
        'search_volume_cpc',
        'google_status_is_blocked',
        'google_status_is_faillisted',
        'google_status_is_adult',
        'fb_shares',
        'fb_react',
        'moz_domain_authority',
        'moz_page_authority',
        'majestic_citationflow',
        'majestic_trustflow',
        'majestic_extbacklinks',
        'ownranks_brandrank',
        'ownranks_tldsaturation',
        'ownranks_radiotest',
    ];

    /**
     * @param input Actor input that drives every filter and the sort order.
     * @param page Page number sent in the payload; defaults to 0.
     */
    constructor(input: Input, page?: number) {
        this.filters = '';
        this.sort = '';
        this.length = '';
        this.page = page ?? 0;
        this.id = 0;
        this.input = input;
    }

    /** Appends one filter token per enabled TLD option. */
    addTldFilters(): RequestBuilder {
        if (this.input.filterTldCom) this.filters += 'com,';
        if (this.input.filterTldNet) this.filters += 'net,';
        if (this.input.filterTldOrg) this.filters += 'org,';
        if (this.input.filterTldIo) this.filters += 'io,';
        if (this.input.filterTldUk) this.filters += 'uk,';
        if (this.input.filterTldN) this.filters += 'n,';
        if (this.input.filterTldOther) this.filters += 'other,';

        return this;
    }

    /**
     * Prepares the `min[domain_length]` / `max[domain_length]` payload fields.
     * A bound that is null or undefined is left out instead of being sent as
     * the literal text "null".
     *
     * @throws Error when both bounds are set and the minimum exceeds the maximum.
     */
    addLengthFilters(): RequestBuilder {
        const { filterLengthMin: min, filterLengthMax: max } = this.input;

        if (min != null && max != null && min > max) {
            throw new Error('Min length cannot be greater than max length');
        }

        const bounds: string[] = [];
        if (min != null) bounds.push(`min[domain_length]=${min}`);
        if (max != null) bounds.push(`max[domain_length]=${max}`);
        this.length = bounds.join('&');

        return this;
    }

    /** Appends the tokens that exclude hyphens, numbers or letters. */
    addCharacterFilters(): RequestBuilder {
        if (this.input.filterCharsHyp) this.filters += 'no-dashes,';
        if (this.input.filterCharsNum) this.filters += 'no-numbers,';
        if (this.input.filterCharsLet) this.filters += 'no-letters,';

        return this;
    }

    /**
     * Misspelled alias of `addCharacterFilters()`, kept so existing callers
     * keep working.
     *
     * @deprecated Use `addCharacterFilters()`.
     */
    addCharasterFilters(): RequestBuilder {
        return this.addCharacterFilters();
    }

    /** Appends the Google status filter tokens. */
    addGoogleFilters(): RequestBuilder {
        if (this.input.filterGglNA) this.filters += 'not-adult,';
        if (this.input.filterGglNB) this.filters += 'not-faillisted-or-blocked,';
        // NOTE(review): 'Adult' is the only capitalised token — confirm the
        // endpoint expects this spelling rather than 'adult'.
        if (this.input.filterGglA) this.filters += 'Adult,';
        if (this.input.filterGglB) this.filters += 'faillisted-or-blocked,';

        return this;
    }

    /** Appends one filter token per enabled auction type. */
    addAuctionFilters(): RequestBuilder {
        if (this.input.filterAucTypAuc) this.filters += 'godaddy-expiring,';
        if (this.input.filterAucTypClo) this.filters += 'godaddy-closeouts,';
        if (this.input.filterAucTypDel) this.filters += 'pending-delete,';

        return this;
    }

    /**
     * Appends the expiration-date token for the selected date: `today`,
     * `tomorrow`, `today2`, `today3` or `today4`. A date in the past, outside
     * the 4-day window, or unparseable falls back to `today` with a warning.
     */
    addExpDateFilter(): RequestBuilder {
        const msPerDay = 1000 * 60 * 60 * 24;
        const tokensByOffset = ['today', 'tomorrow', 'today2', 'today3', 'today4'];

        const dateObj = new Date(this.input.filterExpDate);
        const todayObj = new Date();
        // A date-only string such as "2024-06-24" is parsed as UTC midnight,
        // so "today" must be truncated to UTC midnight as well. Truncating to
        // local midnight shifts the result by a day in zones behind UTC.
        todayObj.setUTCHours(0, 0, 0, 0);

        const diffTime = dateObj.getTime() - todayObj.getTime();

        let diffDays = Math.floor(diffTime / msPerDay);

        if (diffTime < 0) {
            log.warning('Selected date is in the past, setting it to today.');
            diffDays = 0;
        }

        // An out-of-range or NaN offset has no entry in the table.
        const token = tokensByOffset[diffDays];

        if (token === undefined) {
            log.warning('Date is outside of allowed range of 4 days from today. Setting it to today.');
            this.filters += 'today,';
        } else {
            this.filters += `${token},`;
        }

        return this;
    }

    /** Sets the sort key; descending order is marked with a leading `-`. */
    addSort(): RequestBuilder {
        this.sort = `${this.input.sortDir === 'asc' ? '' : '-'}${this.input.sortBy}`;

        return this;
    }

    /**
     * Assembles the request.
     *
     * @returns POST options whose form-encoded payload carries the filters,
     * the sort key, the page, the requested columns, the length bounds (when
     * `addLengthFilters()` produced any) and the id.
     */
    build(): RequestOptions {
        const columns = this.includedColumns.map((col) => `columns[]=${col}`).join('&');
        // The length bounds used to be computed but never sent, so the length
        // filters were silently ignored.
        const lengthPart = this.length === '' ? '' : `&${this.length}`;
        const payload = `filters=${this.filters}&sort=${this.sort}&page=${this.page}&${columns}${lengthPart}&id=${this.id}`;

        return {
            url: config.fetchUrl,
            method: 'POST',
            uniqueKey: `page-${this.page}`,
            headers: {
                'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
                origin: config.originUrl,
            },
            payload,
        };
    }
}
src/helpers/helper.ts
1import { RequestOptions } from 'crawlee';
2import { RequestBuilder } from '../builders/requestBuilder.js';
3import { config } from '../config.js';
4import { Input } from '../types/input.js';
5
/**
 * Builds one search request per result page needed to cover
 * `input.numDomains`, at `config.domainsPerPage` domains per page.
 *
 * @param input Actor input applied to every request.
 * @returns The requests, ordered by page number starting at 0.
 */
export async function generateRequests(input: Input): Promise<RequestOptions[]> {
    const pageCount = Math.ceil(input.numDomains / config.domainsPerPage);
    const collected: RequestOptions[] = [];

    let pageIndex = 0;
    while (pageIndex < pageCount) {
        collected.push(
            new RequestBuilder(input, pageIndex)
                .addTldFilters()
                .addLengthFilters()
                .addCharasterFilters()
                .addGoogleFilters()
                .addAuctionFilters()
                .addExpDateFilter()
                .addSort()
                .build(),
        );
        pageIndex += 1;
    }

    return collected;
}
29
/**
 * Converts a display value such as "$10", "1.5k", "2M" or "1,234" to a number.
 *
 * Surrounding whitespace, one leading `$` and thousands separators (`,`) are
 * removed before parsing. A trailing `k` multiplies the value by 1,000 and a
 * trailing `m` by 1,000,000 (case-insensitive).
 *
 * @param str Text to convert.
 * @returns The parsed number, or `NaN` when the text holds no leading number.
 */
export function convertToNumber(str: string): number {
    // Trim first so whitespace cannot hide the '$' prefix or the unit suffix.
    let text = str.trim();

    if (text.startsWith('$')) {
        text = text.slice(1);
    }

    // parseFloat stops at the first comma, so "1,234" would otherwise become 1.
    text = text.replace(/,/g, '');

    let number = parseFloat(text);

    const suffix = text.slice(-1).toLowerCase();

    if (suffix === 'k') {
        number *= 1000;
    } else if (suffix === 'm') {
        number *= 1000000;
    }

    return number;
}
src/parsers/domainParser.ts
1import { Cheerio, Element } from 'cheerio';
2import { config } from '../config.js';
3import { convertToNumber } from '../helpers/helper.js';
4
/**
 * Reads the fields of one domain from a table row.
 *
 * Each getter looks up a fixed `td:nth-child(n)` cell inside the wrapped
 * element. Numeric getters return `undefined` for an empty cell and otherwise
 * pass the text through `convertToNumber`; flag getters are true only when
 * the cell text is exactly "1".
 */
export class DomainParser {
    element: Cheerio<Element>;

    /**
     * @param element Cheerio selection of the row to read from.
     */
    constructor(element: Cheerio<Element>) {
        this.element = element;
    }

    /** Text of the cell at the given 1-based column position. */
    private cellText(column: number): string {
        return this.element.find(`td:nth-child(${column})`).text();
    }

    /** Cell converted to a number, or `undefined` when the cell is empty. */
    private cellNumber(column: number): number | undefined {
        const raw = this.cellText(column);
        if (raw === '') return undefined;
        return convertToNumber(raw);
    }

    /** True only when the cell text is exactly "1". */
    private cellFlag(column: number): boolean {
        return this.cellText(column) === '1';
    }

    get name(): string {
        return this.cellText(1);
    }

    /** `href` of the anchor in column 2 prefixed with `config.originUrl`. */
    get link(): string | undefined {
        const href = this.element.find('td:nth-child(2) > a').attr('href');
        if (!href) return undefined;
        return `${config.originUrl}${href}`;
    }

    get enddate(): Date {
        return new Date(this.cellText(3));
    }

    get tld(): string {
        return this.cellText(4);
    }

    get godaddyPrice(): number | undefined {
        return this.cellNumber(5);
    }

    get godaddyBid(): number | undefined {
        return this.cellNumber(6);
    }

    get auctionType(): string | undefined {
        const label = this.cellText(7);
        if (label === '') return undefined;
        return label;
    }

    get aiPricePredPrice(): number | undefined {
        return this.cellNumber(8);
    }

    get rdapRegistration(): number | undefined {
        return this.cellNumber(9);
    }

    get waybackFirstYear(): number | undefined {
        return this.cellNumber(10);
    }

    get waybackCount(): number | undefined {
        return this.cellNumber(11);
    }

    get searchVolumeVolume(): number | undefined {
        return this.cellNumber(12);
    }

    get searchVolumeCpc(): number | undefined {
        return this.cellNumber(13);
    }

    get googleStatusIsBlocked(): boolean {
        return this.cellFlag(14);
    }

    get googleStatusIsFaillisted(): boolean {
        return this.cellFlag(15);
    }

    get googleStatusIsAdult(): boolean {
        return this.cellFlag(16);
    }

    get fbShares(): number | undefined {
        return this.cellNumber(17);
    }

    get fbReact(): number | undefined {
        return this.cellNumber(18);
    }

    get mozDomainAuthority(): number | undefined {
        return this.cellNumber(19);
    }

    get mozPageAuthority(): number | undefined {
        return this.cellNumber(20);
    }

    get majesticCitationflow(): number | undefined {
        return this.cellNumber(21);
    }

    get majesticTrustflow(): number | undefined {
        return this.cellNumber(22);
    }

    get majesticExtbacklinks(): number | undefined {
        return this.cellNumber(23);
    }

    get ownranksBrandrank(): number | undefined {
        return this.cellNumber(24);
    }

    get ownranksTldsaturation(): number | undefined {
        return this.cellNumber(25);
    }

    get ownranksRadiotest(): number | undefined {
        return this.cellNumber(26);
    }
}
src/parsers/pageParser.ts
1import { Cheerio, CheerioAPI, Element } from 'cheerio';
2import { DomainParser } from '../parsers/domainParser.js';
3import { DomainBuilder } from '../builders/domainBuilder.js';
4import { Domain } from '../types/domain.js';
5
/**
 * Extracts every domain listed in a loaded result page.
 */
export class PageParser {
    $: CheerioAPI;

    /**
     * @param api Cheerio API loaded with the result HTML.
     */
    constructor(api: CheerioAPI) {
        this.$ = api;
    }

    /**
     * One `Domain` per row matched by `#result_win > table > tbody > tr`,
     * in document order.
     */
    get domains(): Domain[] {
        const rows = this.$('#result_win > table > tbody > tr').toArray();

        return rows.map((row) => {
            const rowElement: Cheerio<Element> = this.$(row);
            const rowParser = new DomainParser(rowElement);

            return new DomainBuilder(rowParser)
                .addName()
                .addLink()
                .addEnddate()
                .addTld()
                .addGodaddyPrice()
                .addGodaddyBid()
                .addAuctionType()
                .addAiPricePredPrice()
                .addRdapRegistration()
                .addWaybackFirstYear()
                .addWaybackCount()
                .addSearchVolumeVolume()
                .addSearchVolumeCpc()
                .addGoogleStatusIsBlocked()
                .addGoogleStatusIsFaillisted()
                .addGoogleStatusIsAdult()
                .addFbShares()
                .addFbReact()
                .addMozDomainAuthority()
                .addMozPageAuthority()
                .addMajesticCitationflow()
                .addMajesticTrustflow()
                .addMajesticExtbacklinks()
                .addOwnranksBrandrank()
                .addOwnranksTldsaturation()
                .addOwnranksRadiotest()
                .build();
        });
    }
}
src/types/config.ts
/** Shape of the static scraper settings. */
export type Config = {
    /** Number of domains assumed per result page when computing the page count. */
    domainsPerPage: number;

    /** Site origin; sent as the `origin` header and prefixed to relative links. */
    originUrl: string;

    /** URL the search requests are POSTed to. */
    fetchUrl: string;
};
src/types/domain.ts
// Shorthands for fields that are undefined when the source has no value.
type MaybeNumber = number | undefined;
type MaybeString = string | undefined;
type MaybeBoolean = boolean | undefined;

/** One scraped domain record as pushed to the dataset. */
export type Domain = {
    // Identity and auction details.
    name: string;
    link: MaybeString;
    enddate: Date;
    tld: string;
    godaddyPrice: MaybeNumber;
    godaddyBid: MaybeNumber;
    auctionType: MaybeString;
    aiPricePredPrice: MaybeNumber;

    // Registration and archive history.
    rdapRegistration: MaybeNumber;
    waybackFirstYear: MaybeNumber;
    waybackCount: MaybeNumber;

    // Search metrics.
    searchVolumeVolume: MaybeNumber;
    searchVolumeCpc: MaybeNumber;

    // Google status flags.
    googleStatusIsBlocked: MaybeBoolean;
    googleStatusIsFaillisted: MaybeBoolean;
    googleStatusIsAdult: MaybeBoolean;

    // Facebook metrics.
    fbShares: MaybeNumber;
    fbReact: MaybeNumber;

    // Majestic metrics.
    majesticExtbacklinks: MaybeNumber;
    majesticCitationflow: MaybeNumber;
    majesticTrustflow: MaybeNumber;

    // Moz metrics.
    mozDomainAuthority: MaybeNumber;
    mozPageAuthority: MaybeNumber;

    // Ownranks metrics.
    ownranksBrandrank: MaybeNumber;
    ownranksTldsaturation: MaybeNumber;
    ownranksRadiotest: MaybeNumber;
};
src/types/input.ts
/** Sort direction accepted by the actor input. */
export type SortDirection = 'asc' | 'desc';

/** Column keys the results can be sorted by. */
export type SortByOption =
    | 'name'
    | 'link'
    | 'enddate'
    | 'tld'
    | 'godaddy_price'
    | 'auction_type'
    | 'godaddy_bid'
    | 'ai_price_pred_price'
    | 'rdap_registration'
    | 'wayback_first_year'
    | 'wayback_count'
    | 'search_volume_volume'
    | 'search_volume_cpc'
    | 'google_status_is_blocked'
    | 'google_status_is_faillisted'
    | 'google_status_is_adult'
    | 'fb_shares'
    | 'fb_react'
    | 'majestic_extbacklinks'
    | 'majestic_citationflow'
    | 'majestic_trustflow'
    | 'moz_domain_authority'
    | 'moz_page_authority'
    | 'ownranks_brandrank'
    | 'ownranks_tldsaturation'
    | 'ownranks_radiotest';

/** Actor input as read by the request builder. */
export type Input = {
    // Volume and ordering.
    numDomains: number;
    sortBy: SortByOption;
    sortDir: SortDirection;

    // TLD filters.
    filterTldCom: boolean;
    filterTldNet: boolean;
    filterTldOrg: boolean;
    filterTldIo: boolean;
    filterTldUk: boolean;
    filterTldN: boolean;
    filterTldOther: boolean;

    // Name length bounds.
    filterLengthMin: number;
    filterLengthMax: number;

    // Character exclusions.
    filterCharsHyp: boolean;
    filterCharsNum: boolean;
    filterCharsLet: boolean;

    // Google status filters.
    filterGglNA: boolean;
    filterGglNB: boolean;
    filterGglA: boolean;
    filterGglB: boolean;

    // Auction type filters.
    filterAucTypAuc: boolean;
    filterAucTypClo: boolean;
    filterAucTypDel: boolean;

    // Expiration date as a date string.
    filterExpDate: string;
};
src/config.ts
1import { Config } from './types/config.js';
2
// Origin shared by the `origin` header, the link prefix and the search endpoint.
const siteOrigin = 'https://expireddomains.com';

/** Static settings used when building requests and parsing results. */
export const config: Config = {
    domainsPerPage: 25,
    originUrl: siteOrigin,
    fetchUrl: `${siteOrigin}/site/search`,
};
src/main.ts
1import { Actor } from 'apify';
2import { CheerioCrawler, Dataset } from 'crawlee';
3import { load } from 'cheerio';
4import { PageParser } from './parsers/pageParser.js';
5import { generateRequests } from './helpers/helper.js';
6import { Input } from './types/input.js';
7import { Domain } from './types/domain.js';
8
// Actor entry point: read the input, crawl one request per result page and
// store every parsed domain in the default dataset.
await Actor.main(async () => {
    const input = await Actor.getInput<Input>();

    if (!input) throw new Error('Input is missing');

    const proxyConfiguration = await Actor.createProxyConfiguration();

    const crawler = new CheerioCrawler({
        proxyConfiguration,
        requestHandler: async ({ body, request }) => {
            // The response body is JSON whose `result` field holds the HTML table.
            const bodyJson = JSON.parse(body.toString());

            // Fail loudly instead of parsing `undefined` and storing nothing;
            // a thrown error lets the crawler apply its normal error handling.
            if (typeof bodyJson?.result !== 'string') {
                throw new Error(`Response for ${request.uniqueKey} has no "result" HTML.`);
            }

            const $ = load(bodyJson.result);

            const pageParser = new PageParser($);

            // Store the whole page with a single call instead of one call per domain.
            await Dataset.pushData<Domain>(pageParser.domains);
        },
    });

    const requests = await generateRequests(input);

    await crawler.run(requests);
});
test/domainParser.test.ts
1import { DomainParser } from '../src/parsers/domainParser.js'
2import { Cheerio, load } from 'cheerio'
3
// Fixture: a single table row whose cells appear in the same order as the
// expectations listed in the test table below.
const sampleHtml = `
<div>
 <table>
 <tbody>
 <tr>
 <td>example.com</td>
 <td><a href="/link">Link</a></td>
 <td>2024-06-01</td>
 <td>com</td>
 <td>$10</td>
 <td>$5</td>
 <td>Auction</td>
 <td>$20</td>
 <td>1999</td>
 <td>2000</td>
 <td>100</td>
 <td>5000</td>
 <td>$2</td>
 <td>1</td>
 <td>1</td>
 <td>1</td>
 <td>100</td>
 <td>200</td>
 <td>50</td>
 <td>60</td>
 <td>70</td>
 <td>80</td>
 <td>1000</td>
 <td>90</td>
 <td>95</td>
 <td>85</td>
 </tr>
 </tbody>
 </table>
</div>
`

describe('DomainParser', () => {
    let parser: DomainParser

    // Parse the fixture once; every case below reads from the same parser.
    beforeAll(() => {
        const row: Cheerio<any> = load(sampleHtml)('tr')
        parser = new DomainParser(row)
    })

    // [property read from the parser, value it is expected to yield]
    const expectations: Array<[keyof DomainParser, unknown]> = [
        ['name', 'example.com'],
        ['link', 'https://expireddomains.com/link'],
        ['enddate', new Date('2024-06-01')],
        ['tld', 'com'],
        ['godaddyPrice', 10],
        ['godaddyBid', 5],
        ['auctionType', 'Auction'],
        ['aiPricePredPrice', 20],
        ['rdapRegistration', 1999],
        ['waybackFirstYear', 2000],
        ['waybackCount', 100],
        ['searchVolumeVolume', 5000],
        ['searchVolumeCpc', 2],
        ['googleStatusIsBlocked', true],
        ['googleStatusIsFaillisted', true],
        ['googleStatusIsAdult', true],
        ['fbShares', 100],
        ['fbReact', 200],
        ['mozDomainAuthority', 50],
        ['mozPageAuthority', 60],
        ['majesticCitationflow', 70],
        ['majesticTrustflow', 80],
        ['majesticExtbacklinks', 1000],
        ['ownranksBrandrank', 90],
        ['ownranksTldsaturation', 95],
        ['ownranksRadiotest', 85],
    ]

    test.each(expectations)('should parse %s correctly', (field, expected) => {
        const actual = parser[field]

        if (expected instanceof Date) {
            // Dates are compared by value, not by identity.
            expect(actual).toEqual(expected)
        } else {
            expect(actual).toBe(expected)
        }
    })
})
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git

# locally built output: the image compiles its own dist in the builder stage,
# and the final `COPY . ./` would otherwise overwrite it with a stale local build
dist
.editorconfig
1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf
.eslintrc
1{
2 "root": true,
3 "env": {
4 "browser": true,
5 "es2020": true,
6 "node": true
7 },
8 "extends": [
9 "@apify/eslint-config-ts"
10 ],
11 "parserOptions": {
12 "project": "./tsconfig.json",
13 "ecmaVersion": 2020
14 },
15 "ignorePatterns": [
16 "node_modules",
17 "dist",
18 "**/*.d.ts"
19 ],
20 "rules": {
21 "import/no-extraneous-dependencies": [
22 "error", {
23 "devDependencies": true
24 }
25 ]
26 }
27
28}
.gitignore
1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv
jest.config.js
/**
 * Jest configuration: runs the TypeScript tests through ts-jest in a Node
 * test environment.
 *
 * @type {import('ts-jest').JestConfigWithTsJest}
 */
export default {
    preset: 'ts-jest',
    testEnvironment: 'node',
    moduleFileExtensions: ['ts', 'js'],
    transform: {
        // ts-jest options belong on the transform entry; passing them through
        // `globals['ts-jest']` is deprecated in ts-jest 29.
        '^.+\\.tsx?$': ['ts-jest', { tsconfig: 'tsconfig.json' }],
    },
    moduleNameMapper: {
        // Relative imports are written with a `.js` suffix; strip it so Jest
        // resolves the corresponding source file.
        '^(\\.\\.?\\/.+)\\.js$': '$1',
    },
};
package.json
1{
2 "name": "expired-domains-scraper",
3 "version": "0.0.1",
4 "type": "module",
5 "description": "The Expired Domains Scraper automates finding valuable expired domains from expireddomains.com, offering filters and sorting by SEO metrics and auction details for efficient domain acquisition.",
6 "engines": {
7 "node": ">=18.0.0"
8 },
9 "dependencies": {
10 "apify": "^3.1.10",
11 "cheerio": "^1.0.0-rc.12",
12 "crawlee": "^3.5.4"
13 },
14 "devDependencies": {
15 "@apify/eslint-config-ts": "^0.3.0",
16 "@apify/tsconfig": "^0.1.0",
17 "@types/jest": "^29.5.12",
18 "@typescript-eslint/eslint-plugin": "^6.7.2",
19 "@typescript-eslint/parser": "^6.7.2",
20 "eslint": "^8.50.0",
21 "jest": "^29.7.0",
22 "ts-jest": "^29.1.5",
23 "tsx": "^4.6.2",
24 "typescript": "^5.3.3"
25 },
26 "scripts": {
27 "start": "npm run start:dev",
28 "start:prod": "node dist/main.js",
29 "start:dev": "tsx src/main.ts",
30 "build": "tsc",
31 "lint": "eslint ./src --ext .ts",
32 "lint:fix": "eslint ./src --ext .ts --fix",
33 "test": "jest"
34 },
35 "author": "Martin Fanta",
36 "license": "MIT"
37}
tsconfig.json
{
    "extends": "@apify/tsconfig",
    "compilerOptions": {
        "esModuleInterop": true,
        "module": "NodeNext",
        "moduleResolution": "NodeNext",
        "target": "ES2022",
        "outDir": "dist",
        "noUnusedLocals": false,
        "skipLibCheck": true,
        // An explicit "lib" replaces the defaults implied by "target", so the
        // ES2022 library typings have to be listed alongside DOM.
        "lib": ["ES2022", "DOM"],
        "paths": {
            "src/*": [
                "./src/*"
            ]
        }
    },
    "include": [
        "./src/**/*"
    ]
}
Developer
Maintained by Community
Actor Metrics
11 monthly users
-
3 stars
>99% runs succeeded
Created in Jun 2024
Modified 5 months ago
Categories