Expired Domains Scraper avatar

Expired Domains Scraper

Try for free

No credit card required

View all Actors
Expired Domains Scraper

Expired Domains Scraper

martin1080p/expired-domains-scraper
Try for free

No credit card required

The Expired Domains Scraper automates finding valuable expired domains from expireddomains.com, offering filters and sorting by SEO metrics and auction details for efficient domain acquisition.

.actor/Dockerfile

1# Specify the base Docker image. You can read more about
2# the available images at https://crawlee.dev/docs/guides/docker-images
3# You can also use any other image from Docker Hub.
4FROM apify/actor-node:20 AS builder
5
6# Copy just package.json and package-lock.json
7# to speed up the build using Docker layer cache.
8COPY package*.json ./
9
10# Install all dependencies. Don't audit to speed up the installation.
11RUN npm install --include=dev --audit=false
12
13# Next, copy the source files using the user set
14# in the base image.
15COPY . ./
16
17# Build the project. The build output in dist/ is copied into the
18# final image below.
19RUN npm run build
20
21# Create final image
22FROM apify/actor-node:20
23
24# Copy just package.json and package-lock.json
25# to speed up the build using Docker layer cache.
26COPY package*.json ./
27
28# Install NPM packages, skip optional and development dependencies to
29# keep the image small. Avoid logging too much and print the dependency
30# tree for debugging
31RUN npm --quiet set progress=false \
32    && npm install --omit=dev --omit=optional \
33    && echo "Installed NPM packages:" \
34    && (npm list --omit=dev --all || true) \
35    && echo "Node.js version:" \
36    && node --version \
37    && echo "NPM version:" \
38    && npm --version \
39    && rm -r ~/.npm
40
41# Copy built JS files from builder image
42COPY --from=builder /usr/src/app/dist ./dist
43
44# Next, copy the remaining files and directories with the source code.
45# Since we do this after NPM install, quick build will be really fast
46# for most source file changes.
47COPY . ./
48
49
50# Run the image.
51CMD npm run start:prod --silent

.actor/actor.json

1{
2	"actorSpecification": 1,
3	"name": "expired-domains-scraper",
4	"title": "Expired Domains Scraper",
5	"description": "Expired Domains Scraper is a robust tool designed to automate the process of identifying and extracting information about expired domain names. This tool is ideal for digital marketers, domain investors, SEO specialists, and businesses looking to acquire valuable domains that have lapsed in registration. By leveraging this scraper, users can gain a competitive edge in acquiring high-quality domains that may still have significant SEO value, backlinks, or brand potential.",
6	"version": "0.0",
7	"meta": {
8		"templateId": "ts-crawlee-cheerio"
9	},
10	"input": "./input_schema.json",
11	"dockerfile": "./Dockerfile",
12	"storages": {
13        "dataset": {
14            "actorSpecification": 1,
15            "fields": {},
16            "views": {
17                "overview": {
18                    "title": "Overview",
19                    "transformation": {
20						"fields": [
21							"name",
22							"link",
23							"enddate",
24							"tld",
25							"godaddyPrice",
26							"godaddyBid",
27							"auctionType",
28							"rdapRegistration",
29							"waybackFirstYear",
30							"waybackCount",
31							"searchVolumeVolume",
32							"searchVolumeCpc",
33							"googleStatusIsBlocked",
34							"googleStatusIsFaillisted",
35							"googleStatusIsAdult",
36							"mozDomainAuthority",
37							"mozPageAuthority",
38							"majesticCitationflow",
39							"majesticTrustflow",
40							"majesticExtbacklinks",
41							"ownranksBrandrank",
42							"ownranksTldsaturation",
43							"ownranksRadiotest"
44						]
45					},
46                    "display": {
47						"component": "table",
48						"properties": {
49							"name": {
50								"label": "Name",
51								"format": "text"
52							},
53							"link": {
54								"label": "Link",
55								"format": "link"
56							},
57							"enddate": {
58								"label": "End date",
59								"format": "date"
60							},
61							"tld": {
62								"label": "TLD",
63								"format": "text"
64							},
65							"godaddyPrice": {
66								"label": "GoDaddy price",
67								"format": "number"
68							},
69							"godaddyBid": {
70								"label": "Number of bids",
71								"format": "number"
72							},
73							"auctionType": {
74								"label": "Auction type",
75								"format": "text"
76							},
77							"rdapRegistration": {
78								"label": "Whois first registration",
79								"format": "number"
80							},
81							"waybackFirstYear": {
82								"label": "Wayback first year",
83								"format": "number"
84							},
85							"waybackCount": {
86								"label": "Wayback count",
87								"format": "number"
88							},
89							"searchVolumeVolume": {
90								"label": "Search volume",
91								"format": "number"
92							},
93							"searchVolumeCpc": {
94								"label": "Search volume CPC",
95								"format": "number"
96							},
97							"googleStatusIsBlocked": {
98								"label": "Google status is blocked",
99								"format": "boolean"
100							},
101							"googleStatusIsFaillisted": {
102								"label": "Google status is faillisted",
103								"format": "boolean"
104							},
105							"googleStatusIsAdult": {
106								"label": "Google status is adult",
107								"format": "boolean"
108							},
109							"mozDomainAuthority": {
110								"label": "Moz domain authority",
111								"format": "number"
112							},
113							"mozPageAuthority": {
114								"label": "Moz page authority",
115								"format": "number"
116							},
117							"majesticCitationflow": {
118								"label": "Majestic citation flow",
119								"format": "number"
120							},
121							"majesticTrustflow": {
122								"label": "Majestic trust flow",
123								"format": "number"
124							},
125							"majesticExtbacklinks": {
126								"label": "Majestic external backlinks",
127								"format": "number"
128							},
129							"ownranksBrandrank": {
130								"label": "Ownranks brand rank",
131								"format": "number"
132							},
133							"ownranksTldsaturation": {
134								"label": "Ownranks TLD saturation",
135								"format": "number"
136							},
137							"ownranksRadiotest": {
138								"label": "Ownranks radio test",
139								"format": "number"
140							}
141						}
142					}
143                }
144            }
145        }
146    }
147}

.actor/input_schema.json

1{
2    "title": "Expired Domains Scraper",
3    "type": "object",
4    "schemaVersion": 1,
5    "required": ["numDomains", "sortBy", "sortDir", "filterLengthMin", "filterLengthMax", "filterExpDate"],
6    "properties": {
7        "numDomains": {
8            "title": "Number of domains",
9            "description": "Total number of domains to be fetched.",
10            "type": "integer",
11            "editor": "number",
12            "default": 50,
13            "minimum": 25
14        },
15        "sortBy": {
16            "sectionCaption": "Sort settings",
17            "sectionDescription": "Domains to be fetched in specific order.",
18            "title": "Sort by",
19            "description": "Select one of the options.",
20            "type": "string",
21            "enum": ["name", "link", "enddate", "tld", "godaddy_price", "auction_type", "godaddy_bid", "ai_price_pred_price", "rdap_registration", "wayback_first_year", "wayback_count", "search_volume_volume", "search_volume_cpc", "google_status_is_blocked", "google_status_is_faillisted", "google_status_is_adult", "fb_shares", "fb_react", "majestic_extbacklinks", "majestic_citationflow", "majestic_trustflow", "moz_domain_authority", "moz_page_authority", "ownranks_brandrank", "ownranks_tldsaturation", "ownranks_radiotest"],
22            "enumTitles": ["Name", "Link", "Expiration date", "TLD", "Current price", "Auction type", "Godaddy bid count", "Est. auction price", "WHOIS birth year", "Internet archive birth year", "Internet archive count", "Search volume", "Cost per click", "Google blocked", "Google Faillisted", "Google adult", "Facebook shares", "Facebook react", "External backlinks", "Citation flow", "Trust flow", "MOZ domain authority", "MOZ page authority", "BrandRank score", "TLD Saturation", "RadioTest"],
23            "default": "name"
24        },
25        "sortDir": {
26            "title": "Sort direction",
27            "description": "Select one of the options.",
28            "type": "string",
29            "enum": ["asc", "desc"],
30            "enumTitles": ["ASC", "DESC"],
31            "default": "asc"
32        },
33        "filterTldCom": {
34            "sectionCaption": "Filter settings",
35            "sectionDescription": "Filter fetched domains by specific properties.",
36            "groupCaption": "Filter by TLD",
37            "groupDescription": "Specific TLDs to be fetched.",
38            "title": "com",
39            "description": "Domains ending with .com.",
40            "type": "boolean",
41            "default": true
42        },
43        "filterTldNet": {
44            "title": "net",
45            "description": "Domains ending with .net.",
46            "type": "boolean",
47            "default": true
48        },
49        "filterTldOrg": {
50            "title": "org",
51            "description": "Domains ending with .org.",
52            "type": "boolean",
53            "default": true
54        },
55        "filterTldIo": {
56            "title": "io",
57            "description": "Domains ending with .io.",
58            "type": "boolean",
59            "default": true
60        },
61        "filterTldUk": {
62            "title": "uk",
63            "description": "Domains ending with .uk.",
64            "type": "boolean",
65            "default": true
66        },
67        "filterTldN": {
68            "title": "nTLD",
69            "description": "Domains ending with .agency, .bio, etc.",
70            "type": "boolean",
71            "default": true
72        },
73        "filterTldOther": {
74            "title": "other TLDs",
75            "description": "Domains ending with all other extensions.",
76            "type": "boolean",
77            "default": true
78        },
79        "filterLengthMin": {
80            "title": "Length min",
81            "description": "Domains with minimum length of specified number.",
82            "type": "integer",
83            "default": 1,
84            "minimum": 1,
85            "maximum": 255,
86            "nullable": true
87        },
88        "filterLengthMax": {
89            "title": "Length max",
90            "description": "Domains with maximum length of specified number.",
91            "type": "integer",
92            "default": 255,
93            "minimum": 1,
94            "maximum": 255,
95            "nullable": true
96        },
97        "filterCharsHyp": {
98            "groupCaption": "Filter by used characters",
99            "groupDescription": "Domains excluding certain characters.",
100            "title": "No hyphens",
101            "description": "Domains excluding hyphens in their name.",
102            "type": "boolean",
103            "default": false
104        },
105        "filterCharsNum": {
106            "title": "No numbers",
107            "description": "Domains excluding numbers in their name.",
108            "type": "boolean",
109            "default": false
110        },
111        "filterCharsLet": {
112            "title": "No letters",
113            "description": "Domains excluding letters in their name.",
114            "type": "boolean",
115            "default": false
116        },
117        "filterGglNA": {
118            "groupCaption": "Filter by Google status",
119            "groupDescription": "Domains listed in certain google lists.",
120            "title": "Not adult",
121            "description": "Domains not marked as adult content.",
122            "type": "boolean",
123            "default": true
124        },
125        "filterGglNB": {
126            "title": "Not blocked",
127            "description": "Domains not marked as blocked.",
128            "type": "boolean",
129            "default": true
130        },
131        "filterGglA": {
132            "title": "Adult",
133            "description": "Domains marked as adult content.",
134            "type": "boolean",
135            "default": false
136        },
137        "filterGglB": {
138            "title": "Blocked",
139            "description": "Domains marked as blocked.",
140            "type": "boolean",
141            "default": false
142        },
143        "filterAucTypAuc": {
144            "groupCaption": "Filter by auction type",
145            "groupDescription": "Domains in certain auction processes.",
146            "title": "Godaddy auctions",
147            "description": "Domains with listed Godaddy auction.",
148            "type": "boolean",
149            "default": true
150        },
151        "filterAucTypClo": {
152            "title": "Godaddy closeouts",
153            "description": "Domains in the middle of closeout process.",
154            "type": "boolean",
155            "default": true
156        },
157        "filterAucTypDel": {
158            "title": "Pending delete",
159            "description": "Domains awaiting pending deletion.",
160            "type": "boolean",
161            "default": true
162        },
163        "filterExpDate": {
164            "title": "Expiration date",
165            "description": "Domains with selected date of expiration. Allowed date range is from today to 4 days after today. For example, if today is 2024-06-24, the allowed range is 2024-06-24 to 2024-06-28.",
166            "type": "string",
167            "editor": "datepicker",
168            "default": "2024-01-01",
169            "prefill": "2024-01-01"
170        }
171    }
172}

.vscode/launch.json

1{
2    "version": "0.2.0",
3    "configurations": [
4        {
5            "type": "node",
6            "request": "launch",
7            "name": "Launch via apify",
8            "runtimeExecutable": "apify",
9            "runtimeArgs": [
10              "run",
11              "--purge"
12            ],
13            "skipFiles": [
14              "<node_internals>/**"
15            ]
16          }
17    ]
18}

src/builders/domainBuilder.ts

1import { DomainParser } from '../parsers/domainParser.js';
2import { Domain } from '../types/domain.js';
3
/**
 * Fluent builder that assembles a `Domain` record field by field.
 *
 * Each `add*` method reads the identically named getter of the wrapped
 * `DomainParser`, stores the value on the domain under construction and
 * returns the builder so that calls can be chained. `build()` returns the
 * accumulated object.
 */
export class DomainBuilder {
    domainParser: DomainParser;
    domain: Domain = {} as Domain;

    constructor(domainParser: DomainParser) {
        this.domainParser = domainParser;
    }

    /** Writes `value` to `key` on the domain being built and returns the builder for chaining. */
    private assign<K extends keyof Domain>(key: K, value: Domain[K]): DomainBuilder {
        this.domain[key] = value;
        return this;
    }

    addName(): DomainBuilder {
        return this.assign('name', this.domainParser.name);
    }

    addLink(): DomainBuilder {
        return this.assign('link', this.domainParser.link);
    }

    addEnddate(): DomainBuilder {
        return this.assign('enddate', this.domainParser.enddate);
    }

    addTld(): DomainBuilder {
        return this.assign('tld', this.domainParser.tld);
    }

    addGodaddyPrice(): DomainBuilder {
        return this.assign('godaddyPrice', this.domainParser.godaddyPrice);
    }

    addGodaddyBid(): DomainBuilder {
        return this.assign('godaddyBid', this.domainParser.godaddyBid);
    }

    addAuctionType(): DomainBuilder {
        return this.assign('auctionType', this.domainParser.auctionType);
    }

    addAiPricePredPrice(): DomainBuilder {
        return this.assign('aiPricePredPrice', this.domainParser.aiPricePredPrice);
    }

    addRdapRegistration(): DomainBuilder {
        return this.assign('rdapRegistration', this.domainParser.rdapRegistration);
    }

    addWaybackFirstYear(): DomainBuilder {
        return this.assign('waybackFirstYear', this.domainParser.waybackFirstYear);
    }

    addWaybackCount(): DomainBuilder {
        return this.assign('waybackCount', this.domainParser.waybackCount);
    }

    addSearchVolumeVolume(): DomainBuilder {
        return this.assign('searchVolumeVolume', this.domainParser.searchVolumeVolume);
    }

    addSearchVolumeCpc(): DomainBuilder {
        return this.assign('searchVolumeCpc', this.domainParser.searchVolumeCpc);
    }

    addGoogleStatusIsBlocked(): DomainBuilder {
        return this.assign('googleStatusIsBlocked', this.domainParser.googleStatusIsBlocked);
    }

    addGoogleStatusIsFaillisted(): DomainBuilder {
        return this.assign('googleStatusIsFaillisted', this.domainParser.googleStatusIsFaillisted);
    }

    addGoogleStatusIsAdult(): DomainBuilder {
        return this.assign('googleStatusIsAdult', this.domainParser.googleStatusIsAdult);
    }

    addFbShares(): DomainBuilder {
        return this.assign('fbShares', this.domainParser.fbShares);
    }

    addFbReact(): DomainBuilder {
        return this.assign('fbReact', this.domainParser.fbReact);
    }

    addMajesticExtbacklinks(): DomainBuilder {
        return this.assign('majesticExtbacklinks', this.domainParser.majesticExtbacklinks);
    }

    addMajesticCitationflow(): DomainBuilder {
        return this.assign('majesticCitationflow', this.domainParser.majesticCitationflow);
    }

    addMajesticTrustflow(): DomainBuilder {
        return this.assign('majesticTrustflow', this.domainParser.majesticTrustflow);
    }

    addMozDomainAuthority(): DomainBuilder {
        return this.assign('mozDomainAuthority', this.domainParser.mozDomainAuthority);
    }

    addMozPageAuthority(): DomainBuilder {
        return this.assign('mozPageAuthority', this.domainParser.mozPageAuthority);
    }

    addOwnranksBrandrank(): DomainBuilder {
        return this.assign('ownranksBrandrank', this.domainParser.ownranksBrandrank);
    }

    addOwnranksTldsaturation(): DomainBuilder {
        return this.assign('ownranksTldsaturation', this.domainParser.ownranksTldsaturation);
    }

    addOwnranksRadiotest(): DomainBuilder {
        return this.assign('ownranksRadiotest', this.domainParser.ownranksRadiotest);
    }

    /** Returns the domain object populated by the preceding `add*` calls. */
    build(): Domain {
        return this.domain;
    }
}

src/builders/requestBuilder.ts

1import { log, RequestOptions } from 'crawlee';
2import { config } from '../config.js';
3import { Input } from '../types/input.js';
4
/**
 * Fluent builder for the form-encoded POST request that fetches one page of
 * expired-domain results.
 *
 * Each `add*` method translates one part of the actor input into request
 * parameters (comma-terminated `filters` tokens, the sort key, or the length
 * bounds) and returns the builder so calls can be chained. `build()` then
 * assembles the final `RequestOptions`.
 */
export class RequestBuilder {
    /** Comma-terminated filter tokens accumulated by the `add*Filter(s)` methods. */
    private filters: string;
    /** Sort key, prefixed with `-` for descending order. */
    private sort: string;
    /** `min[domain_length]` / `max[domain_length]` query fragment, empty when unset. */
    private length: string;
    /** Zero-based index of the result page this request fetches. */
    private page: number;
    private id: number;
    private input: Input;

    /** Columns requested from the endpoint, sent as repeated `columns[]` parameters. */
    private includedColumns: string[] = [
        'link',
        'enddate',
        'tld',
        'godaddy_price',
        'godaddy_bid',
        'auction_type',
        'ai_price_pred_price',
        'rdap_registration',
        'wayback_first_year',
        'wayback_count',
        'search_volume_volume',
        'search_volume_cpc',
        'google_status_is_blocked',
        'google_status_is_faillisted',
        'google_status_is_adult',
        'fb_shares',
        'fb_react',
        'moz_domain_authority',
        'moz_page_authority',
        'majestic_citationflow',
        'majestic_trustflow',
        'majestic_extbacklinks',
        'ownranks_brandrank',
        'ownranks_tldsaturation',
        'ownranks_radiotest',
    ];

    /**
     * @param input Actor input the filters and sorting are derived from.
     * @param page Zero-based page index; defaults to 0.
     */
    constructor(input: Input, page?: number) {
        this.filters = '';
        this.sort = '';
        this.length = '';
        this.page = page ?? 0;
        this.id = 0;
        this.input = input;
    }

    /** Appends `token,` to the filter list for every toggle whose flag is truthy. */
    private appendTokens(toggles: Array<[unknown, string]>): RequestBuilder {
        for (const [enabled, token] of toggles) {
            if (enabled) this.filters += `${token},`;
        }

        return this;
    }

    /** Adds one filter token per enabled TLD option. */
    addTldFilters(): RequestBuilder {
        return this.appendTokens([
            [this.input.filterTldCom, 'com'],
            [this.input.filterTldNet, 'net'],
            [this.input.filterTldOrg, 'org'],
            [this.input.filterTldIo, 'io'],
            [this.input.filterTldUk, 'uk'],
            [this.input.filterTldN, 'n'],
            [this.input.filterTldOther, 'other'],
        ]);
    }

    /**
     * Records the domain-length bounds to be sent with the request.
     *
     * A bound that is `null`/`undefined` (the input schema marks both as
     * nullable) is simply omitted instead of being serialised as "null".
     *
     * @throws Error when both bounds are set and the minimum exceeds the maximum.
     */
    addLengthFilters(): RequestBuilder {
        const { filterLengthMin: min, filterLengthMax: max } = this.input;

        if (min != null && max != null && min > max) {
            throw new Error('Min length cannot be greater than max length');
        }

        const parts: string[] = [];
        if (min != null) parts.push(`min[domain_length]=${min}`);
        if (max != null) parts.push(`max[domain_length]=${max}`);
        this.length = parts.join('&');

        return this;
    }

    /** Adds one filter token per enabled character-exclusion option. */
    addCharasterFilters(): RequestBuilder {
        return this.appendTokens([
            [this.input.filterCharsHyp, 'no-dashes'],
            [this.input.filterCharsNum, 'no-numbers'],
            [this.input.filterCharsLet, 'no-letters'],
        ]);
    }

    /** Adds one filter token per enabled Google-status option. */
    addGoogleFilters(): RequestBuilder {
        return this.appendTokens([
            [this.input.filterGglNA, 'not-adult'],
            [this.input.filterGglNB, 'not-faillisted-or-blocked'],
            // NOTE(review): 'Adult' is the only capitalised token; kept as-is, confirm the endpoint expects it.
            [this.input.filterGglA, 'Adult'],
            [this.input.filterGglB, 'faillisted-or-blocked'],
        ]);
    }

    /** Adds one filter token per enabled auction-type option. */
    addAuctionFilters(): RequestBuilder {
        return this.appendTokens([
            [this.input.filterAucTypAuc, 'godaddy-expiring'],
            [this.input.filterAucTypClo, 'godaddy-closeouts'],
            [this.input.filterAucTypDel, 'pending-delete'],
        ]);
    }

    /**
     * Adds the expiration-date token for a date between today and four days
     * from today. Dates in the past or outside that window fall back to
     * `today` and log a warning.
     */
    addExpDateFilter(): RequestBuilder {
        const dayMs = 1000 * 60 * 60 * 24;
        // Token for an offset of 0..4 days from today.
        const tokens = ['today', 'tomorrow', 'today2', 'today3', 'today4'];

        const dateObj = new Date(this.input.filterExpDate);
        const todayObj = new Date();
        // A date-only string ("YYYY-MM-DD") is parsed as UTC midnight, so
        // "today" has to be truncated in UTC as well; truncating in local
        // time shifts the day offset on machines that do not run in UTC.
        todayObj.setUTCHours(0, 0, 0, 0);

        const diffTime = dateObj.getTime() - todayObj.getTime();

        let diffDays = Math.floor(diffTime / dayMs);

        if (diffTime < 0) {
            log.warning('Selected date is in the past, setting it to today.');
            diffDays = 0;
        }

        // An unparsable date yields NaN, which has no token and takes the fallback below.
        const token = tokens[diffDays];

        if (token === undefined) {
            log.warning('Date is outside of allowed range of 4 days from today. Setting it to today.');
            this.filters += 'today,';
        } else {
            this.filters += `${token},`;
        }

        return this;
    }

    /** Sets the sort parameter; descending order is expressed with a leading `-`. */
    addSort(): RequestBuilder {
        this.sort = `${this.input.sortDir === 'asc' ? '' : '-'}${this.input.sortBy}`;

        return this;
    }

    /**
     * Assembles the request for this page.
     *
     * @returns POST request options whose `uniqueKey` is derived from the page
     * index, so every page is treated as a distinct request even though all
     * pages share the same URL.
     */
    build(): RequestOptions {
        const columns = this.includedColumns.map((col) => `columns[]=${col}`).join('&');
        // The length bounds were previously computed but never sent; include
        // them only when addLengthFilters() produced something.
        const lengthPart = this.length ? `&${this.length}` : '';
        const payload = `filters=${this.filters}&sort=${this.sort}&page=${this.page}${lengthPart}&${columns}&id=${this.id}`;

        return {
            url: config.fetchUrl,
            method: 'POST',
            uniqueKey: `page-${this.page}`,
            headers: {
                'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
                origin: config.originUrl,
            },
            payload,
        };
    }
}

src/helpers/helper.ts

1import { RequestOptions } from 'crawlee';
2import { RequestBuilder } from '../builders/requestBuilder.js';
3import { config } from '../config.js';
4import { Input } from '../types/input.js';
5
/**
 * Creates one request per result page needed to cover `input.numDomains`.
 *
 * The page count is `numDomains / config.domainsPerPage`, rounded up; every
 * request is produced by a fresh `RequestBuilder` with all filters and the
 * sort order applied.
 *
 * @param input Actor input describing how many domains to fetch and how to filter and sort them.
 * @returns Request options for pages `0 .. pageCount - 1`, in order.
 */
export async function generateRequests(input: Input): Promise<RequestOptions[]> {
    const pageCount = Math.ceil(input.numDomains / config.domainsPerPage);
    const result: RequestOptions[] = [];

    let pageIndex = 0;
    while (pageIndex < pageCount) {
        result.push(
            new RequestBuilder(input, pageIndex)
                .addTldFilters()
                .addLengthFilters()
                .addCharasterFilters()
                .addGoogleFilters()
                .addAuctionFilters()
                .addExpDateFilter()
                .addSort()
                .build(),
        );
        pageIndex += 1;
    }

    return result;
}
29
/**
 * Converts a scraped, human-formatted figure such as `"$1,234"`, `"1.5K"` or
 * `"2m"` into a plain number.
 *
 * Surrounding whitespace, a leading `$` and thousands separators (`,`) are
 * removed before parsing; a trailing `k` / `m` (case-insensitive) multiplies
 * the value by one thousand / one million.
 *
 * @param str Raw cell text. Commas are treated as thousands separators, not decimal marks.
 * @returns The parsed number, or `NaN` when the text does not start with a number.
 */
export function convertToNumber(str: string) {
    // Normalise first so that the prefix/suffix checks are not defeated by
    // whitespace and parseFloat does not stop at the first comma.
    const cleaned = str.trim().replace(/^\$/, '').replace(/,/g, '').toLowerCase();

    const value = parseFloat(cleaned);

    if (cleaned.endsWith('k')) {
        return value * 1000;
    }

    if (cleaned.endsWith('m')) {
        return value * 1000000;
    }

    return value;
}

src/parsers/domainParser.ts

1import { Cheerio, Element } from 'cheerio';
2import { config } from '../config.js';
3import { convertToNumber } from '../helpers/helper.js';
4
/**
 * Reads the individual fields of one domain from a result-table row.
 *
 * Every getter looks up a fixed `td:nth-child(n)` cell of the wrapped row
 * element. Numeric getters return `undefined` for an empty cell and otherwise
 * run the cell text through `convertToNumber`; the Google-status getters are
 * `true` only when the cell text is exactly `'1'`.
 */
export class DomainParser {
    element: Cheerio<Element>;

    constructor(element: Cheerio<Element>) {
        this.element = element;
    }

    /** Text content of the row's n-th cell (1-based). */
    private cellText(position: number): string {
        return this.element.find(`td:nth-child(${position})`).text();
    }

    /** Numeric value of the row's n-th cell, or `undefined` when the cell is empty. */
    private cellNumber(position: number): number | undefined {
        const raw = this.cellText(position);
        if (raw === '') {
            return undefined;
        }
        return convertToNumber(raw);
    }

    /** Whether the row's n-th cell contains exactly `'1'`. */
    private cellFlag(position: number): boolean {
        return this.cellText(position) === '1';
    }

    get name(): string {
        return this.cellText(1);
    }

    /** Absolute link built from the origin URL and the anchor's `href`, if the anchor has one. */
    get link(): string | undefined {
        const href = this.element.find('td:nth-child(2) > a').attr('href');
        if (!href) {
            return undefined;
        }
        return `${config.originUrl}${href}`;
    }

    get enddate(): Date {
        return new Date(this.cellText(3));
    }

    get tld(): string {
        return this.cellText(4);
    }

    get godaddyPrice(): number | undefined {
        return this.cellNumber(5);
    }

    get godaddyBid(): number | undefined {
        return this.cellNumber(6);
    }

    get auctionType(): string | undefined {
        const label = this.cellText(7);
        return label === '' ? undefined : label;
    }

    get aiPricePredPrice(): number | undefined {
        return this.cellNumber(8);
    }

    get rdapRegistration(): number | undefined {
        return this.cellNumber(9);
    }

    get waybackFirstYear(): number | undefined {
        return this.cellNumber(10);
    }

    get waybackCount(): number | undefined {
        return this.cellNumber(11);
    }

    get searchVolumeVolume(): number | undefined {
        return this.cellNumber(12);
    }

    get searchVolumeCpc(): number | undefined {
        return this.cellNumber(13);
    }

    get googleStatusIsBlocked(): boolean {
        return this.cellFlag(14);
    }

    get googleStatusIsFaillisted(): boolean {
        return this.cellFlag(15);
    }

    get googleStatusIsAdult(): boolean {
        return this.cellFlag(16);
    }

    get fbShares(): number | undefined {
        return this.cellNumber(17);
    }

    get fbReact(): number | undefined {
        return this.cellNumber(18);
    }

    get mozDomainAuthority(): number | undefined {
        return this.cellNumber(19);
    }

    get mozPageAuthority(): number | undefined {
        return this.cellNumber(20);
    }

    get majesticCitationflow(): number | undefined {
        return this.cellNumber(21);
    }

    get majesticTrustflow(): number | undefined {
        return this.cellNumber(22);
    }

    get majesticExtbacklinks(): number | undefined {
        return this.cellNumber(23);
    }

    get ownranksBrandrank(): number | undefined {
        return this.cellNumber(24);
    }

    get ownranksTldsaturation(): number | undefined {
        return this.cellNumber(25);
    }

    get ownranksRadiotest(): number | undefined {
        return this.cellNumber(26);
    }
}

src/parsers/pageParser.ts

1import { Cheerio, CheerioAPI, Element } from 'cheerio';
2import { DomainParser } from '../parsers/domainParser.js';
3import { DomainBuilder } from '../builders/domainBuilder.js';
4import { Domain } from '../types/domain.js';
5
/**
 * Extracts every domain listed in the result table of a fetched page.
 */
export class PageParser {
    $: CheerioAPI;

    constructor(api: CheerioAPI) {
        this.$ = api;
    }

    /**
     * One `Domain` per row of `#result_win > table > tbody`, in document
     * order. Each row is wrapped in a `DomainParser` and run through the
     * full `DomainBuilder` chain.
     */
    get domains(): Domain[] {
        const rows = this.$('#result_win > table > tbody > tr').toArray();

        return rows.map((row): Domain => {
            const rowElement: Cheerio<Element> = this.$(row);
            const builder = new DomainBuilder(new DomainParser(rowElement));

            return builder
                .addName()
                .addLink()
                .addEnddate()
                .addTld()
                .addGodaddyPrice()
                .addGodaddyBid()
                .addAuctionType()
                .addAiPricePredPrice()
                .addRdapRegistration()
                .addWaybackFirstYear()
                .addWaybackCount()
                .addSearchVolumeVolume()
                .addSearchVolumeCpc()
                .addGoogleStatusIsBlocked()
                .addGoogleStatusIsFaillisted()
                .addGoogleStatusIsAdult()
                .addFbShares()
                .addFbReact()
                .addMozDomainAuthority()
                .addMozPageAuthority()
                .addMajesticCitationflow()
                .addMajesticTrustflow()
                .addMajesticExtbacklinks()
                .addOwnranksBrandrank()
                .addOwnranksTldsaturation()
                .addOwnranksRadiotest()
                .build();
        });
    }
}

src/types/config.ts

/**
 * Shape of the static scraper settings object exported from `src/config.ts`.
 */
export type Config = {
    /** Numeric page-size setting. */
    domainsPerPage: number;

    /** URL string for the site origin. */
    originUrl: string;

    /** URL string for the search endpoint. */
    fetchUrl: string;
};

src/types/domain.ts

/** A value that may be missing from a scraped row. */
type Optional<T> = T | undefined;

/**
 * One scraped domain record, as assembled by the `DomainBuilder` chain in
 * `PageParser`. Only `name`, `enddate` and `tld` are always present; every
 * other field may be `undefined`.
 */
export type Domain = {
    // Always present.
    name: string;
    link: Optional<string>;
    enddate: Date;
    tld: string;

    // godaddy* / auction / price prediction.
    godaddyPrice: Optional<number>;
    godaddyBid: Optional<number>;
    auctionType: Optional<string>;
    aiPricePredPrice: Optional<number>;

    // rdap* / wayback*.
    rdapRegistration: Optional<number>;
    waybackFirstYear: Optional<number>;
    waybackCount: Optional<number>;

    // searchVolume*.
    searchVolumeVolume: Optional<number>;
    searchVolumeCpc: Optional<number>;

    // googleStatus* flags.
    googleStatusIsBlocked: Optional<boolean>;
    googleStatusIsFaillisted: Optional<boolean>;
    googleStatusIsAdult: Optional<boolean>;

    // fb*.
    fbShares: Optional<number>;
    fbReact: Optional<number>;

    // majestic*.
    majesticExtbacklinks: Optional<number>;
    majesticCitationflow: Optional<number>;
    majesticTrustflow: Optional<number>;

    // moz*.
    mozDomainAuthority: Optional<number>;
    mozPageAuthority: Optional<number>;

    // ownranks*.
    ownranksBrandrank: Optional<number>;
    ownranksTldsaturation: Optional<number>;
    ownranksRadiotest: Optional<number>;
};

src/types/input.ts

/**
 * Actor input, as read with `Actor.getInput<Input>()` in `src/main.ts` and
 * handed to `generateRequests`.
 */
export type Input = {
    // Count and ordering.
    numDomains: number;
    sortBy: SortByOption;
    sortDir: SortDirection;

    // filterTld* flags.
    filterTldCom: boolean;
    filterTldNet: boolean;
    filterTldOrg: boolean;
    filterTldIo: boolean;
    filterTldUk: boolean;
    filterTldN: boolean;
    filterTldOther: boolean;

    // filterLength* bounds.
    filterLengthMin: number;
    filterLengthMax: number;

    // filterChars* flags.
    filterCharsHyp: boolean;
    filterCharsNum: boolean;
    filterCharsLet: boolean;

    // filterGgl* flags.
    filterGglNA: boolean;
    filterGglNB: boolean;
    filterGglA: boolean;
    filterGglB: boolean;

    // filterAucTyp* flags.
    filterAucTypAuc: boolean;
    filterAucTypClo: boolean;
    filterAucTypDel: boolean;

    // Expiry-date filter, carried as a string.
    filterExpDate: string;
};
26
/**
 * String literals accepted for `Input.sortBy`.
 */
export type SortByOption =
    | 'name' | 'link' | 'enddate' | 'tld'
    | 'godaddy_price' | 'auction_type' | 'godaddy_bid'
    | 'ai_price_pred_price'
    | 'rdap_registration'
    | 'wayback_first_year' | 'wayback_count'
    | 'search_volume_volume' | 'search_volume_cpc'
    | 'google_status_is_blocked' | 'google_status_is_faillisted' | 'google_status_is_adult'
    | 'fb_shares' | 'fb_react'
    | 'majestic_extbacklinks' | 'majestic_citationflow' | 'majestic_trustflow'
    | 'moz_domain_authority' | 'moz_page_authority'
    | 'ownranks_brandrank' | 'ownranks_tldsaturation' | 'ownranks_radiotest';
54
/** String literals accepted for `Input.sortDir`. */
export type SortDirection =
    | 'asc'
    | 'desc';

src/config.ts

1import { Config } from './types/config.js';
2
const DOMAINS_PER_PAGE = 25;
const ORIGIN_URL = 'https://expireddomains.com';
const FETCH_URL = 'https://expireddomains.com/site/search';

/**
 * Static scraper settings: the page size and the two expireddomains.com URLs.
 */
export const config: Config = {
    domainsPerPage: DOMAINS_PER_PAGE,
    originUrl: ORIGIN_URL,
    fetchUrl: FETCH_URL,
};

src/main.ts

1import { Actor } from 'apify';
2import { CheerioCrawler, Dataset } from 'crawlee';
3import { load } from 'cheerio';
4import { PageParser } from './parsers/pageParser.js';
5import { generateRequests } from './helpers/helper.js';
6import { Input } from './types/input.js';
7import { Domain } from './types/domain.js';
8
/**
 * Actor entry point.
 *
 * Reads the Actor input, builds the request list with `generateRequests`,
 * and crawls it. Each response body is parsed as JSON; its `result` field
 * (an HTML fragment) is loaded with cheerio and handed to `PageParser`, and
 * the resulting domains are stored in the default dataset.
 *
 * Throws if the Actor input is missing. A body that is not valid JSON makes
 * the request handler throw.
 */
await Actor.main(async () => {
    const input = await Actor.getInput<Input>();

    if (!input) throw new Error('Input is missing');

    const proxyConfiguration = await Actor.createProxyConfiguration();

    const crawler = new CheerioCrawler({
        proxyConfiguration,
        requestHandler: async ({ body, request, log }) => {
            const bodyJson = JSON.parse(body.toString());

            // Only a string can be parsed as markup; skip anything else
            // rather than passing a non-string value to cheerio.
            if (typeof bodyJson?.result !== 'string') {
                log.warning(`Response from ${request.url} has no "result" HTML; skipping.`);
                return;
            }

            const $ = load(bodyJson.result);
            const { domains } = new PageParser($);

            // Store the whole page in one call instead of one awaited
            // storage write per domain.
            if (domains.length > 0) {
                await Dataset.pushData<Domain>(domains);
            }
        },
    });

    const requests = await generateRequests(input);

    await crawler.run(requests);
});

test/domainParser.test.ts

1import { DomainParser } from '../src/parsers/domainParser.js'
2import { Cheerio, load } from 'cheerio'
3
// Fixture: a single table row with 26 cells. The expectations below pair each
// DomainParser getter with the value it should read from this row.
const sampleHtml = `
<div>
    <table>
        <tbody>
            <tr>
                <td>example.com</td>
                <td><a href="/link">Link</a></td>
                <td>2024-06-01</td>
                <td>com</td>
                <td>$10</td>
                <td>$5</td>
                <td>Auction</td>
                <td>$20</td>
                <td>1999</td>
                <td>2000</td>
                <td>100</td>
                <td>5000</td>
                <td>$2</td>
                <td>1</td>
                <td>1</td>
                <td>1</td>
                <td>100</td>
                <td>200</td>
                <td>50</td>
                <td>60</td>
                <td>70</td>
                <td>80</td>
                <td>1000</td>
                <td>90</td>
                <td>95</td>
                <td>85</td>
            </tr>
        </tbody>
    </table>
</div>
`

// [getter name, expected value], in the order the individual tests used to run.
const expectations: Array<[keyof DomainParser, unknown]> = [
    ['name', 'example.com'],
    ['link', 'https://expireddomains.com/link'],
    ['enddate', new Date('2024-06-01')],
    ['tld', 'com'],
    ['godaddyPrice', 10],
    ['godaddyBid', 5],
    ['auctionType', 'Auction'],
    ['aiPricePredPrice', 20],
    ['rdapRegistration', 1999],
    ['waybackFirstYear', 2000],
    ['waybackCount', 100],
    ['searchVolumeVolume', 5000],
    ['searchVolumeCpc', 2],
    ['googleStatusIsBlocked', true],
    ['googleStatusIsFaillisted', true],
    ['googleStatusIsAdult', true],
    ['fbShares', 100],
    ['fbReact', 200],
    ['mozDomainAuthority', 50],
    ['mozPageAuthority', 60],
    ['majesticCitationflow', 70],
    ['majesticTrustflow', 80],
    ['majesticExtbacklinks', 1000],
    ['ownranksBrandrank', 90],
    ['ownranksTldsaturation', 95],
    ['ownranksRadiotest', 85],
]

describe('DomainParser', () => {
    let parser: DomainParser

    beforeAll(() => {
        const row: Cheerio<any> = load(sampleHtml)('tr')
        parser = new DomainParser(row)
    })

    test.each(expectations)('should parse %s correctly', (field, expected) => {
        const actual = parser[field]

        if (expected instanceof Date) {
            // Dates are distinct objects, so compare by value.
            expect(actual).toEqual(expected)
        } else {
            expect(actual).toBe(expected)
        }
    })
})

.dockerignore

1# configurations
2.idea
3
4# crawlee and apify storage folders
5apify_storage
6crawlee_storage
7storage
8
9# installed files
10node_modules
11
12# git folder
13.git

.editorconfig

1root = true
2
3[*]
4indent_style = space
5indent_size = 4
6charset = utf-8
7trim_trailing_whitespace = true
8insert_final_newline = true
9end_of_line = lf

.eslintrc

1{
2    "root": true,
3    "env": {
4        "browser": true,
5        "es2020": true,
6        "node": true
7    },
8    "extends": [
9        "@apify/eslint-config-ts"
10    ],
11    "parserOptions": {
12        "project": "./tsconfig.json",
13        "ecmaVersion": 2020
14    },
15    "ignorePatterns": [
16        "node_modules",
17        "dist",
18        "**/*.d.ts"
19    ],
20    "rules": {
21        "import/no-extraneous-dependencies": [
22            "error", {
23                "devDependencies": true
24            }
25        ]
26    }
27
28}

.gitignore

1# This file tells Git which files shouldn't be added to source control
2
3.DS_Store
4.idea
5dist
6node_modules
7apify_storage
8storage
9
10# Added by Apify CLI
11.venv

jest.config.js

/**
 * Jest configuration: runs the TypeScript tests through ts-jest in a Node
 * environment.
 *
 * @type {import('ts-jest').JestConfigWithTsJest}
 */
export default {
  preset: 'ts-jest',
  testEnvironment: 'node',
  moduleFileExtensions: ['ts', 'js'],
  // ts-jest options live on the transform entry; putting them under
  // `globals['ts-jest']` is deprecated.
  transform: {
    '^.+\\.tsx?$': ['ts-jest', { tsconfig: 'tsconfig.json' }],
  },
  // Sources import siblings with a `.js` suffix; strip it from relative
  // specifiers so Jest resolves the `.ts` file.
  moduleNameMapper: {
    '^(\\.\\.?\\/.+)\\.js$': '$1',
  },
};

package.json

1{
2	"name": "expired-domains-scraper",
3	"version": "0.0.1",
4	"type": "module",
5	"description": "The Expired Domains Scraper automates finding valuable expired domains from expireddomains.com, offering filters and sorting by SEO metrics and auction details for efficient domain acquisition.",
6	"engines": {
7		"node": ">=18.0.0"
8	},
9	"dependencies": {
10		"apify": "^3.1.10",
11		"cheerio": "^1.0.0-rc.12",
12		"crawlee": "^3.5.4"
13	},
14	"devDependencies": {
15		"@apify/eslint-config-ts": "^0.3.0",
16		"@apify/tsconfig": "^0.1.0",
17		"@types/jest": "^29.5.12",
18		"@typescript-eslint/eslint-plugin": "^6.7.2",
19		"@typescript-eslint/parser": "^6.7.2",
20		"eslint": "^8.50.0",
21		"jest": "^29.7.0",
22		"ts-jest": "^29.1.5",
23		"tsx": "^4.6.2",
24		"typescript": "^5.3.3"
25	},
26	"scripts": {
27		"start": "npm run start:dev",
28		"start:prod": "node dist/main.js",
29		"start:dev": "tsx src/main.ts",
30		"build": "tsc",
31		"lint": "eslint ./src --ext .ts",
32		"lint:fix": "eslint ./src --ext .ts --fix",
33		"test": "jest"
34	},
35	"author": "Martin Fanta",
36	"license": "MIT"
37}

tsconfig.json

1{
2    "extends": "@apify/tsconfig",
3    "compilerOptions": {
4        "esModuleInterop": true,
5        "module": "NodeNext",
6        "moduleResolution": "NodeNext",
7        "target": "ES2022",
8        "outDir": "dist",
9        "noUnusedLocals": false,
10        "skipLibCheck": true,
11        "lib": ["DOM"],
12        "paths": {
13            "src/*": [
14                "./src/*"
15            ],
16        }
17    },
18    "include": [
19        "./src/**/*"
20    ]
21}
Developer
Maintained by Community
Actor metrics
  • 12 monthly users
  • 3 stars
  • 100.0% runs succeeded
  • Created in Jun 2024
  • Modified 3 months ago