Spotify Albums Scraper avatar
Spotify Albums Scraper

Pricing

$19.99/month + usage

Go to Store
Spotify Albums Scraper

Spotify Albums Scraper

Developed by

EasyApi

EasyApi

Maintained by Community

Scrape Spotify albums by keywords. Extract comprehensive album data including artist details, cover art, release dates, and playability status. Perfect for music cataloging, album research, and industry analysis.

5.0 (1)

Pricing

$19.99/month + usage

1

Total users

9

Monthly users

3

Runs succeeded

>99%

Last modified

12 days ago

import { Actor } from 'apify'; import { PuppeteerCrawler } from 'crawlee'; import puppeteerExtra from 'puppeteer-extra'; import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import Redis from "ioredis" import randomUseragent from 'random-useragent'; import fetch from 'node-fetch';

puppeteerExtra.use(stealthPlugin());

// const redisClient = new Redis("rediss://default:AY9xAAIjcDE2YTMyNjQyYTI5ODQ0NDQ3YjVhOTE1YWRiYWEyMTZjN3AxMA@sharing-wren-36721.upstash.io:6379"); // let sessionid = await redisClient.get('instagram-sessionid-cookie');

/**
 * Scrapes Spotify album search results for a list of keywords.
 *
 * For every keyword a Puppeteer crawl opens the Spotify web search page,
 * listens for the page's `searchAlbums` pathfinder responses, pushes the
 * albums they contain to the Actor dataset and keeps requesting the next
 * offset from inside the page until `maxItems` albums were collected or no
 * new data arrives.
 */
class SpotifyAlbumsScraper {
  /** Offset increment used when requesting the next page of results. */
  static PAGE_SIZE = 30;

  /** Delay between two checks for newly intercepted data, in milliseconds. */
  static POLL_INTERVAL_MS = 2000;

  /** Consecutive polls without new data after which a keyword is considered done. */
  static MAX_IDLE_POLLS = 5;

  /** Pause before the next page is requested, in milliseconds. */
  static NEXT_PAGE_DELAY_MS = 3000;

  /**
   * Runs one crawl per keyword, sequentially.
   *
   * @param {{ keywords: string[], maxItems?: number }} input - Actor input.
   *   `maxItems` limits the number of albums collected per keyword and
   *   defaults to `Infinity`.
   * @returns {Promise<void>}
   * @throws {TypeError} When `input.keywords` is not an array.
   */
  async run(input) {
    const { keywords, maxItems: requestedMaxItems } = input ?? {};
    if (!Array.isArray(keywords)) {
      throw new TypeError('Input must contain a "keywords" array.');
    }
    // `??` (instead of a destructuring default) also maps an explicit null to "no limit".
    const maxItems = requestedMaxItems ?? Infinity;
    this.maxItems = maxItems; // Read by the response handlers.

    for (const keyword of keywords) {
      this.currentSearchUrl = keyword; // Keyword attached to every pushed item.

      const browserArgs = [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-accelerated-2d-canvas',
        '--no-first-run',
        '--no-zygote',
        '--disable-gpu',
        '--disable-geolocation',
        '--disable-notifications',
      ];
      const userAgent = randomUseragent.getRandom(
        (ua) => ua.browserName === 'Chrome' && parseFloat(ua.browserVersion) >= 80,
      );
      // Skip the flag when no matching user agent was found, so Chrome is
      // never launched with a literal `--user-agent=null`.
      if (userAgent) {
        browserArgs.push(`--user-agent=${userAgent}`);
      }

      const crawler = new PuppeteerCrawler({
        launchContext: {
          launcher: puppeteerExtra,
          launchOptions: {
            headless: true,
            args: browserArgs,
          },
        },
        requestHandlerTimeoutSecs: 3600, // 1 hour
        navigationTimeoutSecs: 300, // 5 minutes
        preNavigationHooks: [
          async ({ page }) => {
            await this.setCookies(page);
          },
        ],
        requestHandler: async ({ page, request }) => {
          console.log(`Processing ${request.url}...`);
          const interceptedData = [];

          // Attach the interceptors first, then (re)load the page so that its
          // search responses pass through them.
          await this.setupInterceptors(page, interceptedData);
          await page.goto(request.url, { waitUntil: 'networkidle0' });

          // Poll until enough items arrived or nothing new showed up for
          // MAX_IDLE_POLLS consecutive polls.
          let lastDataLength = 0;
          let idlePolls = 0;
          while (
            idlePolls < SpotifyAlbumsScraper.MAX_IDLE_POLLS
            && interceptedData.length < maxItems
          ) {
            await new Promise((resolve) => {
              setTimeout(resolve, SpotifyAlbumsScraper.POLL_INTERVAL_MS);
            });
            if (interceptedData.length === lastDataLength) {
              idlePolls++;
            } else {
              idlePolls = 0;
              lastDataLength = interceptedData.length;
            }
          }
          console.log(`Saved ${interceptedData.length} items`);
        },
      });
      await crawler.run([this.buildSearchUrl(keyword)]);
    }
  }

  /**
   * Builds the album search page URL for a keyword.
   *
   * @param {string} keyword - Raw search term.
   * @returns {string} URL with the keyword percent-encoded, so that spaces,
   *   `/`, `?` or `#` inside the keyword cannot alter the URL structure.
   */
  buildSearchUrl(keyword) {
    return `https://open.spotify.com/search/${encodeURIComponent(keyword)}/albums`;
  }

  /**
   * Derives the URL of the next result page from an intercepted query URL by
   * increasing the `offset` inside its JSON-encoded `variables` parameter.
   *
   * @param {string} url - Intercepted query URL.
   * @returns {string} Same URL with `variables.offset` advanced by PAGE_SIZE.
   * @throws {SyntaxError} When the `variables` parameter is not valid JSON.
   */
  buildNextPageUrl(url) {
    const nextUrl = new URL(url);
    const variables = JSON.parse(nextUrl.searchParams.get('variables') || '{}');
    const currentOffset = Number.parseInt(variables.offset ?? 0, 10) || 0;
    variables.offset = currentOffset + SpotifyAlbumsScraper.PAGE_SIZE;
    nextUrl.searchParams.set('variables', JSON.stringify(variables));
    return nextUrl.toString();
  }

  /**
   * Sets the session cookie on the page before navigation.
   *
   * NOTE(review): this is a hard-coded `sid_guard` cookie scoped to
   * `.tiktok.com`, which looks like a leftover from another scraper and is
   * presumably never sent to Spotify. It is also a credential committed to
   * source. Confirm and remove or replace it.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<void>}
   */
  async setCookies(page) {
    await page.setCookie({
      name: 'sid_guard',
      value: 'f917952be638f4225aa82251d42e8d5a%7C1729996200%7C15551986%7CFri%2C+25-Apr-2025+02%3A29%3A46+GMT',
      domain: '.tiktok.com',
      path: '/',
      expires: Math.floor(new Date('2025-11-11').getTime() / 1000),
      httpOnly: true,
      secure: true,
    });
  }

  /**
   * Enables request interception on the page and processes every album
   * search response: its items are stored and pushed, and the next page is
   * requested from inside the browser so the follow-up response is
   * intercepted by this same handler.
   *
   * @param {object} page - Puppeteer page.
   * @param {object[]} interceptedData - Array that collected items are appended to.
   * @returns {Promise<void>}
   */
  async setupInterceptors(page, interceptedData) {
    await page.setRequestInterception(true);
    page.on('request', (request) => request.continue());
    page.on('response', async (response) => {
      const request = response.request();
      const url = request.url();
      try {
        if (!(await this.shouldInterceptRequest(url, request))) {
          return;
        }
        const responseBody = await response.json();
        const addedCount = await this.processResponseData(responseBody, interceptedData);

        // Stop paginating once a page contributed nothing (end of results) or
        // the limit was reached.
        if (addedCount === 0 || interceptedData.length >= this.maxItems) {
          return;
        }

        await new Promise((resolve) => {
          setTimeout(resolve, SpotifyAlbumsScraper.NEXT_PAGE_DELAY_MS);
        });
        const nextUrl = this.buildNextPageUrl(url);
        // Issue the request in the browser context, reusing the headers of the
        // intercepted request.
        await page.evaluate(async ({ nextUrl: target, headers }) => {
          await fetch(target, {
            method: 'GET',
            headers,
            credentials: 'include',
          });
        }, {
          nextUrl,
          headers: request.headers(),
        });
      } catch (error) {
        console.error(`Error processing response:`, error);
      }
    });
  }

  /**
   * Extracts albums from a search response, appends them to
   * `interceptedData` (never exceeding `this.maxItems`) and pushes the newly
   * added ones to the Actor dataset.
   *
   * @param {object} responseBody - Parsed JSON body of a search response.
   * @param {object[]} interceptedData - Array that collected items are appended to.
   * @returns {Promise<number>} Number of items added by this call.
   */
  async processResponseData(responseBody, interceptedData) {
    const rawItems = responseBody?.data?.searchV2?.albumsV2?.items;
    const items = Array.isArray(rawItems) ? rawItems : [];
    const currentData = [];

    for (const item of items) {
      if (interceptedData.length >= this.maxItems) {
        break;
      }
      const data = item?.data;
      if (!data) {
        continue;
      }
      // The ID is taken as the last colon-separated segment of the URI
      // (expected form: `spotify:album:<id>`).
      const albumId = typeof data.uri === 'string' ? data.uri.split(':').pop() : '';
      const processedData = {
        ...data,
        albumUrl: albumId ? `https://open.spotify.com/album/${albumId}` : null,
        keyword: this.currentSearchUrl, // Keyword the album was found for.
      };
      interceptedData.push(processedData);
      currentData.push(processedData);
    }

    if (currentData.length > 0) {
      await Actor.pushData(currentData);
      console.log(`Pushed ${currentData.length} items`);
    }
    return currentData.length;
  }

  /**
   * Tells whether a request is a GET to the pathfinder query endpoint with
   * `operationName=searchAlbums`.
   *
   * @param {string} url - Request URL.
   * @param {{ method: function(): string }} request - Request exposing `method()`.
   * @returns {Promise<boolean>} True when the response should be processed.
   */
  async shouldInterceptRequest(url, request) {
    if (!url.includes('/pathfinder/v1/query') || request.method() !== 'GET') {
      return false;
    }
    try {
      return new URL(url).searchParams.get('operationName') === 'searchAlbums';
    } catch (error) {
      console.error('Error parsing URL:', error);
      return false;
    }
  }

  /**
   * Simulates scrolling by growing the height of the track-list container
   * in 3-5 randomized steps.
   *
   * @param {object} page - Puppeteer page.
   * @returns {Promise<void>}
   */
  async scrollPage(page) {
    await page.evaluate(() => new Promise((resolve) => {
      const presentationDiv = document.querySelector('[data-testid="track-list"] > div:nth-child(2)');
      if (!presentationDiv) {
        console.log('Presentation div not found');
        resolve();
        return;
      }
      // Parse to a number: `style.height` is a string such as "1200px", and
      // adding to a string would concatenate instead of sum.
      let currentHeight = Number.parseFloat(presentationDiv.style.height) || 1000;
      const scrollIncrement = 1000; // Height added per step.
      const scrollSteps = Math.floor(Math.random() * 3) + 3; // Random 3-5 steps.
      let scrollCount = 0;
      const scrollInterval = setInterval(() => {
        if (scrollCount >= scrollSteps) {
          clearInterval(scrollInterval);
          resolve();
          return;
        }
        // Grow the container with a jitter of +/-100px.
        currentHeight += scrollIncrement + (Math.random() * 200 - 100);
        presentationDiv.style.height = `${currentHeight}px`;
        scrollCount++;
      }, 500 + Math.random() * 500); // Random interval of 500-1000 ms.
    }));
  }

  /**
   * Waits for a random duration, for use between scrolling and other actions.
   *
   * @param {number} [min=1000] - Minimum delay in milliseconds.
   * @param {number} [max=3000] - Maximum delay in milliseconds.
   * @returns {Promise<void>}
   */
  async randomDelay(min = 1000, max = 3000) {
    const delay = Math.floor(Math.random() * (max - min + 1) + min);
    await new Promise((resolve) => {
      setTimeout(resolve, delay);
    });
  }
}

// Entry point: read the Actor input and run the scraper with it.
// Actor.main() takes care of initializing the Actor before the callback and
// exiting afterwards, so no separate Actor.init() call is made here.
await Actor.main(async () => {
  const input = await Actor.getInput();

  const scraper = new SpotifyAlbumsScraper();
  await scraper.run(input);
});