
Spotify Albums Scraper
Pricing
$19.99/month + usage

Spotify Albums Scraper
Scrape Spotify albums by keywords. Extract comprehensive album data including artist details, cover art, release dates, and playability status. Perfect for music cataloging, album research, and industry analysis.
5.0 (1)
Pricing
$19.99/month + usage
1
Total users
9
Monthly users
3
Runs succeeded
>99%
Last modified
12 days ago
import { Actor } from 'apify'; import { PuppeteerCrawler } from 'crawlee'; import puppeteerExtra from 'puppeteer-extra'; import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import Redis from "ioredis" import randomUseragent from 'random-useragent'; import fetch from 'node-fetch';
// Register the stealth plugin on the puppeteer-extra instance; this same instance is later
// handed to PuppeteerCrawler as its browser launcher.
puppeteerExtra.use(stealthPlugin());
// NOTE(review): disabled Redis session lookup. The connection URL that used to be hard-coded here
// embedded a password; it has been redacted. Treat that credential as leaked (rotate it) and, if this
// code is ever re-enabled, read the URL from configuration such as an environment variable.
// The key name below mentions Instagram, so this looks like a leftover from another scraper - confirm before reuse.
// const redisClient = new Redis("<REDACTED_REDIS_URL>"); // let sessionid = await redisClient.get('instagram-sessionid-cookie');
/**
 * Scrapes Spotify album search results for a list of keywords.
 *
 * For every keyword a browser page is opened on the Spotify album search. The
 * `searchAlbums` responses the page receives are intercepted, each album is
 * pushed to the Actor dataset, and further result pages are requested from
 * inside the browser until `maxItems` albums were collected for that keyword or
 * no new data arrives.
 */
class SpotifyAlbumsScraper {
    /** Number of results Spotify is asked to skip forward per pagination step. */
    static PAGE_SIZE = 30;

    /** How often (ms) the request handler checks whether new albums arrived. */
    static POLL_INTERVAL_MS = 2000;

    /** Consecutive polls without new data after which a keyword is considered exhausted (5 x 2s = 10s). */
    static MAX_IDLE_POLLS = 5;

    /** Pause (ms) before asking for the next result page. */
    static NEXT_PAGE_DELAY_MS = 3000;

    /**
     * Resolves after `ms` milliseconds.
     * @param {number} ms
     * @returns {Promise<void>}
     */
    static sleep(ms) {
        return new Promise((resolve) => setTimeout(resolve, ms));
    }

    /**
     * Builds the album-search page URL for a keyword. The keyword is percent-encoded
     * so spaces, slashes, `?`, `#` etc. cannot break or alter the path.
     * @param {string} keyword
     * @returns {string}
     */
    static buildSearchUrl(keyword) {
        return `https://open.spotify.com/search/${encodeURIComponent(keyword)}/albums`;
    }

    /**
     * Converts a Spotify album URI (`spotify:album:<id>`) into its public web URL.
     * @param {unknown} uri
     * @returns {string|null} The album URL, or `null` when no usable URI is given.
     */
    static buildAlbumUrl(uri) {
        if (typeof uri !== 'string' || uri.length === 0) {
            return null;
        }
        const albumId = uri.replace(/^spotify:album:/, '');
        return `https://open.spotify.com/album/${albumId}`;
    }

    /**
     * Returns the URL of the next result page for an intercepted query URL by
     * advancing the `offset` field inside its JSON `variables` query parameter.
     * @param {string} url Intercepted query URL.
     * @param {number} [pageSize] Amount added to the current offset.
     * @returns {string}
     * @throws {SyntaxError} When the `variables` parameter is not valid JSON.
     */
    static buildNextPageUrl(url, pageSize = SpotifyAlbumsScraper.PAGE_SIZE) {
        const nextUrl = new URL(url);
        // `|| {}` also covers a literal `null` payload, which JSON.parse accepts.
        const variables = JSON.parse(nextUrl.searchParams.get('variables') || '{}') || {};
        const parsedOffset = Number.parseInt(String(variables.offset ?? 0), 10);
        const currentOffset = Number.isNaN(parsedOffset) ? 0 : parsedOffset;
        variables.offset = currentOffset + pageSize;
        nextUrl.searchParams.set('variables', JSON.stringify(variables));
        nextUrl.hash = '';
        return nextUrl.toString();
    }

    /**
     * Runs one crawl per keyword, sequentially.
     * @param {{keywords: string[]|string, maxItems?: number}} input
     *   `keywords` - search terms (a single string is treated as one keyword);
     *   `maxItems` - maximum number of albums collected per keyword (default: unlimited).
     * @returns {Promise<void>}
     * @throws {TypeError} When `keywords` is neither an array nor a string.
     */
    async run(input) {
        const { keywords, maxItems } = input ?? {};
        // A bare string would otherwise be iterated character by character.
        const keywordList = typeof keywords === 'string' ? [keywords] : keywords;
        if (!Array.isArray(keywordList)) {
            throw new TypeError('Input must contain "keywords": an array of search terms.');
        }
        // Kept on the instance because the response interceptors read it.
        this.maxItems = maxItems ?? Infinity;

        for (const keyword of keywordList) {
            // Kept on the instance so every stored album can be tagged with its keyword.
            this.currentSearchUrl = keyword;

            const launchArgs = [
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
                '--disable-accelerated-2d-canvas',
                '--no-first-run',
                '--no-zygote',
                '--disable-gpu',
                '--disable-geolocation',
                '--disable-notifications',
            ];
            const userAgent = randomUseragent.getRandom(
                (ua) => ua.browserName === 'Chrome' && Number.parseFloat(ua.browserVersion) >= 80,
            );
            // getRandom yields nothing when no entry matches; do not launch with "--user-agent=null".
            if (userAgent) {
                launchArgs.push(`--user-agent=${userAgent}`);
            }

            const crawler = new PuppeteerCrawler({
                launchContext: {
                    launcher: puppeteerExtra,
                    launchOptions: {
                        headless: true,
                        args: launchArgs,
                    },
                },
                requestHandlerTimeoutSecs: 3600, // the handler keeps polling, allow up to 1 hour
                navigationTimeoutSecs: 300, // 5 minutes
                preNavigationHooks: [
                    async ({ page }) => {
                        await this.setCookies(page);
                    },
                ],
                requestHandler: async ({ page, request }) => {
                    console.log(`Processing ${request.url}...`);
                    const interceptedData = [];
                    let lastDataLength = 0;
                    let idlePolls = 0;

                    // Interceptors are attached first and the page is then loaded again so
                    // that the initial search response is captured as well.
                    await this.setupInterceptors(page, interceptedData);
                    await page.goto(request.url, { waitUntil: 'networkidle0' });

                    // The interceptors fill `interceptedData` in the background; wait until
                    // the limit is reached or nothing new arrived for MAX_IDLE_POLLS polls.
                    while (
                        interceptedData.length < this.maxItems
                        && idlePolls < SpotifyAlbumsScraper.MAX_IDLE_POLLS
                    ) {
                        await SpotifyAlbumsScraper.sleep(SpotifyAlbumsScraper.POLL_INTERVAL_MS);
                        if (interceptedData.length === lastDataLength) {
                            idlePolls++;
                        } else {
                            idlePolls = 0;
                            lastDataLength = interceptedData.length;
                        }
                    }
                    console.log(`Saved ${interceptedData.length} items`);
                },
            });

            await crawler.run([SpotifyAlbumsScraper.buildSearchUrl(keyword)]);
        }
    }

    /**
     * Sets a fixed cookie on the page before navigation.
     *
     * NOTE(review): the cookie is named `sid_guard` and scoped to `.tiktok.com`, so it
     * does not apply to open.spotify.com; it looks like a leftover from another scraper.
     * It is also a hard-coded session value with a fixed expiry date. Behaviour is kept
     * unchanged here - confirm it can be removed and make sure the value is invalidated.
     * @param {object} page Puppeteer page.
     * @returns {Promise<void>}
     */
    async setCookies(page) {
        await page.setCookie({
            name: 'sid_guard',
            value: 'f917952be638f4225aa82251d42e8d5a%7C1729996200%7C15551986%7CFri%2C+25-Apr-2025+02%3A29%3A46+GMT',
            domain: '.tiktok.com',
            path: '/',
            expires: Math.floor(new Date('2025-11-11').getTime() / 1000),
            httpOnly: true,
            secure: true,
        });
    }

    /**
     * Attaches response listeners that collect album search results and trigger
     * the request for the following result page.
     * @param {object} page Puppeteer page.
     * @param {object[]} interceptedData Array that receives the collected albums (mutated).
     * @returns {Promise<void>}
     */
    async setupInterceptors(page, interceptedData) {
        await page.setRequestInterception(true);
        page.on('request', (request) => request.continue());
        page.on('response', async (response) => {
            const request = response.request();
            const url = request.url();
            try {
                if (!(await this.shouldInterceptRequest(url, request))) {
                    return;
                }
                const responseBody = await response.json();
                const addedCount = await this.processResponseData(responseBody, interceptedData);

                // Stop paginating once the limit is reached or a page yielded nothing;
                // asking for further offsets would only repeat empty requests.
                if (addedCount === 0 || interceptedData.length >= this.maxItems) {
                    return;
                }

                await SpotifyAlbumsScraper.sleep(SpotifyAlbumsScraper.NEXT_PAGE_DELAY_MS);
                const nextUrl = SpotifyAlbumsScraper.buildNextPageUrl(url);

                // Issue the request from inside the browser so it carries the page's
                // cookies; its response is picked up by this same listener.
                await page.evaluate(
                    async ({ nextUrl: targetUrl, headers }) => {
                        await fetch(targetUrl, {
                            method: 'GET',
                            headers,
                            credentials: 'include',
                        });
                    },
                    { nextUrl, headers: request.headers() },
                );
            } catch (error) {
                console.error(`Error processing response:`, error);
            }
        });
    }

    /**
     * Extracts albums from a search response, appends them to `interceptedData`
     * (never exceeding `this.maxItems`) and pushes the new ones to the dataset.
     * @param {object} responseBody Parsed JSON body of a `searchAlbums` response.
     * @param {object[]} interceptedData Albums collected so far (mutated).
     * @returns {Promise<number>} Number of albums added from this response.
     */
    async processResponseData(responseBody, interceptedData) {
        const rawItems = responseBody?.data?.searchV2?.albumsV2?.items;
        const items = Array.isArray(rawItems) ? rawItems : [];
        const currentData = [];

        for (const item of items) {
            if (interceptedData.length >= this.maxItems) {
                break;
            }
            const data = item?.data;
            if (!data) {
                continue;
            }
            const album = {
                ...data,
                albumUrl: SpotifyAlbumsScraper.buildAlbumUrl(data.uri),
                keyword: this.currentSearchUrl,
            };
            interceptedData.push(album);
            currentData.push(album);
        }

        if (currentData.length > 0) {
            await Actor.pushData(currentData);
            console.log(`Pushed ${currentData.length} items`);
        }
        return currentData.length;
    }

    /**
     * Tells whether a request is a GET to the pathfinder query endpoint with
     * `operationName=searchAlbums`.
     * @param {string} url Request URL.
     * @param {{method: function(): string}} request Puppeteer request.
     * @returns {Promise<boolean>}
     */
    async shouldInterceptRequest(url, request) {
        if (!url.includes('/pathfinder/v1/query') || request.method() !== 'GET') {
            return false;
        }
        try {
            return new URL(url).searchParams.get('operationName') === 'searchAlbums';
        } catch (error) {
            console.error('Error parsing URL:', error);
            return false;
        }
    }

    /**
     * Simulates scrolling by growing the height of the track-list container in a
     * few randomly timed steps. Resolves immediately when the container is missing.
     * @param {object} page Puppeteer page.
     * @returns {Promise<void>}
     */
    async scrollPage(page) {
        await page.evaluate(() => new Promise((resolve) => {
            const presentationDiv = document.querySelector('[data-testid="track-list"] > div:nth-child(2)');
            if (!presentationDiv) {
                console.log('Presentation div not found');
                resolve();
                return;
            }

            // Parse to a number: the style value is a string such as "1200px", and adding
            // to a string would concatenate instead of increasing the height.
            const parsedHeight = Number.parseFloat(presentationDiv.style.height);
            let currentHeight = Number.isFinite(parsedHeight) ? parsedHeight : 1000;
            const scrollIncrement = 1000; // base growth per step, in px
            const scrollSteps = Math.floor(Math.random() * 3) + 3; // 3-5 steps
            let scrollCount = 0;

            const scrollInterval = setInterval(() => {
                if (scrollCount >= scrollSteps) {
                    clearInterval(scrollInterval);
                    resolve();
                    return;
                }
                // Grow the container, with +/-100px of jitter.
                currentHeight += scrollIncrement + (Math.random() * 200 - 100);
                presentationDiv.style.height = `${currentHeight}px`;
                scrollCount++;
            }, 500 + Math.random() * 500); // interval chosen once, between 500 and 1000 ms
        }));
    }

    /**
     * Waits for a random whole number of milliseconds in `[min, max]`.
     * @param {number} [min=1000]
     * @param {number} [max=3000]
     * @returns {Promise<void>}
     */
    async randomDelay(min = 1000, max = 3000) {
        const delay = Math.floor(Math.random() * (max - min + 1) + min);
        await SpotifyAlbumsScraper.sleep(delay);
    }
}
// Actor entry point. Actor.main() performs the SDK initialization itself, runs the
// callback and reports failures, so no separate Actor.init() call is made. The
// returned promise is awaited so it is not left floating.
await Actor.main(async () => {
    const input = await Actor.getInput();
    // Fail with a clear message instead of a destructuring TypeError deeper in the scraper.
    if (!input) {
        throw new Error('Actor input is missing; expected { keywords: string[], maxItems?: number }.');
    }
    const crawler = new SpotifyAlbumsScraper();
    await crawler.run(input);
});