
YouTube Daily Upload Video Link Scraper
Under maintenance
Scrapes a YouTube channel and returns a list of video links uploaded on the same day the actor runs. Useful for daily monitoring of new content. Supports all YouTube channel formats. Results are saved in an Apify dataset and can be exported as JSON, CSV, or other formats.
Rating: 0.0 (0)
Pricing: $10.00 / 1,000 results
Monthly users: 0
Last modified: 3 days ago
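The output is a standard Apify dataset, so new links can be fetched programmatically after each run. A minimal sketch using the apify-client package follows; the actor ID is a placeholder for illustration, and the channel value reuses the example from the input schema below.

import { ApifyClient } from 'apify-client';

// Authenticate with your Apify API token.
const client = new ApifyClient({ token: process.env.APIFY_TOKEN });

// Placeholder actor ID – use the ID shown for this Actor in the Apify Console.
const run = await client.actor('<username>/youtube-daily-upload-video-link-scraper').call({
    channel: '@midulive', // any channel identifier accepted by the input schema
});

// Each dataset item has the shape { videos: string[] } (see src/routes.ts below).
const { items } = await client.dataset(run.defaultDatasetId).listItems();
console.log(items);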
.actor/Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://crawlee.dev/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node-playwright-chrome:20 AS builder

# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install all dependencies. Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false

# Next, copy the source files using the user set
# in the base image.
COPY . ./

# Install all dependencies and build the project.
# Don't audit to speed up the installation.
RUN npm run build

# Create final image
FROM apify/actor-node-playwright-chrome:20

# Check preinstalled packages
RUN npm ls crawlee apify puppeteer playwright

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Copy built JS files from the builder image
COPY --from=builder /home/myuser/dist ./dist

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image. If you know you won't need headful browsers,
# you can remove the XVFB start script for a micro perf gain.
CMD ./start_xvfb_and_run_cmd.sh && npm run start:prod --silent
.actor/actor.json
{
    "actorSpecification": 1,
    "name": "my-actor",
    "title": "Project Playwright Crawler Typescript",
    "description": "Crawlee and Playwright project in typescript.",
    "version": "0.0",
    "meta": {
        "templateId": "ts-crawlee-playwright-chrome"
    },
    "input": "./input_schema.json",
    "dockerfile": "./Dockerfile",
    "storages": {
        "dataset": {
            "actorSpecification": 1,
            "views": {
                "overview": {
                    "title": "Daily Videos",
                    "transformation": {
                        "fields": ["videos"]
                    },
                    "display": {
                        "component": "table",
                        "properties": {
                            "videos": {
                                "label": "Video Links",
                                "format": "array"
                            }
                        }
                    }
                }
            }
        }
    }
}
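For reference, a single item in this dataset (as pushed by src/routes.ts below) has the following shape; the "overview" view above renders its videos field as a table column. The URLs here are made-up placeholders, not real results.

// Example dataset item rendered by the "Daily Videos" overview view (placeholder URLs).
const exampleItem = {
    videos: [
        'https://www.youtube.com/watch?v=XXXXXXXXXXX',
        'https://www.youtube.com/watch?v=YYYYYYYYYYY',
    ],
};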
.actor/input_schema.json
{
    "title": "Daily YouTube Channel Scraper",
    "description": "Scrapes videos uploaded in the last 24 hours from a YouTube channel, given its identifier",
    "type": "object",
    "schemaVersion": 1,
    "properties": {
        "channel": {
            "title": "YouTube Channel ID",
            "type": "string",
            "description": "Example: @midulive",
            "minLength": 3,
            "editor": "textfield"
        }
    },
    "required": ["channel"]
}
src/main.ts
import { Actor } from 'apify';
import { PlaywrightCrawler } from 'crawlee';
import { router } from './routes.js';

await Actor.init();

// Input: a single channel identifier as a string
const input = await Actor.getInput<{ channel: string }>() ?? { channel: 'c/FreneticAI' };

// Build the start URL from the channel identifier
const startUrl = `https://www.youtube.com/${input.channel}/videos?hl=en&gl=US`;

const crawler = new PlaywrightCrawler({
    requestHandler: router,
    maxRequestsPerCrawl: 1,
    launchContext: {
        launchOptions: {
            args: ['--no-sandbox'],
        },
    },
});

// Run the crawler with the single start URL
await crawler.run([startUrl]);

await Actor.exit();
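The start URL is built by appending the channel input directly to youtube.com, so any identifier that works as a URL path segment is accepted. A few illustrative mappings (the channel values are examples only):

// '@midulive'        -> https://www.youtube.com/@midulive/videos?hl=en&gl=US
// 'c/FreneticAI'     -> https://www.youtube.com/c/FreneticAI/videos?hl=en&gl=US
// 'channel/UC...'    -> https://www.youtube.com/channel/UC.../videos?hl=en&gl=US
const buildStartUrl = (channel: string): string =>
    `https://www.youtube.com/${channel}/videos?hl=en&gl=US`;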
src/routes.ts
import { PlaywrightCrawlingContext, createPlaywrightRouter } from 'crawlee';
import { Actor } from 'apify';

export const router = createPlaywrightRouter();

router.addDefaultHandler(async ({ page, request, log }: PlaywrightCrawlingContext) => {
    await page.goto(request.url, { waitUntil: 'networkidle' });

    // Dismiss the cookie consent dialog if it appears, then wait for the video grid metadata.
    try {
        await page.waitForSelector('button:has-text("Reject all")', { timeout: 10000 });
        await page.click('button:has-text("Reject all")');
        await page.waitForLoadState('networkidle', { timeout: 10000 });
        await page.waitForSelector('#meta', { timeout: 10000 });
    } catch (e) {
        log.info('Cookie consent popup was not shown.', { error: (e as Error).message });
    }

    // Collect links of videos whose metadata line shows an upload time in hours (i.e. posted today).
    const videoLinks = await page.$$eval('#meta', (elements) => {
        return elements
            .filter((el) => {
                const metaSpans = Array.from(el.querySelectorAll('#metadata-line > span'));
                return metaSpans.some((span) => span.textContent?.includes('hours') || span.textContent?.includes('hour'));
            })
            .map((el) => {
                const link = el.querySelector('#video-title-link');
                return link ? (link as HTMLAnchorElement).href : null;
            })
            .filter(Boolean);
    });

    log.info(`Today's videos: ${videoLinks.length}`);

    // Save the result to the default dataset
    await Actor.pushData({
        videos: videoLinks,
    });
});
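The $$eval filter keeps a video only when one of its metadata-line spans contains "hour" or "hours" (e.g. "3 hours ago"), so entries labelled in minutes, days, or weeks, or showing only view counts, are skipped. The same predicate, pulled out as a standalone sketch with sample metadata strings:

// Standalone version of the predicate used inside page.$$eval above.
const isSameDayUpload = (metadataSpans: string[]): boolean =>
    metadataSpans.some((text) => text.includes('hours') || text.includes('hour'));

isSameDayUpload(['12K views', '3 hours ago']);     // true
isSameDayUpload(['1.2M views', '2 days ago']);     // false
isSameDayUpload(['840 views', '35 minutes ago']);  // false – uploads only minutes old are not matched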
.dockerignore
# configurations
.idea
.vscode

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git
.editorconfig
root = true

[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.eslintrc
{
    "root": true,
    "env": {
        "browser": true,
        "es2020": true,
        "node": true
    },
    "extends": [
        "@apify/eslint-config-ts"
    ],
    "parserOptions": {
        "project": "./tsconfig.json",
        "ecmaVersion": 2020
    },
    "ignorePatterns": [
        "node_modules",
        "dist",
        "**/*.d.ts"
    ]
}
.gitignore
# This file tells Git which files shouldn't be added to source control

.DS_Store
.idea
.vscode
.zed
dist
node_modules
apify_storage
storage
package.json
{
    "name": "crawlee-playwright-typescript",
    "version": "0.0.1",
    "type": "module",
    "description": "This is an example of an Apify actor.",
    "engines": {
        "node": ">=18.0.0"
    },
    "dependencies": {
        "apify": "^3.2.6",
        "crawlee": "^3.11.5",
        "playwright": "*"
    },
    "devDependencies": {
        "@apify/eslint-config-ts": "^0.3.0",
        "@apify/tsconfig": "^0.1.0",
        "@typescript-eslint/eslint-plugin": "^7.18.0",
        "@typescript-eslint/parser": "^7.18.0",
        "eslint": "^8.50.0",
        "tsx": "^4.6.2",
        "typescript": "^5.3.3"
    },
    "scripts": {
        "start": "npm run start:dev",
        "start:prod": "node dist/main.js",
        "start:dev": "tsx src/main.ts",
        "build": "tsc",
        "lint": "eslint ./src --ext .ts",
        "lint:fix": "eslint ./src --ext .ts --fix",
        "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1",
        "postinstall": "npx crawlee install-playwright-browsers"
    },
    "author": "It's not you it's me",
    "license": "ISC"
}
tsconfig.json
{
    "extends": "@apify/tsconfig",
    "compilerOptions": {
        "module": "NodeNext",
        "moduleResolution": "NodeNext",
        "target": "ES2022",
        "outDir": "dist",
        "noUnusedLocals": false,
        "skipLibCheck": true,
        "lib": ["DOM"]
    },
    "include": [
        "./src/**/*"
    ]
}
Pricing
Pricing model
Pay per result. This Actor is paid per result: you are not charged for Apify platform usage, only a fixed price for every 1,000 items the Actor outputs.
Price per 1,000 items
$10.00
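For example, a run that returns 250 video links is charged $2.50, and a run that returns 1,000 links costs $10.00.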