

CSV File to Dataset
Upload a local or remote CSV/text file and convert it to Apify Dataset for further use.
Rating: 0.0 (0)
Pricing: Pay per usage
Total users: 93
Monthly users: 11
Runs succeeded: >99%
Last modified: 8 months ago
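
The actor takes a list of CSV file URLs and a column separator as input, downloads each file, and writes the parsed rows into its default dataset. Below is a minimal sketch, assuming the apify-client package, of how a run could be started from your own code; the token, actor ID, and example URL are placeholders, not values taken from this listing. The input fields (csvUrls, separator) match the input schema shown further down.

// A minimal sketch (not part of this actor's source) of calling the actor
// with the Apify JS client. 'YOUR_APIFY_TOKEN', 'username/my-actor-25' and
// the example URL are placeholders to replace with your own values.
import { ApifyClient } from 'apify-client';

const client = new ApifyClient({ token: 'YOUR_APIFY_TOKEN' });

// Run the actor and wait for it to finish.
const run = await client.actor('username/my-actor-25').call({
    csvUrls: [{ url: 'https://example.com/data.csv' }],
    separator: ',',
});

// The converted rows are stored in the run's default dataset.
const { items } = await client.dataset(run.defaultDatasetId).listItems();
console.log(`The dataset contains ${items.length} rows.`);

The same input can also be filled in directly in the actor's input form in the Apify Console.
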
.actor/Dockerfile
# Specify the base Docker image. You can read more about
# the available images at https://docs.apify.com/sdk/js/docs/guides/docker-images
# You can also use any other image from Docker Hub.
FROM apify/actor-node:18

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Next, copy the remaining files and directories with the source code.
# Since we do this after NPM install, quick build will be really fast
# for most source file changes.
COPY . ./

# Run the image.
CMD npm start --silent
.actor/actor.json
{ "actorSpecification": 1, "name": "my-actor-25", "title": "Project Cheerio Crawler Javascript", "description": "Crawlee and Cheerio project in javascript.", "version": "0.0", "meta": { "templateId": "js-crawlee-cheerio" }, "input": "./input_schema.json", "dockerfile": "./Dockerfile"}
.actor/input_schema.json
{ "title": "PlaywrightCrawler Template", "type": "object", "schemaVersion": 1, "properties": { "csvUrls": { "title": "Upload or link a CSV or text file", "type": "array", "description": "Upload or link a CSV with data", "editor": "requestListSources" }, "separator": { "title": "Column separator", "type": "string", "default": ",", "description": "Usually `,` or `;`", "editor": "textfield" } }}
src/main.js
import { Actor, log } from 'apify';
import { gotScraping } from 'got-scraping';
import neatCsv from 'neat-csv';

// Initialize the Apify SDK
await Actor.init();

const { csvUrls, separator = ',' } = await Actor.getValue('INPUT');

// The request list editor produces entries with either a `url` or a `requestsFromUrl` field.
const urls = csvUrls.map((req) => req?.url || req?.requestsFromUrl).filter(Boolean);

await Actor.setStatusMessage(`Received ${urls.length} CSV URLs. Starting download.`);

// Download each file, parse it as CSV and push the rows to the default dataset.
for (const url of urls) {
    const { body } = await gotScraping(url);
    let data;
    try {
        data = await neatCsv(body.toString(), { separator });
    } catch (e) {
        await Actor.fail(`Could not convert file to CSV with error: ${e}`);
    }
    await Actor.setStatusMessage(`Received ${data.length} rows from ${url}. Starting to push to the dataset, this might take a while.`);
    await Actor.pushData(data);
}

await Actor.exit(`CSV successfully converted to a dataset with ID: ${Actor.getEnv().defaultDatasetId}`);
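
As a quick illustration of the conversion step, the standalone sketch below (not part of the actor) shows what neatCsv returns for a tiny made-up CSV string; each returned object becomes one dataset item.

import neatCsv from 'neat-csv';

// The first line is treated as the header row; every following line
// becomes one plain object with string values.
const csv = 'name,price\nHammer,10\nScrewdriver,7';
const rows = await neatCsv(csv, { separator: ',' });

console.log(rows);
// [
//   { name: 'Hammer', price: '10' },
//   { name: 'Screwdriver', price: '7' },
// ]
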
.dockerignore
# configurations
.idea

# crawlee and apify storage folders
apify_storage
crawlee_storage
storage

# installed files
node_modules

# git folder
.git
.editorconfig
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf
.eslintrc
{ "extends": "@apify", "root": true}
.gitignore
# This file tells Git which files shouldn't be added to source control
.DS_Store
.idea
dist
node_modules
apify_storage
storage
package.json
{ "name": "crawlee-cheerio-javascript", "version": "0.0.1", "type": "module", "description": "This is a boilerplate of an Apify actor.", "engines": { "node": ">=18.0.0" }, "dependencies": { "apify": "^3.1.10", "crawlee": "^3.5.4", "neat-csv": "^7.0.0" }, "devDependencies": { "@apify/eslint-config": "^0.4.0", "eslint": "^8.50.0" }, "scripts": { "start": "node src/main.js", "lint": "eslint ./src --ext .js,.jsx", "lint:fix": "eslint ./src --ext .js,.jsx --fix", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1" }, "author": "It's not you it's me", "license": "ISC"}