Get all token names and contract addresses from Bloxy
Dockerfile
# This is a template for a Dockerfile used to run actors on the Apify platform.
# The base image name below is set during the actor build, based on user settings.
# IMPORTANT: The base image must set a correct working directory, such as /usr/src/app or /home/user
FROM apify/actor-node-puppeteer-chrome
# Second, copy just package.json and package-lock.json since they should be
# the only files that affect "npm install" in the next step, to speed up the build
COPY package*.json ./
# Install NPM packages, skipping optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging.
RUN npm --quiet set progress=false \
&& npm install --only=prod --no-optional \
&& echo "Installed NPM packages:" \
&& (npm list --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version
# Copy source code to container
# Do this in the last step, to have fast build if only the source code changed
COPY . ./
# NOTE: The CMD is already defined by the base image.
# Uncomment this for local node inspector debugging:
# CMD [ "node", "--inspect=0.0.0.0:9229", "main.js" ]
package.json
{
    "name": "apify-project",
    "version": "0.0.1",
    "description": "",
    "author": "It's not you it's me",
    "license": "ISC",
    "dependencies": {
        "apify": "latest"
    },
    "scripts": {
        "start": "node main.js"
    }
}
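Note that main.js below uses the callback-style Apify SDK v1/v2 API (Apify.PuppeteerCrawler, handlePageFunction); in SDK v3 the crawler classes moved to the separate Crawlee library, so "apify": "latest" may no longer resolve to a compatible version. A safer, pinned dependency sketch (the exact version range is an example, not a requirement of this actor):

"dependencies": {
    "apify": "^2.0.0"
}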
main.js
const Apify = require('apify');

Apify.main(async () => {
    // Apify.openRequestQueue() creates a preconfigured RequestQueue instance.
    // We add our first request to it - the initial page the crawler will visit.
    const requestQueue = await Apify.openRequestQueue();
    await requestQueue.addRequest({ url: 'https://bloxy.info/list_tokens/ERC20?page=1' });
    // Create an instance of the PuppeteerCrawler class - a crawler
    // that automatically loads the URLs in headless Chrome / Puppeteer.
    const input = await Apify.getInput();
    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,
        launchContext: {
            // Here you can set options that are passed to the Puppeteer launch() function.
            launchOptions: {
                headless: true,
            },
        },
        // Stop crawling after several pages (default 50, configurable via the "maxPage" input field).
        maxRequestsPerCrawl: (input && input.maxPage) || 50,
        handlePageTimeoutSecs: 30,
        handlePageFunction: async ({ request, page }) => {
            console.log(`Processing ${request.url}...`);

            // A function to be evaluated by Puppeteer within the browser context.
            const data = await page.$$eval('tbody tr', $rows => {
                const scrapedData = [];
                // We're getting the name, contract address and stats of each token row.
                $rows.forEach($row => {
                    // The token link points to .../token_holders/<contract address>.
                    const linkContract = $row.querySelector('td:nth-child(1) a').href;
                    const contract = linkContract.split('token_holders/')[1];
                    scrapedData.push({
                        title: $row.querySelector('td:nth-child(1)').textContent.trim(),
                        contract,
                        type: $row.querySelector('td:nth-child(2)').textContent.trim(),
                        senders_receivers: $row.querySelector('td:nth-child(3)').textContent.trim(),
                        transfers7days: $row.querySelector('td:nth-child(4)').textContent.trim(),
                        volume7days: $row.querySelector('td:nth-child(5)').textContent.trim(),
                        topTransferTx: $row.querySelector('td:nth-child(6)').textContent.trim(),
                    });
                });
                return scrapedData;
            });
            console.log(data);
            // Store the results to the default dataset.
            await Apify.pushData(data);

            // Find a link to the next page and enqueue it if it exists.
            const infos = await Apify.utils.enqueueLinks({
                page,
                requestQueue,
                selector: '.next_page a',
            });
            if (infos.length === 0) console.log(`${request.url} is the last page!`);
        },

        // This function is called if the page processing failed more than maxRequestRetries+1 times.
        handleFailedRequestFunction: async ({ request }) => {
            console.log(`Request ${request.url} failed too many times.`);
        },
    });

    // Run the crawler and wait for it to finish.
    await crawler.run();
    console.log('Crawler finished.');
});
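The crawler reads maxPage from the actor input to cap the number of listing pages it visits (falling back to 50). A minimal input sketch, using only the field the code above actually reads:

{
    "maxPage": 10
}

When running locally with the Apify CLI and the SDK v1/v2 storage layout, this JSON would typically go into apify_storage/key_value_stores/default/INPUT.json before starting the actor with apify run; on the platform it is supplied through the actor's input form or API call.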