
ArXiv MCP server
Pricing
Pay per usage
Go to Store

ArXiv MCP server
The ArXiv MCP server provides a bridge between AI assistants and arXiv's research repository through the Model Context Protocol (MCP). It allows AI models to search for papers and access their content in a programmatic way.
0.0 (0)
Pricing
Pay per usage
0
Total users
1
Monthly users
0
Last modified
3 days ago
.dockerignore
# configurations.idea.vscode
# crawlee and apify storage foldersapify_storagecrawlee_storagestorage
# installed filesnode_modules
# git folder.git
# dist folderdist
.editorconfig
root = true
[*]indent_style = spaceindent_size = 4charset = utf-8trim_trailing_whitespace = trueinsert_final_newline = trueend_of_line = lf
.gitignore
# This file tells Git which files shouldn't be added to source control
.idea.vscode.zedstorageapify_storagecrawlee_storagenode_modulesdisttsconfig.tsbuildinfo
# Added by Apify CLI.venv
biome.json
{ "$schema": "https://biomejs.dev/schemas/1.9.4/schema.json", "vcs": { "enabled": false, "clientKind": "git", "useIgnoreFile": false }, "files": { "ignoreUnknown": false, "ignore": [] }, "formatter": { "enabled": true, "indentStyle": "tab" }, "organizeImports": { "enabled": true }, "linter": { "enabled": true, "rules": { "recommended": true } }, "javascript": { "formatter": { "quoteStyle": "double" } }}
package.json
{ "name": "actor-arxiv-mcp-server", "version": "0.0.1", "type": "module", "description": "The ArXiv MCP Server provides a bridge between AI assistants and arXiv's research repository through the Model Context Protocol (MCP). It allows AI models to search for papers and access their content in a programmatic way.", "engines": { "node": ">=20.0.0" }, "dependencies": { "apify": "^3.4.1" }, "devDependencies": { "@apify/tsconfig": "^0.1.1", "@biomejs/biome": "1.9.4", "typescript": "^5.8.3" }, "scripts": { "start": "npm run start:dev", "start:prod": "node dist/main.js", "start:dev": "tsx src/main.ts", "build": "tsc", "lint": "biome lint", "lint:fix": "biome lint --write", "format": "biome format", "format:fix": "biome format --write", "test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1" }, "author": "It's not you it's me", "license": "ISC"}
tsconfig.json
{ "extends": "@apify/tsconfig", "compilerOptions": { "module": "NodeNext", "moduleResolution": "NodeNext", "target": "ES2022", "outDir": "dist", "noUnusedLocals": false, "skipLibCheck": true, "lib": ["DOM"] }, "include": ["./src/**/*"]}
.actor/Dockerfile
# ---- Build stage: compile the TypeScript sources ----
FROM node:22-alpine AS builder

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./

# Install all dependencies. Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false

# Next, copy the source files using the user set
# in the base image.
COPY . ./

# Build the project (runs `tsc`, which writes the compiled JS to ./dist).
RUN npm run build

# ---- Final stage: runtime image ----
FROM node:22-alpine

# Install supergateway
RUN npm i -g supergateway

# Install uv (--no-cache fetches the package index on the fly and leaves no apk cache in the layer)
RUN apk add --no-cache py3-uv

# Create and run as a non-root user.
RUN adduser -h /home/apify -D apify

# Create app directory
RUN mkdir -p /home/apify/app
WORKDIR /home/apify/app

# Copy built JS files from builder image.
# The builder stage sets no WORKDIR, so the build output lives at /dist there.
# `--from=builder` is required: without it COPY reads from the build context,
# where `dist` is excluded by .dockerignore.
COPY --from=builder /dist ./dist

# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
RUN chown -R apify:apify /home/apify/app
USER apify

# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
    && npm install --omit=dev --omit=optional \
    && echo "Installed NPM packages:" \
    && (npm list --omit=dev --all || true) \
    && echo "Node.js version:" \
    && node --version \
    && echo "NPM version:" \
    && npm --version \
    && rm -r ~/.npm

# Install the MCP server
RUN uv tool install arxiv-mcp-server

# Tell Node.js this is a production environment
ENV NODE_ENV=production

# Enable Node.js process to use a lot of memory (Actor has limit of 32GB)
# Increases default size of headers. The original limit was 80kb, but from node 10+ they decided to lower it to 8kb.
# However they did not think about all the sites there with large headers,
# so we put back the old limit of 80kb, which seems to work just fine.
ENV NODE_OPTIONS="--max_old_space_size=30000 --max-http-header-size=80000"

# Run the image.
#CMD ["npm", "run", "start:prod", "--silent"]
CMD ["node", "dist/main.js", "--silent"]
.actor/actor.json
{ "actorSpecification": 1, "name": "arxiv-mcp-server", "title": "ArXiv MCP server", "description": "A Model Context Protocol server for searching and analyzing arXiv papers.", "version": "0.0", "buildTag": "latest", "usesStandbyMode": true, "meta": { "templateId": "ts-standby" }, "input": { "title": "Actor input schema", "description": "This is actor input schema", "type": "object", "schemaVersion": 1, "properties": {}, "required": [] }, "dockerfile": "./Dockerfile"}
src/main.ts
/**
 * ArXiv MCP Server - Main Entry Point
 *
 * This file serves as the entry point for the ArXiv MCP Server Actor.
 * It sets up a proxy server that forwards requests to the locally running
 * ArXiv MCP server, which provides a Model Context Protocol (MCP) interface
 * for AI assistants to search and access arXiv papers.
 */

10import { spawn } from "node:child_process";11import http from "node:http";12// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/)13import { Actor, log } from "apify";14import { waitForServer } from "./utils.js";15
// This is an ESM project, and as such, it requires you to specify extensions in your relative imports
// Read more about this here: https://nodejs.org/docs/latest-v18.x/api/esm.html#mandatory-file-extensions
// Note that we need to use `.js` even when inside TS files
// import { router } from './routes.js';

21
// --- Local MCP server settings ---
// Port the MCP server runs on locally.
const MCP_SERVER_PORT = 3000;
// Command that runs the ArXiv MCP server through the uv package manager.
const MCP_COMMAND = "uv tool run arxiv-mcp-server";

// Standby detection: the Actor is in standby mode when the run's meta origin is STANDBY.
const { metaOrigin } = Actor.getEnv();
const STANDBY_MODE = metaOrigin === "STANDBY";

// Configure the Actor for its environment before doing anything else with the SDK.
await Actor.init();

// This Actor only makes sense as a standby server; fail any other kind of run.
if (!STANDBY_MODE) {
	await Actor.fail(
		"This actor is not meant to be run directly. It should be run in standby mode.",
	);
}

/**
 * Spawn the supergateway process that will run the ArXiv MCP server.
 *
 * supergateway is used to:
 * - Execute the ArXiv MCP server command (stdio transport)
 * - Configure server endpoints
 * - Handle server-sent events (SSE) for real-time communication
 */
const child = spawn(
	"supergateway",
	[
		"--stdio",
		`"${MCP_COMMAND}"`, // Quoted so the shell passes the whole command as one argument
		"--port",
		`${MCP_SERVER_PORT}`, // Port for the MCP server to listen on
		"--ssePath",
		"/sse", // Endpoint for server-sent events
		"--messagePath",
		"/message", // Endpoint for message handling
		"--healthEndpoint",
		"/health", // Endpoint for health checks
	],
	{
		stdio: "inherit", // Inherit stdio streams to see logs in the Actor console
		shell: true, // Run command in a shell to support complex commands
	},
);

// Becomes true once a shutdown has been requested, so that the child exiting
// as a result of our own cleanup is not reported as a failure.
let shuttingDown = false;

// Report failures to spawn or signal the child process.
child.on("error", (err) => {
	log.error("Child process error:", err);
});

// Log when the child process exits. If it exits while the Actor is still
// supposed to be serving, the proxy would only be able to return errors,
// so fail the Actor instead of leaving a dead backend behind.
child.on("exit", (code, signal) => {
	log.info(`Child process exited with code ${code}`);
	if (!shuttingDown) {
		shuttingDown = true;
		void Actor.fail(
			`MCP server process exited unexpectedly (code ${code}, signal ${signal}).`,
		);
	}
});

/**
 * Cleanup function to gracefully terminate the child process.
 * This ensures resources are properly released when the Actor stops.
 * It may be called more than once (several handlers below invoke it).
 */
function cleanup() {
	log.info("Cleaning up before exit...");
	shuttingDown = true;
	child.kill("SIGINT"); // Send SIGINT signal to allow graceful shutdown
}

// Register cleanup handlers for various termination scenarios
Actor.on("exit", () => {
	cleanup();
});
Actor.on("aborting", () => {
	cleanup();
});
// SIGTERM is handled alongside SIGINT because it is the signal containers
// normally receive when they are asked to stop.
for (const terminationSignal of ["SIGINT", "SIGTERM"] as const) {
	process.on(terminationSignal, () => {
		cleanup();
		process.exit(0);
	});
}

// Hold startup until the MCP server's health endpoint answers:
// poll once per second and give up after 30 seconds.
const HEALTH_CHECK_TIMEOUT_MS = 30000;
const HEALTH_CHECK_INTERVAL_MS = 1000;
const mcpServerHealthEndpoint = `http://localhost:${MCP_SERVER_PORT}/health`;

log.info("Waiting for MCP server to start...");
await waitForServer(
	mcpServerHealthEndpoint,
	HEALTH_CHECK_TIMEOUT_MS,
	HEALTH_CHECK_INTERVAL_MS,
);
log.info("MCP server is online!");

/**
 * Create an HTTP server that acts as a proxy between external clients and the local MCP server.
 * This allows the Actor to expose the MCP server to the internet through the Apify platform.
 *
 * Every request except the platform readiness probe is forwarded unchanged
 * (path, method, headers, body) to the MCP server on localhost, and the
 * backend's status, headers and body are streamed back to the client.
 */
const server = http.createServer((req, res) => {
	// Handle Apify readiness probe for platform health checks
	if (req.headers["x-apify-container-server-readiness-probe"]) {
		res.writeHead(200);
		res.end("ok");
		return;
	}

	// Set up options for proxying the request to the local MCP server
	const options = {
		hostname: "localhost",
		port: MCP_SERVER_PORT,
		path: req.url, // Forward the same URL path
		method: req.method, // Use the same HTTP method
		headers: req.headers, // Forward all headers
	};

	// Create a request to the local MCP server
	const proxyReq = http.request(options, (proxyRes) => {
		// Ensure we received a valid response from the backend
		if (!proxyRes.statusCode) {
			proxyRes.resume(); // Discard the body so the backend socket is released
			res.writeHead(500);
			res.end("No status code from backend");
			return;
		}

		// Forward the backend response headers and status code to the client
		res.writeHead(proxyRes.statusCode, proxyRes.headers);

		// If the backend aborts mid-stream, headers are already sent, so the
		// only way to signal the failure is to drop the client connection.
		proxyRes.on("error", () => {
			res.destroy();
		});

		// Pipe the backend response body directly to the client
		proxyRes.pipe(res, { end: true });
	});

	// Handle errors in the proxy request
	proxyReq.on("error", (err) => {
		if (res.destroyed) {
			// The client is already gone (e.g. we aborted the backend request
			// because the client disconnected); nothing left to report.
			return;
		}
		if (res.headersSent) {
			// Calling writeHead() again would throw ERR_HTTP_HEADERS_SENT and
			// crash the process; terminate the half-sent response instead.
			res.destroy();
			return;
		}
		res.writeHead(500);
		res.end(`Proxy error: ${err.message}`);
	});

	// When the client goes away before the response finished (typical for
	// long-lived SSE streams), abort the backend request so the backend
	// connection is not leaked.
	res.on("close", () => {
		if (!res.writableFinished) {
			proxyReq.destroy();
		}
	});

	// Pipe the client request body to the backend request
	req.pipe(proxyReq, { end: true });
});

// Start the HTTP server on the port provided by the Apify platform
server.listen(Actor.config.get("standbyPort"));
src/utils.ts
/**
 * Poll `url` until it answers with HTTP 200 or the deadline passes.
 *
 * Connection errors and non-200 responses are treated as "not up yet" and retried.
 * The overall wait is bounded by `timeoutMs`: a single attempt is aborted when
 * the deadline is reached, and the pause between attempts never extends past it.
 *
 * @param url - Endpoint to probe with a GET request.
 * @param timeoutMs - Maximum total time to wait, in milliseconds.
 * @param intervalMs - Pause between two attempts, in milliseconds.
 * @returns Resolves as soon as a 200 response is received.
 * @throws Error when no 200 response was received within `timeoutMs`.
 */
export async function waitForServer(
	url: string,
	timeoutMs = 30000,
	intervalMs = 1000,
): Promise<void> {
	// Largest delay timers accept; bigger values (e.g. Infinity) would fire almost immediately.
	const MAX_TIMER_MS = 2 ** 31 - 1;
	const start = Date.now();
	const remainingMs = (): number => timeoutMs - (Date.now() - start);

	while (remainingMs() > 0) {
		// Abort the attempt once the deadline is reached so a hung request
		// cannot make the whole wait exceed `timeoutMs`.
		const controller = new AbortController();
		const attemptTimer = setTimeout(
			() => controller.abort(),
			Math.min(remainingMs(), MAX_TIMER_MS),
		);
		try {
			const response = await fetch(url, { signal: controller.signal });
			if (response.status === 200) {
				return;
			}
			// Discard the unread body so the connection is released before the next attempt.
			await response.body?.cancel();
		} catch {
			// Ignore errors (server not up yet, or the attempt was aborted)
		} finally {
			clearTimeout(attemptTimer);
		}

		const left = remainingMs();
		if (left <= 0) {
			break;
		}
		// Never sleep past the deadline.
		const pauseMs = Math.min(intervalMs, left, MAX_TIMER_MS);
		await new Promise((resolve) => setTimeout(resolve, pauseMs));
	}

	throw new Error(`Server at ${url} not online after ${timeoutMs}ms`);
}