ArXiv MCP server avatar
ArXiv MCP server

Pricing

Pay per usage

Go to Store
ArXiv MCP server

ArXiv MCP server

Developed by

Jakub Kopecký

Jakub Kopecký

Maintained by Community

The ArXiv MCP server provides a bridge between AI assistants and arXiv's research repository through the Model Context Protocol (MCP). It allows AI models to search for papers and access their content in a programmatic way.

0.0 (0)

Pricing

Pay per usage

0

Total users

1

Monthly users

0

Last modified

3 days ago

.dockerignore

# configurations
.idea
.vscode
# crawlee and apify storage folders
apify_storage
crawlee_storage
storage
# installed files
node_modules
# git folder
.git
# dist folder
dist

.editorconfig

root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
end_of_line = lf

.gitignore

# This file tells Git which files shouldn't be added to source control
.idea
.vscode
.zed
storage
apify_storage
crawlee_storage
node_modules
dist
tsconfig.tsbuildinfo
# Added by Apify CLI
.venv

biome.json

{
"$schema": "https://biomejs.dev/schemas/1.9.4/schema.json",
"vcs": {
"enabled": false,
"clientKind": "git",
"useIgnoreFile": false
},
"files": {
"ignoreUnknown": false,
"ignore": []
},
"formatter": {
"enabled": true,
"indentStyle": "tab"
},
"organizeImports": {
"enabled": true
},
"linter": {
"enabled": true,
"rules": {
"recommended": true
}
},
"javascript": {
"formatter": {
"quoteStyle": "double"
}
}
}

package.json

{
"name": "actor-arxiv-mcp-server",
"version": "0.0.1",
"type": "module",
"description": "The ArXiv MCP Server provides a bridge between AI assistants and arXiv's research repository through the Model Context Protocol (MCP). It allows AI models to search for papers and access their content in a programmatic way.",
"engines": {
"node": ">=20.0.0"
},
"dependencies": {
"apify": "^3.4.1"
},
"devDependencies": {
"@apify/tsconfig": "^0.1.1",
"@biomejs/biome": "1.9.4",
"typescript": "^5.8.3"
},
"scripts": {
"start": "npm run start:dev",
"start:prod": "node dist/main.js",
"start:dev": "tsx src/main.ts",
"build": "tsc",
"lint": "biome lint",
"lint:fix": "biome lint --write",
"format": "biome format",
"format:fix": "biome format --write",
"test": "echo \"Error: oops, the actor has no tests yet, sad!\" && exit 1"
},
"author": "It's not you it's me",
"license": "ISC"
}

tsconfig.json

{
"extends": "@apify/tsconfig",
"compilerOptions": {
"module": "NodeNext",
"moduleResolution": "NodeNext",
"target": "ES2022",
"outDir": "dist",
"noUnusedLocals": false,
"skipLibCheck": true,
"lib": ["DOM"]
},
"include": ["./src/**/*"]
}

.actor/Dockerfile

FROM node:22-alpine AS builder
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
# Install all dependencies. Don't audit to speed up the installation.
RUN npm install --include=dev --audit=false
# Next, copy the source files using the user set
# in the base image.
COPY . ./
# Build the project (runs the "build" script from package.json).
RUN npm run build
# Create final image
FROM node:22-alpine
# Install supergateway
RUN npm i -g supergateway
# Install uv
RUN apk add py3-uv
# Create and run as a non-root user.
RUN adduser -h /home/apify -D apify
# Create app directory
RUN mkdir -p /home/apify/app
WORKDIR /home/apify/app
# Copy built JS files from builder image
COPY --from=builder /dist ./dist
# Copy just package.json and package-lock.json
# to speed up the build using Docker layer cache.
COPY package*.json ./
RUN chown -R apify:apify /home/apify/app
USER apify
# Install NPM packages, skip optional and development dependencies to
# keep the image small. Avoid logging too much and print the dependency
# tree for debugging
RUN npm --quiet set progress=false \
&& npm install --omit=dev --omit=optional \
&& echo "Installed NPM packages:" \
&& (npm list --omit=dev --all || true) \
&& echo "Node.js version:" \
&& node --version \
&& echo "NPM version:" \
&& npm --version \
&& rm -r ~/.npm
# Install the MCP server
RUN uv tool install arxiv-mcp-server
# Tell Node.js this is a production environment
ENV NODE_ENV=production
# Enable Node.js process to use a lot of memory (Actor has limit of 32GB)
# Increases default size of headers. The original limit was 80kb, but from node 10+ they decided to lower it to 8kb.
# However they did not think about all the sites there with large headers,
# so we put back the old limit of 80kb, which seems to work just fine.
ENV NODE_OPTIONS="--max_old_space_size=30000 --max-http-header-size=80000"
# Run the image.
#CMD ["npm", "run", "start:prod", "--silent"]
CMD ["node", "dist/main.js", "--silent"]

.actor/actor.json

{
"actorSpecification": 1,
"name": "arxiv-mcp-server",
"title": "Arxiv MCP server",
"description": "A Model Context Protocol server for searching and analyzing arXiv papers.",
"version": "0.0",
"buildTag": "latest",
"usesStandbyMode": true,
"meta": {
"templateId": "ts-standby"
},
"input": {
"title": "Actor input schema",
"description": "This is actor input schema",
"type": "object",
"schemaVersion": 1,
"properties": {},
"required": []
},
"dockerfile": "./Dockerfile"
}

src/main.ts

/**
 * ArXiv MCP Server - Main Entry Point
 *
 * This file serves as the entry point for the ArXiv MCP Server Actor.
 * It starts the ArXiv MCP server behind supergateway on a local port and
 * runs a small HTTP proxy on the Actor's standby port that forwards every
 * request to it, so AI assistants can reach the Model Context Protocol (MCP)
 * interface through the Apify platform.
 */

import { spawn } from "node:child_process";
import http from "node:http";
// Apify SDK - toolkit for building Apify Actors (Read more at https://docs.apify.com/sdk/js/)
import { Actor, log } from "apify";
// This is an ESM project, so relative imports must carry an explicit extension,
// and it has to be `.js` even inside TS files.
// Read more: https://nodejs.org/docs/latest-v18.x/api/esm.html#mandatory-file-extensions
import { waitForServer } from "./utils.js";

// Configuration constants for the MCP server
const MCP_SERVER_PORT = 3000; // Local port supergateway listens on
const MCP_COMMAND = "uv tool run arxiv-mcp-server"; // Command supergateway runs over stdio

// Check if the Actor is running in standby mode
const STANDBY_MODE = Actor.getEnv().metaOrigin === "STANDBY";

// Initialize the Apify Actor environment.
// It's recommended to start every Actor with an init().
await Actor.init();

if (!STANDBY_MODE) {
	// If the Actor is not in standby mode, we should not run the MCP server
	await Actor.fail(
		"This actor is not meant to be run directly. It should be run in standby mode.",
	);
}

// Set once shutdown has begun so that the child's exit is not reported as a
// failure and the child is only signalled once.
let shuttingDown = false;

/**
 * Spawn the supergateway process that will run the ArXiv MCP server.
 *
 * supergateway is given the stdio command to execute plus the port and the
 * SSE / message / health paths it should serve.
 *
 * The process is started directly (no intermediate shell): MCP_COMMAND is
 * passed as a single argv element, so it needs no manual quoting, and
 * `child.kill()` signals supergateway itself rather than a wrapping shell.
 */
const child = spawn(
	"supergateway",
	[
		"--stdio",
		MCP_COMMAND, // Command to execute the ArXiv MCP server
		"--port",
		`${MCP_SERVER_PORT}`, // Port for the MCP server to listen on
		"--ssePath",
		"/sse", // Endpoint for server-sent events
		"--messagePath",
		"/message", // Endpoint for message handling
		"--healthEndpoint",
		"/health", // Endpoint for health checks
	],
	{
		stdio: "inherit", // Inherit stdio streams to see logs in the Actor console
	},
);

// Report spawn/kill failures with the full error (message and stack).
child.on("error", (err) => {
	log.exception(err, "Child process error:");
});

// Log when the child process exits. Without the backend the proxy can only
// return errors, so an exit we did not request fails the Actor.
child.on("exit", (code, signal) => {
	log.info(`Child process exited with code ${code}`, { signal });
	if (!shuttingDown) {
		shuttingDown = true;
		void Actor.fail("MCP server process exited unexpectedly.");
	}
});

/**
 * Gracefully terminate the child process.
 *
 * Safe to call more than once: only the first call logs and sends SIGINT.
 */
function cleanup() {
	if (shuttingDown) {
		return;
	}
	shuttingDown = true;
	log.info("Cleaning up before exit...");
	child.kill("SIGINT"); // Send SIGINT signal to allow graceful shutdown
}

// Register cleanup handlers for various termination scenarios
Actor.on("exit", () => {
	cleanup();
});
Actor.on("aborting", () => {
	cleanup();
});
for (const signal of ["SIGINT", "SIGTERM"] as const) {
	process.on(signal, () => {
		cleanup();
		process.exit(0);
	});
}

// Wait for the MCP server to start and become available
log.info("Waiting for MCP server to start...");
const mcpServerHealthEndpoint = `http://localhost:${MCP_SERVER_PORT}/health`;
try {
	// Poll the health endpoint until it responds or times out (30 seconds with 1-second intervals)
	await waitForServer(mcpServerHealthEndpoint, 30000, 1000);
} catch (err) {
	// Fail the run explicitly instead of crashing on an unhandled rejection.
	cleanup();
	await Actor.fail(err instanceof Error ? err.message : String(err));
}
log.info("MCP server is online!");

/**
 * Create an HTTP server that acts as a proxy between external clients and the local MCP server.
 * This allows the Actor to expose the MCP server through the Apify platform.
 */
const server = http.createServer((req, res) => {
	// Handle Apify readiness probe for platform health checks
	if (req.headers["x-apify-container-server-readiness-probe"]) {
		res.writeHead(200);
		res.end("ok");
		return;
	}

	// Forward the request unchanged (path, method, headers) to the local MCP server
	const options = {
		hostname: "localhost",
		port: MCP_SERVER_PORT,
		path: req.url,
		method: req.method,
		headers: req.headers,
	};

	const proxyReq = http.request(options, (proxyRes) => {
		// Ensure we received a valid response from the backend
		if (!proxyRes.statusCode) {
			res.writeHead(502);
			res.end("No status code from backend");
			return;
		}

		// Forward the backend status and headers, then stream the body to the client
		res.writeHead(proxyRes.statusCode, proxyRes.headers);
		proxyRes.pipe(res, { end: true });
	});

	// Handle errors in the proxy request
	proxyReq.on("error", (err) => {
		// Once the response has started (or the client is gone) a status line
		// can no longer be written; calling writeHead again would throw.
		// Just drop the connection in that case.
		if (res.headersSent || res.destroyed) {
			res.destroy();
			return;
		}
		res.writeHead(502);
		res.end(`Proxy error: ${err.message}`);
	});

	// If the client goes away before the response is complete (the normal way
	// an SSE stream ends), release the upstream connection as well.
	res.on("close", () => {
		if (!res.writableFinished) {
			proxyReq.destroy();
		}
	});

	// Pipe the client request body to the backend request
	req.pipe(proxyReq, { end: true });
});

// Start the HTTP server on the port provided by the Apify platform
server.listen(Actor.config.get("standbyPort"));

src/utils.ts

/**
 * Polls `url` until it answers with HTTP 200 or the time budget runs out.
 *
 * Connection errors and non-200 responses are treated as "not up yet" and
 * retried. Each request is bounded by the time left until the deadline, so a
 * server that accepts the connection but never answers cannot stall the poll
 * beyond `timeoutMs`.
 *
 * @param url - Endpoint to probe with a GET request.
 * @param timeoutMs - Total time budget in milliseconds.
 * @param intervalMs - Pause between attempts in milliseconds.
 * @returns Resolves as soon as one probe returns status 200.
 * @throws Error if no probe returned 200 before the deadline.
 */
export async function waitForServer(
	url: string,
	timeoutMs = 30000,
	intervalMs = 1000,
): Promise<void> {
	const deadline = Date.now() + timeoutMs;

	while (Date.now() < deadline) {
		try {
			const response = await fetch(url, {
				// Abort a probe that would outlive the overall deadline.
				signal: AbortSignal.timeout(Math.max(1, deadline - Date.now())),
			});
			const { status } = response;
			// Only the status matters; discard the body so the connection is released.
			await response.body?.cancel().catch(() => undefined);
			if (status === 200) {
				return;
			}
		} catch {
			// Ignore errors (server not up yet, or the probe timed out)
		}

		// Never sleep past the deadline.
		const remainingMs = deadline - Date.now();
		if (remainingMs <= 0) {
			break;
		}
		await new Promise((resolve) =>
			setTimeout(resolve, Math.min(intervalMs, remainingMs)),
		);
	}

	throw new Error(`Server at ${url} not online after ${timeoutMs}ms`);
}