Skip to content

Commit 8708809

Browse files
authored
Merge pull request #8 from prosdevlab/feat/ollama-extraction-strategy
feat(extract): implement Ollama extraction strategy
2 parents f8fda31 + 24fc43f commit 8708809

36 files changed

+1201
-139
lines changed

AGENTS.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,11 @@ The project is organized as a monorepo using pnpm workspaces:
2626

2727
```
2828
packages/
29-
├── cli/ # CLI entry point and MCP server implementation
30-
├── core/ # Shared types, interfaces, and core logic
31-
├── extract/ # Document extraction logic (AI integration)
32-
└── vector-store/ # Vector database interaction and semantic search
29+
├── cli/ # CLI entry point and MCP server
30+
├── core/ # Shared types and interfaces
31+
├── extract/ # Document extraction (Gemini, Ollama)
32+
├── storage/ # SQLite persistence (Drizzle ORM)
33+
└── vector-store/ # Vector database for semantic search
3334
```
3435

3536
## Setup Commands

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,13 @@ npm install -g doc-agent
2525
doc extract invoice.pdf
2626
```
2727

28-
**With specific provider:**
28+
**With Ollama (local, privacy-first):**
29+
```bash
30+
# Ensure Ollama is running
31+
doc extract invoice.pdf --provider ollama
32+
```
33+
34+
**With Gemini (cloud):**
2935
```bash
3036
export GEMINI_API_KEY=your_key_here
3137
doc extract invoice.pdf --provider gemini

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
"@types/node": "^24.10.1",
4949
"@vitest/coverage-v8": "^4.0.15",
5050
"lint-staged": "16.2.7",
51+
"tsup": "^8.5.1",
5152
"typescript": "^5.9.3",
5253
"vitest": "^4.0.15"
5354
}

packages/cli/README.md

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# @doc-agent/cli
2+
3+
Command-line interface and MCP server for document extraction.
4+
5+
## Use cases
6+
7+
- Extract data from documents via terminal
8+
- Integrate with Claude Desktop or Cursor via MCP
9+
- Batch process directories of documents (planned)
10+
11+
## Commands
12+
13+
| Command | Description |
14+
|---------|-------------|
15+
| `doc extract <file>` | Extract structured data from PDF/image |
16+
| `doc mcp` | Start MCP server for AI assistant integration |
17+
| `doc search <query>` | Search indexed documents (planned) |
18+
| `doc index <dir>` | Batch index directory (planned) |
19+
20+
## Options
21+
22+
```
23+
extract:
24+
-p, --provider <provider> AI provider: gemini, openai, ollama (default: ollama)
25+
-m, --model <model> Model name (default: llama3.2-vision)
26+
```
27+
28+
## Environment variables
29+
30+
| Variable | Required for |
31+
|----------|--------------|
32+
| `GEMINI_API_KEY` | `--provider gemini` |
33+
| `OPENAI_API_KEY` | `--provider openai` |
34+
35+
## MCP tools
36+
37+
When running `doc mcp`, the server exposes:
38+
- `extract_document` — Extract data from a file path
39+
- `search_documents` — Search indexed documents (planned)
40+
41+
## Depends on
42+
43+
- `@doc-agent/core` — Types
44+
- `@doc-agent/extract` — Extraction logic
45+
- `@doc-agent/vector-store` — Search (planned)
46+

packages/cli/package.json

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
"doc-agent": "./dist/cli.js"
1111
},
1212
"scripts": {
13-
"build": "tsc",
13+
"build": "tsup",
1414
"dev": "tsx src/cli.ts",
1515
"mcp": "tsx src/mcp/server.ts"
1616
},
@@ -23,7 +23,8 @@
2323
"chalk": "^5.6.2",
2424
"commander": "^14.0.2",
2525
"ora": "^9.0.0",
26-
"vectordb": "^0.21.2"
26+
"vectordb": "^0.21.2",
27+
"zod": "^3.23.8"
2728
},
2829
"devDependencies": {
2930
"@types/node": "^24.10.1",

packages/cli/src/mcp/index.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,5 @@
1-
export { startMCPServer } from './server.js';
1+
// Server
2+
3+
// SDK re-exports (for consumers who need MCP types)
4+
export { McpServer, StdioServerTransport } from './sdk';
5+
export { startMCPServer } from './server';

packages/cli/src/mcp/sdk.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/**
2+
* MCP SDK re-exports
3+
* Barrel file to provide clean imports without .js extensions
4+
*/
5+
6+
// Server
7+
export { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
8+
export { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';

packages/cli/src/mcp/server.ts

Lines changed: 36 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -1,123 +1,63 @@
11
import type { Config } from '@doc-agent/core';
22
import { extractDocument } from '@doc-agent/extract';
3-
import { Server } from '@modelcontextprotocol/sdk/server';
4-
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
5-
import {
6-
type CallToolRequest,
7-
CallToolRequestSchema,
8-
ListToolsRequestSchema,
9-
} from '@modelcontextprotocol/sdk/types.js';
3+
import { z } from 'zod';
4+
import { McpServer, StdioServerTransport } from './sdk';
105

11-
const server = new Server(
12-
{
13-
name: 'doc-agent',
14-
version: '0.1.0',
15-
},
16-
{
17-
capabilities: {
18-
tools: {},
19-
},
20-
}
21-
);
22-
23-
// List available tools
24-
server.setRequestHandler(ListToolsRequestSchema, async () => {
25-
return {
26-
tools: [
27-
{
28-
name: 'extract_document',
29-
description: 'Extract structured data from invoice, receipt, or bank statement',
30-
inputSchema: {
31-
type: 'object',
32-
properties: {
33-
filepath: {
34-
type: 'string',
35-
description: 'Path to the document file',
36-
},
37-
provider: {
38-
type: 'string',
39-
enum: ['gemini', 'openai', 'ollama'],
40-
description: 'AI provider to use',
41-
default: 'gemini',
42-
},
43-
},
44-
required: ['filepath'],
45-
},
46-
},
47-
{
48-
name: 'search_documents',
49-
description: 'Search indexed documents using natural language',
50-
inputSchema: {
51-
type: 'object',
52-
properties: {
53-
query: {
54-
type: 'string',
55-
description: 'Search query in natural language',
56-
},
57-
limit: {
58-
type: 'number',
59-
description: 'Maximum number of results',
60-
default: 10,
61-
},
62-
},
63-
required: ['query'],
64-
},
65-
},
66-
],
67-
};
6+
const server = new McpServer({
7+
name: 'doc-agent',
8+
version: '0.1.0',
689
});
6910

70-
// Handle tool calls
71-
server.setRequestHandler(CallToolRequestSchema, async (request: CallToolRequest) => {
72-
if (request.params.name === 'extract_document') {
73-
const { filepath, provider = 'gemini' } = request.params.arguments as {
74-
filepath: string;
75-
provider?: string;
76-
};
77-
11+
// Register extract_document tool
12+
server.registerTool(
13+
'extract_document',
14+
{
15+
description: 'Extract structured data from invoice, receipt, or bank statement',
16+
inputSchema: {
17+
filepath: z.string().describe('Path to the document file'),
18+
provider: z
19+
.enum(['gemini', 'openai', 'ollama'])
20+
.default('gemini')
21+
.describe('AI provider to use'),
22+
},
23+
},
24+
async ({ filepath, provider }) => {
7825
const config: Config = {
79-
aiProvider: provider as 'gemini' | 'openai' | 'ollama',
26+
aiProvider: provider,
8027
geminiApiKey: process.env.GEMINI_API_KEY,
8128
openaiApiKey: process.env.OPENAI_API_KEY,
8229
};
8330

8431
try {
8532
const result = await extractDocument(filepath, config);
86-
8733
return {
88-
content: [
89-
{
90-
type: 'text',
91-
text: JSON.stringify(result, null, 2),
92-
},
93-
],
34+
content: [{ type: 'text', text: JSON.stringify(result, null, 2) }],
9435
};
9536
} catch (error) {
9637
return {
97-
content: [
98-
{
99-
type: 'text',
100-
text: `Error: ${(error as Error).message}`,
101-
},
102-
],
38+
content: [{ type: 'text', text: `Error: ${(error as Error).message}` }],
10339
isError: true,
10440
};
10541
}
10642
}
43+
);
10744

108-
if (request.params.name === 'search_documents') {
45+
// Register search_documents tool
46+
server.registerTool(
47+
'search_documents',
48+
{
49+
description: 'Search indexed documents using natural language',
50+
inputSchema: {
51+
query: z.string().describe('Search query in natural language'),
52+
limit: z.number().default(10).describe('Maximum number of results'),
53+
},
54+
},
55+
async () => {
10956
return {
110-
content: [
111-
{
112-
type: 'text',
113-
text: 'Search functionality not yet implemented',
114-
},
115-
],
57+
content: [{ type: 'text', text: 'Search functionality not yet implemented' }],
11658
};
11759
}
118-
119-
throw new Error(`Unknown tool: ${request.params.name}`);
120-
});
60+
);
12161

12262
export async function startMCPServer() {
12363
const transport = new StdioServerTransport();

packages/cli/tsconfig.json

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
"rootDir": "./src"
66
},
77
"include": ["src/**/*"],
8-
"exclude": ["node_modules", "dist"],
9-
"references": [{ "path": "../core" }, { "path": "../extract" }, { "path": "../vector-store" }]
8+
"exclude": ["node_modules", "dist", "**/*.test.ts"],
9+
"references": [{ "path": "../core" }, { "path": "../extract" }, { "path": "../vector-store" }],
10+
"ts-node": {
11+
"esm": true
12+
}
1013
}

packages/cli/tsup.config.ts

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import { defineConfig } from 'tsup';
2+
3+
export default defineConfig({
4+
entry: {
5+
cli: 'src/cli.ts',
6+
'mcp/server': 'src/mcp/server.ts',
7+
'mcp/index': 'src/mcp/index.ts',
8+
},
9+
format: ['esm'],
10+
dts: false,
11+
sourcemap: true,
12+
clean: true,
13+
splitting: false,
14+
treeshake: true,
15+
external: [
16+
'@doc-agent/core',
17+
'@doc-agent/extract',
18+
'@doc-agent/vector-store',
19+
'@google/generative-ai',
20+
'@modelcontextprotocol/sdk',
21+
'chalk',
22+
'commander',
23+
'ora',
24+
'vectordb',
25+
],
26+
tsconfig: './tsconfig.json',
27+
});

0 commit comments

Comments
 (0)