This is the companion code post to ← Production-Grade RAG with Spring AI 1.1.0. That article explains how every layer works. This post gives you the complete project — every file, ready to clone and run.
Prerequisites
- Java 21+ and Maven 3.9+
- Docker and Docker Compose (for PostgreSQL + pgvector)
- An OpenAI API key (
export OPENAI_API_KEY=sk-...)
Project Structure
spring-ai-rag/
├── docker-compose.yml
├── init.sql
├── pom.xml
└── src/main/
├── java/com/example/rag/
│ ├── RagApplication.java
│ ├── config/
│ │ └── RagConfig.java
│ ├── controller/
│ │ └── RagController.java
│ ├── ingestion/
│ │ ├── DocumentIngestionService.java
│ │ ├── IngestionException.java
│ │ └── IngestionTracker.java
│ ├── model/
│ │ ├── FaithfulnessResult.java
│ │ ├── IngestionResult.java
│ │ ├── QueryRequest.java
│ │ ├── RagQueryOptions.java
│ │ ├── RagResponse.java
│ │ ├── ScoredDocument.java
│ │ └── SourceAttribution.java
│ └── query/
│ ├── CrossEncoderReranker.java
│ ├── HallucinationGuard.java
│ └── RagQueryService.java
└── resources/
└── application.yml
Step 1 — Infrastructure
# docker-compose.yml
version: "3.9"
services:
postgres:
image: pgvector/pgvector:pg16
environment:
POSTGRES_USER: raguser
POSTGRES_PASSWORD: ragpass
POSTGRES_DB: ragdb
ports:
- "5432:5432"
volumes:
- pgdata:/var/lib/postgresql/data
- ./init.sql:/docker-entrypoint-initdb.d/init.sql
volumes:
pgdata:
-- init.sql
CREATE EXTENSION IF NOT EXISTS vector;
CREATE SCHEMA IF NOT EXISTS rag;
CREATE TABLE IF NOT EXISTS rag.document_chunks (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
content TEXT NOT NULL,
metadata JSONB,
embedding VECTOR(1536)
);
-- HNSW index: fastest for <5 M rows at query time
CREATE INDEX IF NOT EXISTS document_chunks_hnsw_idx
ON rag.document_chunks
USING hnsw (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);
Step 2 — pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.example</groupId>
<artifactId>spring-ai-rag</artifactId>
<version>1.0.0-SNAPSHOT</version>
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>3.4.5</version>
</parent>
<properties>
<java.version>21</java.version>
<spring-ai.version>1.1.0</spring-ai.version>
</properties>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-bom</artifactId>
<version>${spring-ai.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-data-jpa</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
<!-- Spring AI 1.x starter artifact IDs (the old *-spring-boot-starter names are not managed by the 1.1.0 BOM) -->
<dependency>
  <groupId>org.springframework.ai</groupId>
  <artifactId>spring-ai-starter-model-openai</artifactId>
</dependency>
<dependency>
  <groupId>org.springframework.ai</groupId>
  <artifactId>spring-ai-starter-vector-store-pgvector</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-pdf-document-reader</artifactId>
</dependency>
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-registry-prometheus</artifactId>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
Step 3 — application.yml
# src/main/resources/application.yml
spring:
application:
name: spring-ai-rag
datasource:
url: jdbc:postgresql://localhost:5432/ragdb
username: raguser
password: ragpass
driver-class-name: org.postgresql.Driver
jpa:
hibernate:
ddl-auto: none # schema managed by init.sql
ai:
openai:
api-key: ${OPENAI_API_KEY}
chat:
options:
model: gpt-4o
temperature: 0.1
max-tokens: 1024
embedding:
options:
model: text-embedding-3-small # 1536 dims
vectorstore:
pgvector:
index-type: HNSW
distance-type: COSINE_DISTANCE
dimensions: 1536
initialize-schema: false
schema-name: rag
table-name: document_chunks
management:
endpoints:
web:
exposure:
include: health, prometheus, metrics
metrics:
tags:
application: ${spring.application.name}
logging:
level:
org.springframework.ai: INFO
Step 4 — Source Files
// RagApplication.java
package com.example.rag;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Boot entry point of the RAG service.
 */
@SpringBootApplication
public class RagApplication {

    /**
     * Starts the Spring application with this class as its primary source.
     *
     * @param args command-line arguments, forwarded unchanged to Spring Boot
     */
    public static void main(String[] args) {
        var application = new SpringApplication(RagApplication.class);
        application.run(args);
    }
}
// config/RagConfig.java
package com.example.rag.config;
import com.example.rag.ingestion.IngestionTracker;
import com.example.rag.query.CrossEncoderReranker;
import com.example.rag.query.HallucinationGuard;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Explicit bean declarations for the ingestion tracker and the two LLM-backed query helpers.
 *
 * <p>NOTE(review): {@code IngestionTracker}, {@code CrossEncoderReranker} and
 * {@code HallucinationGuard} also carry {@code @Component} on their class declarations;
 * presumably only one of the two registration styles is needed — confirm.
 */
@Configuration
public class RagConfig {

    /**
     * Creates the faithfulness judge.
     *
     * @param builder chat client builder handed to the guard's constructor
     * @return a new {@link HallucinationGuard}
     */
    @Bean
    public HallucinationGuard hallucinationGuard(ChatClient.Builder builder) {
        HallucinationGuard guard = new HallucinationGuard(builder);
        return guard;
    }

    /**
     * Creates the reranker.
     *
     * @param builder chat client builder handed to the reranker's constructor
     * @return a new {@link CrossEncoderReranker}
     */
    @Bean
    public CrossEncoderReranker crossEncoderReranker(ChatClient.Builder builder) {
        CrossEncoderReranker reranker = new CrossEncoderReranker(builder);
        return reranker;
    }

    /**
     * Creates the tracker that remembers which documents were already ingested.
     *
     * @return a new, empty {@link IngestionTracker}
     */
    @Bean
    public IngestionTracker ingestionTracker() {
        IngestionTracker tracker = new IngestionTracker();
        return tracker;
    }
}
// model/IngestionResult.java
package com.example.rag.model;
/**
 * Outcome of one ingestion attempt.
 *
 * @param filename      name of the processed file
 * @param status        {@code "ingested"} or {@code "skipped"} when built through the factories
 * @param chunksCreated number of chunks written (0 when skipped)
 * @param message       human-readable summary
 */
public record IngestionResult(String filename, String status, int chunksCreated, String message) {

    private static final String STATUS_INGESTED = "ingested";
    private static final String STATUS_SKIPPED = "skipped";
    private static final String SKIPPED_MESSAGE = "Document unchanged since last ingestion. Skipped.";
    private static final String SUCCESS_SUFFIX = " chunks written to vector store.";

    /**
     * Builds the result for a file whose chunks were written.
     *
     * @param filename name of the ingested file
     * @param chunks   number of chunks written
     * @return a result with status {@code "ingested"}
     */
    public static IngestionResult success(String filename, int chunks) {
        String summary = Integer.toString(chunks).concat(SUCCESS_SUFFIX);
        return new IngestionResult(filename, STATUS_INGESTED, chunks, summary);
    }

    /**
     * Builds the result for a file that was not re-ingested.
     *
     * @param filename name of the skipped file
     * @return a result with status {@code "skipped"} and zero chunks
     */
    public static IngestionResult skipped(String filename) {
        return new IngestionResult(filename, STATUS_SKIPPED, 0, SKIPPED_MESSAGE);
    }
}
// model/FaithfulnessResult.java
package com.example.rag.model;
/**
 * Verdict produced for one generated answer.
 *
 * @param score      faithfulness score assigned to the answer
 * @param isGrounded whether the answer is considered grounded
 */
public record FaithfulnessResult(
        double score,
        boolean isGrounded
) {
}
// model/ScoredDocument.java
package com.example.rag.model;
import org.springframework.ai.document.Document;
/**
 * Pairs a document with a numeric score.
 *
 * @param document the scored document
 * @param score    the score assigned to it
 */
public record ScoredDocument(
        Document document,
        double score
) {
}
// model/SourceAttribution.java
package com.example.rag.model;
/**
 * Citation for one chunk listed alongside an answer.
 *
 * @param filename     name of the file the chunk is attributed to
 * @param page         page label for the chunk
 * @param chunkPreview short excerpt of the chunk text
 */
public record SourceAttribution(
        String filename,
        String page,
        String chunkPreview
) {
}
// model/RagQueryOptions.java
package com.example.rag.model;
/**
 * Per-query options.
 *
 * @param metadataFilter textual metadata filter expression, or {@code null} for no filter
 */
public record RagQueryOptions(String metadataFilter) {

    /**
     * Normalizes an empty or whitespace-only filter to {@code null} so that "no filter"
     * has a single representation and a blank string is never handed to a filter parser.
     */
    public RagQueryOptions {
        if (metadataFilter != null && metadataFilter.isBlank()) {
            metadataFilter = null;
        }
    }

    /**
     * Returns options that apply no metadata filter.
     *
     * @return options whose {@code metadataFilter} is {@code null}
     */
    public static RagQueryOptions noFilter() {
        return new RagQueryOptions(null);
    }
}
// model/QueryRequest.java
package com.example.rag.model;
/**
 * Request payload carrying a question and an optional metadata filter.
 *
 * @param question       the question text
 * @param metadataFilter metadata filter expression; may be {@code null}
 */
public record QueryRequest(
        String question,
        String metadataFilter
) {
}
// model/RagResponse.java
package com.example.rag.model;
import org.springframework.ai.document.Document;
import java.util.List;
/**
 * Response returned for a RAG query.
 *
 * @param answer            generated answer, or an explanatory message when nothing was retrieved
 * @param sources           citations for the chunks the answer was generated from
 * @param faithfulnessScore faithfulness score assigned to the answer
 * @param isGrounded        whether the answer is considered grounded
 * @param status            {@code "answered"}, {@code "ungrounded"} or {@code "no_context"}
 *                          when built through the factories
 */
public record RagResponse(
        String answer,
        List<SourceAttribution> sources,
        double faithfulnessScore,
        boolean isGrounded,
        String status
) {

    /** Maximum number of characters of chunk text echoed in a citation. */
    private static final int PREVIEW_LENGTH = 200;

    /**
     * Builds the response used when retrieval produced nothing.
     *
     * @param msg message placed in the {@code answer} field
     * @return a response with no sources, score 0.0 and status {@code "no_context"}
     */
    public static RagResponse noContext(String msg) {
        return new RagResponse(msg, List.of(), 0.0, false, "no_context");
    }

    /**
     * Builds the response for a generated answer, deriving one citation per chunk.
     *
     * @param answer   generated answer text
     * @param chunks   chunks the answer was generated from
     * @param score    faithfulness score
     * @param grounded whether the answer passed the faithfulness check
     * @return a response with status {@code "answered"} when grounded, else {@code "ungrounded"}
     */
    public static RagResponse answered(String answer, List<Document> chunks,
                                       double score, boolean grounded) {
        List<SourceAttribution> sources = chunks.stream()
                .map(RagResponse::toAttribution)
                .toList();
        return new RagResponse(answer, sources, score, grounded,
                grounded ? "answered" : "ungrounded");
    }

    /** Converts one chunk into its citation, tolerating missing or non-String metadata. */
    private static SourceAttribution toAttribution(Document chunk) {
        // Objects.toString covers both an absent key and a non-String value without a cast.
        String file = java.util.Objects.toString(chunk.getMetadata().get("source_file"), "unknown");
        String page = java.util.Objects.toString(chunk.getMetadata().get("page_number"), "?");
        return new SourceAttribution(file, page, preview(chunk.getText()));
    }

    /** Returns at most {@link #PREVIEW_LENGTH} characters, adding an ellipsis only when text was cut. */
    private static String preview(String text) {
        if (text == null) {
            return "";
        }
        if (text.length() <= PREVIEW_LENGTH) {
            return text;
        }
        return text.substring(0, PREVIEW_LENGTH) + "…";
    }
}
// ingestion/IngestionException.java
package com.example.rag.ingestion;
/**
 * Unchecked exception that wraps an underlying cause with a descriptive message.
 */
public class IngestionException extends RuntimeException {

    /**
     * Creates the exception.
     *
     * @param message description of what failed
     * @param cause   the underlying failure, preserved as the cause
     */
    public IngestionException(final String message, final Throwable cause) {
        super(message, cause);
    }
}
// ingestion/IngestionTracker.java
package com.example.rag.ingestion;
import org.springframework.stereotype.Component;
import java.util.concurrent.ConcurrentHashMap;
/**
* Tracks ingested documents by filename → SHA-256 hash.
* In production: swap for a JdbcIngestionTracker backed by a DB table
* so state survives application restarts.
*/
@Component
public class IngestionTracker {

    /** Number of leading hash characters echoed in the log line. */
    private static final int HASH_PREVIEW_LENGTH = 8;

    /** Most recently recorded content hash per filename. */
    private final ConcurrentHashMap<String, String> hashes = new ConcurrentHashMap<>();

    /**
     * Reports whether {@code filename} was last recorded with exactly this {@code hash}.
     *
     * @param filename name of the document; {@code null} never matches
     * @param hash     content hash to compare; {@code null} never matches
     * @return {@code true} only if the stored hash for {@code filename} equals {@code hash}
     */
    public boolean isAlreadyIngested(String filename, String hash) {
        // ConcurrentHashMap rejects null keys, so answer "not ingested" instead of throwing.
        if (filename == null || hash == null) {
            return false;
        }
        return hash.equals(hashes.get(filename));
    }

    /**
     * Records {@code hash} as the latest hash for {@code filename} and prints a summary line.
     *
     * @param filename name of the ingested document
     * @param hash     content hash of the ingested document
     * @param chunks   number of chunks written, used only in the printed line
     * @throws NullPointerException if {@code filename} or {@code hash} is {@code null}
     */
    public void markIngested(String filename, String hash, int chunks) {
        java.util.Objects.requireNonNull(filename, "filename");
        java.util.Objects.requireNonNull(hash, "hash");
        hashes.put(filename, hash);
        // Hashes shorter than the preview length are printed whole rather than failing.
        String preview = hash.substring(0, Math.min(HASH_PREVIEW_LENGTH, hash.length()));
        System.out.printf("[IngestionTracker] %s → %d chunks (hash=%s…)%n",
                filename, chunks, preview);
    }
}
// ingestion/DocumentIngestionService.java
package com.example.rag.ingestion;
import com.example.rag.model.IngestionResult;
import io.micrometer.core.instrument.MeterRegistry;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.core.io.Resource;
import org.springframework.stereotype.Service;
import java.security.MessageDigest;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Reads a PDF page by page, enriches each page with source metadata, splits it into
 * token-bounded chunks and writes the chunks to the vector store.
 */
@Service
public class DocumentIngestionService {

    /** Target chunk size in tokens (first {@code TokenTextSplitter} argument). */
    private static final int CHUNK_SIZE_TOKENS = 512;
    /** Minimum chunk size in characters (second {@code TokenTextSplitter} argument) — not an overlap. */
    private static final int MIN_CHUNK_SIZE_CHARS = 128;
    /** Chunks shorter than this many characters are not embedded. */
    private static final int MIN_CHUNK_LENGTH_TO_EMBED = 5;
    /** Upper bound on chunks produced from one text. */
    private static final int MAX_NUM_CHUNKS = 10_000;
    /** Name used when the resource does not report a filename. */
    private static final String UNKNOWN_FILENAME = "unknown";

    private final VectorStore vectorStore;
    private final MeterRegistry metrics;
    private final IngestionTracker tracker;

    public DocumentIngestionService(VectorStore vectorStore,
                                    MeterRegistry metrics,
                                    IngestionTracker tracker) {
        this.vectorStore = vectorStore;
        this.metrics = metrics;
        this.tracker = tracker;
    }

    /**
     * Ingests one PDF unless the tracker already holds the same content hash for its filename.
     *
     * @param pdf        the PDF to ingest
     * @param callerMeta extra metadata copied onto every page document; may be {@code null}
     * @return a skipped result when unchanged, otherwise a success result with the chunk count
     * @throws IngestionException if reading, splitting or storing fails
     * @throws RuntimeException   if the resource cannot be hashed
     */
    public IngestionResult ingestPdf(Resource pdf, Map<String, Object> callerMeta) {
        // Resource#getFilename may be null; the tracker and metadata need a usable name.
        String filename = pdf.getFilename() != null ? pdf.getFilename() : UNKNOWN_FILENAME;
        Map<String, Object> extraMeta = callerMeta != null ? callerMeta : Map.of();

        String hash = sha256(pdf);
        if (tracker.isAlreadyIngested(filename, hash)) {
            return IngestionResult.skipped(filename);
        }
        try {
            // One Document per PDF page preserves page_number metadata
            var config = PdfDocumentReaderConfig.builder()
                    .withPagesPerDocument(1)
                    .build();
            List<Document> pages = new PagePdfDocumentReader(pdf, config).get();

            // Enrich each page-document with source metadata; caller entries are applied last.
            List<Document> enriched = pages.stream().map(doc -> {
                var meta = new HashMap<String, Object>(doc.getMetadata());
                meta.put("source_file", filename);
                meta.put("source_hash", hash);
                meta.put("ingested_at", System.currentTimeMillis());
                meta.putAll(extraMeta);
                return new Document(doc.getText(), meta);
            }).toList();

            // 512-token chunks. The second argument is the minimum chunk size in characters;
            // this splitter constructor takes no overlap parameter.
            var splitter = new TokenTextSplitter(CHUNK_SIZE_TOKENS, MIN_CHUNK_SIZE_CHARS,
                    MIN_CHUNK_LENGTH_TO_EMBED, MAX_NUM_CHUNKS, true);
            List<Document> chunks = splitter.apply(enriched);

            // NOTE(review): chunks are only ever added here; chunks from an earlier version of
            // the same file are not removed in this method — confirm whether that is intended.
            vectorStore.add(chunks);
            tracker.markIngested(filename, hash, chunks.size());
            metrics.counter("rag.chunks.ingested").increment(chunks.size());
            return IngestionResult.success(filename, chunks.size());
        } catch (Exception ex) {
            metrics.counter("rag.ingestion.errors").increment();
            throw new IngestionException("Failed to ingest " + filename, ex);
        }
    }

    /** Returns the lowercase hex SHA-256 of the resource's bytes, closing the stream afterwards. */
    private String sha256(Resource r) {
        try (var in = r.getInputStream()) {
            var digest = MessageDigest.getInstance("SHA-256");
            return java.util.HexFormat.of().formatHex(digest.digest(in.readAllBytes()));
        } catch (Exception e) {
            throw new RuntimeException("Cannot hash resource", e);
        }
    }
}
// query/CrossEncoderReranker.java
package com.example.rag.query;
import com.example.rag.model.ScoredDocument;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.document.Document;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Scores each retrieved chunk's relevance to the query (0–10) by prompting a chat model.
 *
 * <p>The client is built from the injected builder without selecting a model here, so
 * scoring uses whatever model that builder is configured with.
 * Candidates are scored through a parallel stream to keep wall-clock latency low.
 * Production alternative: local ONNX cross-encoder for zero API cost.
 */
@Component
public class CrossEncoderReranker {

    private static final System.Logger LOG =
            System.getLogger(CrossEncoderReranker.class.getName());

    /** Passage text beyond this many characters is not sent to the model. */
    private static final int MAX_PASSAGE_CHARS = 512;
    private static final double MIN_SCORE = 0.0;
    private static final double MAX_SCORE = 10.0;

    /** First (optionally negative) integer or decimal in the reply, e.g. the 8 in "8/10". */
    private static final java.util.regex.Pattern FIRST_NUMBER =
            java.util.regex.Pattern.compile("-?\\d+(?:\\.\\d+)?");

    private static final String PROMPT = """
            Rate relevance of PASSAGE to QUERY from 0 (irrelevant) to 10 (perfect match).
            Respond with ONLY a single integer. No explanation.
            QUERY: {query}
            PASSAGE: {passage}
            SCORE:""";

    private final ChatClient client;

    public CrossEncoderReranker(ChatClient.Builder builder) {
        this.client = builder.build();
    }

    /**
     * Scores every candidate against the query.
     *
     * @param query      the user question
     * @param candidates retrieved documents; {@code null} or empty yields an empty list
     * @return one {@link ScoredDocument} per candidate, in the candidates' order, each with a
     *         score in [0, 10]; a candidate whose scoring fails gets 0
     */
    public List<ScoredDocument> rerank(String query, List<Document> candidates) {
        if (candidates == null || candidates.isEmpty()) {
            return List.of();
        }
        // NOTE(review): these are blocking remote calls on the common fork-join pool;
        // a dedicated executor may be preferable under load — confirm.
        return candidates.parallelStream()
                .map(doc -> new ScoredDocument(doc, score(query, doc)))
                .toList();
    }

    /** Asks the model for one relevance score; any failure degrades to the minimum score. */
    private double score(String query, Document doc) {
        try {
            String text = doc.getText() == null ? "" : doc.getText();
            String passage = text.substring(0, Math.min(MAX_PASSAGE_CHARS, text.length()));
            String raw = client.prompt()
                    .user(u -> u.text(PROMPT)
                            .param("query", query)
                            .param("passage", passage))
                    .call()
                    .content();
            return parseScore(raw);
        } catch (Exception e) {
            // Best effort by design: one failed call must not fail the whole query.
            LOG.log(System.Logger.Level.WARNING, "Rerank scoring failed; using score 0", e);
            return MIN_SCORE;
        }
    }

    /**
     * Extracts the first number from the model reply and clamps it to [0, 10].
     *
     * @param raw model reply; may be {@code null}
     * @return the clamped score, or 0 when the reply contains no number
     */
    static double parseScore(String raw) {
        if (raw == null) {
            return MIN_SCORE;
        }
        var matcher = FIRST_NUMBER.matcher(raw);
        if (!matcher.find()) {
            return MIN_SCORE;
        }
        double value = Double.parseDouble(matcher.group());
        return Math.max(MIN_SCORE, Math.min(MAX_SCORE, value));
    }
}
// query/HallucinationGuard.java
package com.example.rag.query;
import com.example.rag.model.FaithfulnessResult;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.document.Document;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * LLM judge that scores answer faithfulness to retrieved context (0.0–1.0).
 * Answers scoring below 0.7 are reported as not grounded.
 */
@Component
public class HallucinationGuard {

    private static final System.Logger LOG =
            System.getLogger(HallucinationGuard.class.getName());

    private static final double THRESHOLD = 0.7;

    /** Separator placed between chunk texts in the judge's context. */
    private static final String CHUNK_SEPARATOR = "\n---\n";

    /** First decimal or integer in the reply, also accepting a bare ".87". */
    private static final java.util.regex.Pattern FIRST_NUMBER =
            java.util.regex.Pattern.compile("\\d+(?:\\.\\d+)?|\\.\\d+");

    private static final String PROMPT = """
            Rate how faithfully the ANSWER is supported by the CONTEXT.
            1.0 = fully supported | 0.5 = partially | 0.0 = contradicted/unsupported
            Respond with ONLY a decimal (e.g. 0.87). No explanation.
            CONTEXT: {context}
            ANSWER: {answer}
            FAITHFULNESS SCORE:""";

    private final ChatClient judge;

    public HallucinationGuard(ChatClient.Builder builder) {
        this.judge = builder.build();
    }

    /**
     * Scores how well {@code answer} is supported by {@code chunks}.
     *
     * @param answer the generated answer
     * @param chunks the context chunks the answer was generated from
     * @return the score clamped to [0.0, 1.0] and whether it reaches the threshold; a missing
     *         answer, missing context, unparseable reply or failed call yields (0.0, false)
     */
    public FaithfulnessResult check(String answer, List<Document> chunks) {
        if (answer == null || chunks == null || chunks.isEmpty()) {
            return new FaithfulnessResult(0.0, false);
        }
        String ctx = chunks.stream()
                .map(Document::getText)
                .filter(java.util.Objects::nonNull)
                .collect(java.util.stream.Collectors.joining(CHUNK_SEPARATOR));
        try {
            String raw = judge.prompt()
                    .user(u -> u.text(PROMPT).param("context", ctx).param("answer", answer))
                    .call()
                    .content();
            double score = parseScore(raw);
            return new FaithfulnessResult(score, score >= THRESHOLD);
        } catch (Exception e) {
            // Fail closed: if the judge cannot be consulted the answer is treated as ungrounded.
            LOG.log(System.Logger.Level.WARNING, "Faithfulness check failed; using score 0.0", e);
            return new FaithfulnessResult(0.0, false);
        }
    }

    /**
     * Extracts the first number from the judge's reply and clamps it to [0.0, 1.0].
     *
     * @param raw judge reply; may be {@code null}
     * @return the clamped score, or 0.0 when no number is present
     */
    static double parseScore(String raw) {
        if (raw == null) {
            return 0.0;
        }
        var matcher = FIRST_NUMBER.matcher(raw);
        if (!matcher.find()) {
            return 0.0;
        }
        double value = Double.parseDouble(matcher.group());
        if (Double.isNaN(value)) {
            return 0.0;
        }
        return Math.max(0.0, Math.min(1.0, value));
    }
}
// query/RagQueryService.java
package com.example.rag.query;
import com.example.rag.model.*;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.stereotype.Service;
import java.util.Comparator;
import java.util.List;
/**
 * Query pipeline: vector search, rerank, context assembly, grounded generation and a
 * faithfulness check on the generated answer.
 */
@Service
public class RagQueryService {

    private static final int TOP_K = 20; // wide retrieval net
    private static final int TOP_N = 5;  // chunks kept after reranking
    private static final double SIMILARITY_THRESHOLD = 0.60;

    private final VectorStore vectorStore;
    private final ChatClient chatClient;
    private final CrossEncoderReranker reranker;
    private final HallucinationGuard guard;

    public RagQueryService(VectorStore vectorStore,
                           ChatClient.Builder builder,
                           CrossEncoderReranker reranker,
                           HallucinationGuard guard) {
        this.vectorStore = vectorStore;
        this.chatClient = builder.build();
        this.reranker = reranker;
        this.guard = guard;
    }

    /**
     * Answers {@code question} from the stored documents.
     *
     * @param question the user question
     * @param opts     query options; {@code null} or a blank filter means no metadata filter
     * @return a {@code no_context} response when the search returns nothing, otherwise the
     *         generated answer with its sources and faithfulness verdict
     */
    public RagResponse query(String question, RagQueryOptions opts) {
        // 1. Vector search — cast a wide net
        var req = SearchRequest.builder()
                .query(question)
                .topK(TOP_K)
                .similarityThreshold(SIMILARITY_THRESHOLD)
                .filterExpression(filterOf(opts))
                .build();
        List<Document> candidates = vectorStore.similaritySearch(req);
        if (candidates == null || candidates.isEmpty()) {
            return RagResponse.noContext("No relevant documents found for your question.");
        }

        // 2. Rerank and prune to top N
        List<Document> top = reranker.rerank(question, candidates).stream()
                .sorted(Comparator.comparingDouble(ScoredDocument::score).reversed())
                .limit(TOP_N)
                .map(ScoredDocument::document)
                .toList();

        // 3. Build context with source attribution
        String context = buildContext(top);

        // 4. Generate answer — strict grounding prompt
        String answer = chatClient.prompt()
                .user(u -> u.text("""
                        Answer ONLY from the CONTEXT below. If the answer is not present, say:
                        "I don't have enough information in the provided documents."
                        Do NOT speculate or use outside knowledge.
                        CONTEXT:
                        {context}
                        QUESTION:
                        {question}
                        ANSWER:""")
                        .param("context", context)
                        .param("question", question))
                .call().content();

        // 5. Faithfulness check
        FaithfulnessResult f = guard.check(answer, top);
        return RagResponse.answered(answer, top, f.score(), f.isGrounded());
    }

    /** Returns the filter text to apply, or {@code null} when none was supplied. */
    private static String filterOf(RagQueryOptions opts) {
        if (opts == null) {
            return null;
        }
        String filter = opts.metadataFilter();
        return (filter == null || filter.isBlank()) ? null : filter;
    }

    /** Renders the chunks as numbered source sections, each with file, page and text on separate lines. */
    private static String buildContext(List<Document> top) {
        var ctx = new StringBuilder();
        for (int i = 0; i < top.size(); i++) {
            Document d = top.get(i);
            ctx.append("--- Source ").append(i + 1).append(" ---\n")
                    .append("File: ").append(d.getMetadata().get("source_file")).append("\n")
                    .append("Page: ").append(d.getMetadata().get("page_number")).append("\n")
                    .append(d.getText()).append("\n\n");
        }
        return ctx.toString();
    }
}
// controller/RagController.java
package com.example.rag.controller;
import com.example.rag.ingestion.DocumentIngestionService;
import com.example.rag.model.*;
import com.example.rag.query.RagQueryService;
import org.springframework.core.io.ByteArrayResource;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.util.Map;
/**
 * REST endpoints for ingesting PDFs and querying the indexed documents.
 */
@RestController
@RequestMapping("/api")
public class RagController {

    /** Name used when the upload does not carry an original filename. */
    private static final String FALLBACK_FILENAME = "unknown";

    private final DocumentIngestionService ingestion;
    private final RagQueryService query;

    public RagController(DocumentIngestionService ingestion, RagQueryService query) {
        this.ingestion = ingestion;
        this.query = query;
    }

    /**
     * Upload and ingest a PDF.
     * <pre>{@code
     * curl -X POST http://localhost:8080/api/ingest \
     *   -F "file=@employee-handbook-2024.pdf" -F "docType=policy" -F "tenantId=acme"
     * }</pre>
     *
     * @param file     the uploaded PDF
     * @param docType  stored as {@code doc_type} metadata; defaults to {@code general}
     * @param tenantId stored as {@code tenant_id} metadata; defaults to {@code default}
     * @return 200 with the ingestion result, or 400 when the upload is empty
     * @throws IOException if the upload's bytes cannot be read
     */
    @PostMapping(value = "/ingest", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
    public ResponseEntity<IngestionResult> ingest(
            @RequestPart("file") MultipartFile file,
            @RequestParam(defaultValue = "general") String docType,
            @RequestParam(defaultValue = "default") String tenantId) throws IOException {
        if (file.isEmpty()) {
            return ResponseEntity.badRequest().build();
        }
        // The original filename may be absent; downstream code uses it as a key and as metadata.
        String original = file.getOriginalFilename();
        String filename = (original == null || original.isBlank()) ? FALLBACK_FILENAME : original;
        var resource = new ByteArrayResource(file.getBytes()) {
            @Override
            public String getFilename() {
                return filename;
            }
        };
        return ResponseEntity.ok(ingestion.ingestPdf(resource,
                Map.of("doc_type", docType, "tenant_id", tenantId)));
    }

    /**
     * Query the vector store.
     * <pre>{@code
     * curl -X POST http://localhost:8080/api/query \
     *   -H "Content-Type: application/json" \
     *   -d '{"question":"What is the vacation policy?","metadataFilter":null}'
     * }</pre>
     *
     * @param req the question and optional metadata filter
     * @return 200 with the RAG response, or 400 when the question is missing or blank
     */
    @PostMapping("/query")
    public ResponseEntity<RagResponse> query(@RequestBody QueryRequest req) {
        if (req.question() == null || req.question().isBlank()) {
            return ResponseEntity.badRequest().build();
        }
        return ResponseEntity.ok(
                query.query(req.question(), new RagQueryOptions(req.metadataFilter())));
    }
}
Step 5 — Build and Run
# 1. Start PostgreSQL + pgvector
docker-compose up -d
# Verify pgvector is ready (wait ~5 s on first run)
docker-compose logs postgres | grep "database system is ready"
# 2. Set your OpenAI API key
export OPENAI_API_KEY=sk-...
# 3. Build (skip tests for first run)
./mvnw clean package -DskipTests
# 4. Run
java -jar target/spring-ai-rag-1.0.0-SNAPSHOT.jar
# Expected startup output:
# Started RagApplication in 4.1 seconds (JVM running for 4.7)
# Tomcat started on port 8080
# 5. Verify
curl http://localhost:8080/actuator/health
# {"status":"UP","components":{"db":{"status":"UP"},"ping":{"status":"UP"}}}
Step 6 — End-to-End I/O Demo
Use any PDF with structured text (HR policy, product manual, legal document). The examples below use employee-handbook-2024.pdf — a 34-page policy document.
6a. Ingest the PDF
curl -X POST http://localhost:8080/api/ingest \
  -F "file=@employee-handbook-2024.pdf" \
  -F "docType=policy" \
  -F "tenantId=acme-corp"
{
  "filename": "employee-handbook-2024.pdf",
  "status": "ingested",
  "chunksCreated": 127,
  "message": "127 chunks written to vector store."
}
Re-uploading the same unchanged file returns "status": "skipped" — no embeddings are recomputed.
6b. Query the PDF
curl -X POST http://localhost:8080/api/query \
  -H "Content-Type: application/json" \
  -d '{
    "question": "How many annual leave days are employees entitled to, and does unused leave carry over?",
    "metadataFilter": "tenant_id == '\''acme-corp'\'' && doc_type == '\''policy'\''"
  }'
{
  "answer": "According to the Employee Handbook 2024 (Section 4.1), full-time employees are entitled to 20 working days of annual leave per calendar year. Part-time employees receive leave on a pro-rata basis. Unused leave may be carried over for a maximum of 5 days into the following calendar year, provided it is used by 31 March.",
  "sources": [
    { "filename": "employee-handbook-2024.pdf", "page": "9", "chunkPreview": "4.1 Annual Leave Entitlement. Full-time employees are entitled to 20 working days…" },
    { "filename": "employee-handbook-2024.pdf", "page": "9", "chunkPreview": "4.2 Leave Carryover. Unused annual leave may be carried over for a maximum of 5 days…" },
    { "filename": "employee-handbook-2024.pdf", "page": "10", "chunkPreview": "4.3 Requesting Leave. All requests must be submitted via the HR portal…" },
    { "filename": "employee-handbook-2024.pdf", "page": "11", "chunkPreview": "4.7 Sick Leave. Sick leave is separate from annual leave and not subject to…" },
    { "filename": "employee-handbook-2024.pdf", "page": "8", "chunkPreview": "3.5 Probationary Period. During the 3-month probationary period, annual leave…" }
  ],
  "faithfulnessScore": 0.96,
  "isGrounded": true,
  "status": "answered"
}
6c. Query with no matching content
curl -X POST http://localhost:8080/api/query \
  -H "Content-Type: application/json" \
  -d '{"question": "What is the current company share price?", "metadataFilter": null}'
{
  "answer": "No relevant documents found for your question.",
  "sources": [],
  "faithfulnessScore": 0.0,
  "isGrounded": false,
  "status": "no_context"
}
The system never guesses. Queries with no matching content in the vector store are cleanly rejected with "status": "no_context".
6d. Check Prometheus metrics
curl -s http://localhost:8080/actuator/prometheus | grep rag_
# rag_chunks_ingested_total 127.0
# rag_ingestion_errors_total 0.0
← Back to the full technical explanation: Production-Grade RAG with Spring AI 1.1.0