Spring AI RAG in Java — Complete Runnable Code & End-to-End Demo

This is the companion code post to Production-Grade RAG with Spring AI 1.1.0. That article explains how every layer works. This post gives you the complete project — every file, ready to copy and run.

Prerequisites

Java 21+ and Maven 3.9+
Docker and Docker Compose (for PostgreSQL + pgvector)
An OpenAI API key (export OPENAI_API_KEY=sk-...)

Project Structure

spring-ai-rag/
├── docker-compose.yml
├── init.sql
├── pom.xml
└── src/main/
    ├── java/com/example/rag/
    │   ├── RagApplication.java
    │   ├── config/
    │   │   └── RagConfig.java
    │   ├── controller/
    │   │   └── RagController.java
    │   ├── ingestion/
    │   │   ├── DocumentIngestionService.java
    │   │   ├── IngestionException.java
    │   │   └── IngestionTracker.java
    │   ├── model/
    │   │   ├── FaithfulnessResult.java
    │   │   ├── IngestionResult.java
    │   │   ├── QueryRequest.java
    │   │   ├── RagQueryOptions.java
    │   │   ├── RagResponse.java
    │   │   ├── ScoredDocument.java
    │   │   └── SourceAttribution.java
    │   └── query/
    │       ├── CrossEncoderReranker.java
    │       ├── HallucinationGuard.java
    │       └── RagQueryService.java
    └── resources/
        └── application.yml

Step 1 — Infrastructure

# docker-compose.yml
# The top-level `version` key is obsolete in the Compose Specification and is
# ignored (with a warning) by current Docker Compose releases, so it is omitted.
services:
  postgres:
    image: pgvector/pgvector:pg16
    environment:
      POSTGRES_USER: raguser
      POSTGRES_PASSWORD: ragpass   # demo credentials only; do not reuse outside local development
      POSTGRES_DB: ragdb
    ports:
      - "5432:5432"
    volumes:
      - pgdata:/var/lib/postgresql/data
      # Init scripts run once, when the data directory is first initialised.
      # Mounted read-only because the container never needs to modify it.
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    # Lets `docker compose ps` report readiness instead of grepping the logs.
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U raguser -d ragdb"]
      interval: 5s
      timeout: 3s
      retries: 10
volumes:
  pgdata:

-- init.sql
-- Enable pgvector and create the dedicated schema used by the application.
CREATE EXTENSION IF NOT EXISTS vector;
CREATE SCHEMA    IF NOT EXISTS rag;

-- One row per stored chunk: text, JSON metadata and a 1536-dimension embedding.
CREATE TABLE IF NOT EXISTS rag.document_chunks (
    id        uuid         DEFAULT gen_random_uuid() PRIMARY KEY,
    content   text         NOT NULL,
    metadata  jsonb,
    embedding vector(1536)
);

-- HNSW index over the embedding column, using the cosine-distance operator class.
CREATE INDEX IF NOT EXISTS document_chunks_hnsw_idx
    ON rag.document_chunks USING hnsw (embedding vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);

Step 2 — pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
             https://maven.apache.org/xsd/maven-4.0.0.xsd">

    <modelVersion>4.0.0</modelVersion>
    <groupId>com.example</groupId>
    <artifactId>spring-ai-rag</artifactId>
    <version>1.0.0-SNAPSHOT</version>

    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>3.4.5</version>
    </parent>

    <properties>
        <java.version>21</java.version>
        <spring-ai.version>1.1.0</spring-ai.version>
    </properties>

    <!-- The Spring AI BOM supplies versions for every spring-ai-* artifact below. -->
    <dependencyManagement>
        <dependencies>
            <dependency>
                <groupId>org.springframework.ai</groupId>
                <artifactId>spring-ai-bom</artifactId>
                <version>${spring-ai.version}</version>
                <type>pom</type>
                <scope>import</scope>
            </dependency>
        </dependencies>
    </dependencyManagement>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-data-jpa</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-actuator</artifactId>
        </dependency>
        <!--
            Starter artifactIds as used since Spring AI 1.0. The milestone-era names
            (spring-ai-openai-spring-boot-starter, spring-ai-pgvector-store-spring-boot-starter)
            are not managed by the 1.x BOM, so Maven cannot resolve a version for them.
        -->
        <dependency>
            <groupId>org.springframework.ai</groupId>
            <artifactId>spring-ai-starter-model-openai</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.ai</groupId>
            <artifactId>spring-ai-starter-vector-store-pgvector</artifactId>
        </dependency>
        <dependency>
            <groupId>org.springframework.ai</groupId>
            <artifactId>spring-ai-pdf-document-reader</artifactId>
        </dependency>
        <dependency>
            <groupId>org.postgresql</groupId>
            <artifactId>postgresql</artifactId>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>io.micrometer</groupId>
            <artifactId>micrometer-registry-prometheus</artifactId>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
            </plugin>
        </plugins>
    </build>
</project>

Step 3 — application.yml

# src/main/resources/application.yml
spring:
  application.name: spring-ai-rag

  # Connection to the PostgreSQL container from docker-compose.yml.
  datasource:
    driver-class-name: org.postgresql.Driver
    url: jdbc:postgresql://localhost:5432/ragdb
    username: raguser
    password: ragpass

  # Hibernate must not touch the schema; init.sql owns it.
  jpa.hibernate.ddl-auto: none

  ai:
    openai:
      api-key: ${OPENAI_API_KEY}
      chat.options:
        model: gpt-4o
        temperature: 0.1
        max-tokens: 1024
      # 1536-dimension embeddings, matching the vector column in init.sql.
      embedding.options.model: text-embedding-3-small

    vectorstore.pgvector:
      schema-name: rag
      table-name: document_chunks
      initialize-schema: false
      dimensions: 1536
      index-type: HNSW
      distance-type: COSINE_DISTANCE

management:
  endpoints.web.exposure.include: health, prometheus, metrics
  metrics.tags.application: ${spring.application.name}

logging.level:
  org.springframework.ai: INFO

Step 4 — Source Files

// RagApplication.java
package com.example.rag;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * Application entry point. Lives in the root package {@code com.example.rag},
 * so the config, controller, ingestion, model and query sub-packages sit
 * beneath the class carrying {@code @SpringBootApplication}.
 */
@SpringBootApplication
public class RagApplication {

    /** Boots the Spring context with this class as the primary source. */
    public static void main(String[] args) {
        SpringApplication app = new SpringApplication(RagApplication.class);
        app.run(args);
    }
}

// config/RagConfig.java
package com.example.rag.config;

import com.example.rag.ingestion.IngestionTracker;
import com.example.rag.query.CrossEncoderReranker;
import com.example.rag.query.HallucinationGuard;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
public class RagConfig {

    @Bean
    public IngestionTracker ingestionTracker() {
        return new IngestionTracker();
    }

    @Bean
    public CrossEncoderReranker crossEncoderReranker(ChatClient.Builder builder) {
        return new CrossEncoderReranker(builder);
    }

    @Bean
    public HallucinationGuard hallucinationGuard(ChatClient.Builder builder) {
        return new HallucinationGuard(builder);
    }
}

// model/IngestionResult.java
package com.example.rag.model;

/**
 * Outcome of one ingestion attempt, returned to the API caller.
 *
 * @param filename      name of the processed file
 * @param status        {@code "ingested"} or {@code "skipped"}
 * @param chunksCreated number of chunks written (0 when skipped)
 * @param message       human-readable summary
 */
public record IngestionResult(String filename, String status, int chunksCreated, String message) {

    /** Result for a file whose chunks were written to the vector store. */
    public static IngestionResult success(String filename, int chunks) {
        String summary = chunks + " chunks written to vector store.";
        return new IngestionResult(filename, "ingested", chunks, summary);
    }

    /** Result for a file that was not re-ingested because it is unchanged. */
    public static IngestionResult skipped(String filename) {
        String summary = "Document unchanged since last ingestion. Skipped.";
        return new IngestionResult(filename, "skipped", 0, summary);
    }
}

// model/FaithfulnessResult.java
package com.example.rag.model;
/**
 * Verdict of a faithfulness check.
 *
 * @param score      faithfulness score assigned to the answer
 * @param isGrounded whether the answer is considered grounded
 */
public record FaithfulnessResult(
    double score,
    boolean isGrounded
) {
}

// model/ScoredDocument.java
package com.example.rag.model;
import org.springframework.ai.document.Document;
/**
 * Pairs a document with a numeric score.
 *
 * @param document the scored document
 * @param score    the score assigned to it
 */
public record ScoredDocument(
    Document document,
    double score
) {
}

// model/SourceAttribution.java
package com.example.rag.model;
/**
 * Describes one source chunk behind an answer.
 *
 * @param filename     file the chunk came from
 * @param page         page label, kept as text
 * @param chunkPreview short excerpt of the chunk text
 */
public record SourceAttribution(
    String filename,
    String page,
    String chunkPreview
) {
}

// model/RagQueryOptions.java
package com.example.rag.model;
/**
 * Per-query options.
 *
 * @param metadataFilter filter expression text, or {@code null} for no filter
 */
public record RagQueryOptions(String metadataFilter) {

    /** Options that apply no metadata filter. */
    public static RagQueryOptions noFilter() {
        final String none = null;
        return new RagQueryOptions(none);
    }
}

// model/QueryRequest.java
package com.example.rag.model;
/**
 * JSON body accepted by the query endpoint.
 *
 * @param question       the question to answer
 * @param metadataFilter optional filter expression text; may be {@code null}
 */
public record QueryRequest(
    String question,
    String metadataFilter
) {
}

// model/RagResponse.java
package com.example.rag.model;

import org.springframework.ai.document.Document;
import java.util.List;

/**
 * Response returned by the query endpoint.
 *
 * @param answer            generated answer, or an explanatory message when there is no context
 * @param sources           one attribution per chunk that was passed to the model
 * @param faithfulnessScore score from the faithfulness check (0.0 when not run)
 * @param isGrounded        whether the answer passed the faithfulness check
 * @param status            {@code "answered"}, {@code "ungrounded"} or {@code "no_context"}
 */
public record RagResponse(
    String answer,
    List<SourceAttribution> sources,
    double faithfulnessScore,
    boolean isGrounded,
    String status
) {
    /** Maximum number of characters of chunk text shown in a preview. */
    private static final int PREVIEW_CHARS = 200;

    /** Response used when retrieval produced no usable chunks. */
    public static RagResponse noContext(String msg) {
        return new RagResponse(msg, List.of(), 0.0, false, "no_context");
    }

    /**
     * Response for a generated answer.
     *
     * @param answer   the generated text
     * @param chunks   the chunks that were supplied as context
     * @param score    faithfulness score
     * @param grounded whether the score met the grounding threshold
     */
    public static RagResponse answered(String answer, List<Document> chunks,
                                       double score, boolean grounded) {
        List<SourceAttribution> sources = chunks.stream()
            .map(RagResponse::toAttribution)
            .toList();
        return new RagResponse(answer, sources, score, grounded,
            grounded ? "answered" : "ungrounded");
    }

    /** Builds the attribution entry for a single chunk. */
    private static SourceAttribution toAttribution(Document d) {
        return new SourceAttribution(
            metaText(d, "source_file", "unknown"),
            metaText(d, "page_number", "?"),
            preview(d.getText()));
    }

    /**
     * Reads a metadata value as text. Uses String.valueOf rather than a cast so a
     * non-String value (e.g. a numeric page) cannot cause a ClassCastException.
     */
    private static String metaText(Document d, String key, String fallback) {
        Object value = d.getMetadata().get(key);
        return value == null ? fallback : String.valueOf(value);
    }

    /** Truncates to PREVIEW_CHARS; the ellipsis is added only when text was cut. */
    private static String preview(String text) {
        if (text == null) {
            return "";
        }
        return text.length() <= PREVIEW_CHARS
            ? text
            : text.substring(0, PREVIEW_CHARS) + "…";
    }
}

// ingestion/IngestionException.java
package com.example.rag.ingestion;

/**
 * Unchecked exception signalling that a document could not be ingested.
 * Always carries the underlying cause.
 */
public class IngestionException extends RuntimeException {

    /**
     * @param message description of what failed
     * @param cause   the underlying failure
     */
    public IngestionException(final String message, final Throwable cause) {
        super(message, cause);
    }
}

// ingestion/IngestionTracker.java
package com.example.rag.ingestion;

import org.springframework.stereotype.Component;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Tracks ingested documents by filename → SHA-256 hash.
 * State is held in memory only, so it is lost when the application stops.
 * In production: swap for a JdbcIngestionTracker backed by a DB table
 * so state survives application restarts.
 */
@Component
public class IngestionTracker {

    /** Number of leading hash characters shown in the log line. */
    private static final int HASH_PREVIEW_CHARS = 8;

    private final ConcurrentHashMap<String, String> hashes = new ConcurrentHashMap<>();

    /**
     * @param filename name of the file; may be null (a resource can lack a filename)
     * @param hash     content hash of the file
     * @return true only when this exact hash was previously recorded for the filename;
     *         false when either argument is null
     */
    public boolean isAlreadyIngested(String filename, String hash) {
        // ConcurrentHashMap rejects null keys, so a nameless resource is never "known".
        if (filename == null || hash == null) {
            return false;
        }
        return hash.equals(hashes.get(filename));
    }

    /**
     * Records the hash for a filename and prints a one-line summary.
     * A null filename or hash is logged but not recorded, since it cannot be a map entry.
     *
     * @param filename name of the ingested file; may be null
     * @param hash     content hash of the file; may be null
     * @param chunks   number of chunks that were stored
     */
    public void markIngested(String filename, String hash, int chunks) {
        if (filename != null && hash != null) {
            hashes.put(filename, hash);
        }
        // Guard the preview so a short hash does not throw from substring.
        String shortHash = (hash == null)
            ? ""
            : hash.substring(0, Math.min(HASH_PREVIEW_CHARS, hash.length()));
        System.out.printf("[IngestionTracker] %s → %d chunks (hash=%s…)%n",
            filename, chunks, shortHash);
    }
}

// ingestion/DocumentIngestionService.java
package com.example.rag.ingestion;

import com.example.rag.model.IngestionResult;
import io.micrometer.core.instrument.MeterRegistry;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.pdf.PagePdfDocumentReader;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.core.io.Resource;
import org.springframework.stereotype.Service;

import java.security.MessageDigest;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Reads a PDF page by page, enriches each page with source metadata, splits it
 * into chunks and writes the chunks to the vector store.
 */
@Service
public class DocumentIngestionService {

    private final VectorStore vectorStore;
    private final MeterRegistry metrics;
    private final IngestionTracker tracker;

    public DocumentIngestionService(VectorStore vectorStore,
                                    MeterRegistry metrics,
                                    IngestionTracker tracker) {
        this.vectorStore = vectorStore;
        this.metrics     = metrics;
        this.tracker     = tracker;
    }

    /**
     * Ingests one PDF unless the tracker already holds the same content hash for its filename.
     *
     * @param pdf        the PDF to ingest
     * @param callerMeta extra metadata copied onto every page; may be null.
     *                   Applied last, so it can override the source_* keys.
     * @return a "skipped" result for unchanged content, otherwise a success result
     * @throws IngestionException if hashing, reading, splitting or storing fails
     */
    public IngestionResult ingestPdf(Resource pdf, Map<String, Object> callerMeta) {
        String filename = pdf.getFilename();
        try {
            // Hashing happens inside the try so an unreadable resource is counted
            // as an ingestion error and surfaces as IngestionException.
            String hash = sha256(pdf);

            if (tracker.isAlreadyIngested(filename, hash)) {
                return IngestionResult.skipped(filename);
            }

            // One Document per PDF page preserves page_number metadata
            var config = PdfDocumentReaderConfig.builder()
                .withPagesPerDocument(1)
                .build();
            List<Document> pages = new PagePdfDocumentReader(pdf, config).get();

            List<Document> enriched = pages.stream()
                .map(page -> enrich(page, filename, hash, callerMeta))
                .toList();

            // Arguments: chunkSize=512 tokens, minChunkSizeChars=128,
            // minChunkLengthToEmbed=5, maxNumChunks=10_000, keepSeparator=true.
            // The second argument is a minimum chunk size in characters, not an overlap.
            var splitter = new TokenTextSplitter(512, 128, 5, 10_000, true);
            List<Document> chunks = splitter.apply(enriched);

            // Embeds and stores the chunks.
            vectorStore.add(chunks);

            tracker.markIngested(filename, hash, chunks.size());
            metrics.counter("rag.chunks.ingested").increment(chunks.size());
            return IngestionResult.success(filename, chunks.size());

        } catch (Exception ex) {
            metrics.counter("rag.ingestion.errors").increment();
            throw new IngestionException("Failed to ingest " + filename, ex);
        }
    }

    /** Copies a page document, adding source metadata followed by caller-supplied metadata. */
    private Document enrich(Document page, String filename, String hash,
                            Map<String, Object> callerMeta) {
        var meta = new HashMap<String, Object>(page.getMetadata());
        // Fall back to "unknown" so a nameless resource does not store a null metadata value.
        meta.put("source_file",  filename != null ? filename : "unknown");
        meta.put("source_hash",  hash);
        meta.put("ingested_at",  System.currentTimeMillis());
        if (callerMeta != null) {
            meta.putAll(callerMeta);
        }
        return new Document(page.getText(), meta);
    }

    /** Returns the lowercase hex SHA-256 of the resource content; the stream is always closed. */
    private String sha256(Resource r) {
        try (var in = r.getInputStream()) {
            var digest = MessageDigest.getInstance("SHA-256");
            return java.util.HexFormat.of().formatHex(digest.digest(in.readAllBytes()));
        } catch (Exception e) {
            throw new RuntimeException("Cannot hash resource", e);
        }
    }
}

// query/CrossEncoderReranker.java
package com.example.rag.query;

import com.example.rag.model.ScoredDocument;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.document.Document;
import org.springframework.stereotype.Component;
import java.util.List;

/**
 * Scores each retrieved chunk's relevance (0–10) with an LLM call made through
 * the injected ChatClient, i.e. whichever chat model the application configures.
 * Candidates are scored via a parallel stream to keep wall-clock latency low.
 * Production alternative: local ONNX cross-encoder for zero API cost.
 */
@Component
public class CrossEncoderReranker {

    private static final double MIN_SCORE = 0.0;
    private static final double MAX_SCORE = 10.0;

    /** Only the first 512 characters of a passage are sent to the model. */
    private static final int MAX_PASSAGE_CHARS = 512;

    /** First plain decimal number in the reply, e.g. "7", "7.5" or "Score: 7". */
    private static final java.util.regex.Pattern FIRST_NUMBER =
        java.util.regex.Pattern.compile("-?\\d+(?:\\.\\d+)?");

    private final ChatClient client;

    private static final String PROMPT = """
        Rate relevance of PASSAGE to QUERY from 0 (irrelevant) to 10 (perfect match).
        Respond with ONLY a single integer. No explanation.
        QUERY: {query}
        PASSAGE: {passage}
        SCORE:""";

    public CrossEncoderReranker(ChatClient.Builder builder) {
        this.client = builder.build();
    }

    /**
     * Scores every candidate against the query.
     *
     * @param query      the user question
     * @param candidates retrieved chunks to score
     * @return one ScoredDocument per candidate, in encounter order, each score within 0–10
     */
    public List<ScoredDocument> rerank(String query, List<Document> candidates) {
        return candidates.parallelStream()
            .map(doc -> new ScoredDocument(doc, scoreOf(query, doc)))
            .toList();
    }

    /** Scores one chunk. Best effort: any failure yields the minimum score. */
    private double scoreOf(String query, Document doc) {
        try {
            String text = doc.getText();
            String passage = text.substring(0, Math.min(MAX_PASSAGE_CHARS, text.length()));
            String raw = client.prompt()
                .user(u -> u.text(PROMPT)
                    .param("query",   query)
                    .param("passage", passage))
                .call().content();
            return parseScore(raw);
        } catch (Exception e) {
            // Deliberate: one failed scoring call must not fail the whole query.
            return MIN_SCORE;
        }
    }

    /**
     * Extracts the first number from the model reply and clamps it to 0–10.
     * Replies without a number (including null or "NaN") score 0, so a malformed
     * reply can never outrank a properly scored chunk.
     */
    private static double parseScore(String raw) {
        if (raw == null) {
            return MIN_SCORE;
        }
        var matcher = FIRST_NUMBER.matcher(raw);
        if (!matcher.find()) {
            return MIN_SCORE;
        }
        double value = Double.parseDouble(matcher.group());
        return Math.max(MIN_SCORE, Math.min(MAX_SCORE, value));
    }
}

// query/HallucinationGuard.java
package com.example.rag.query;

import com.example.rag.model.FaithfulnessResult;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.document.Document;
import org.springframework.stereotype.Component;
import java.util.List;

/**
 * LLM judge that scores answer faithfulness to retrieved context (0.0–1.0).
 * Answers below 0.7 are flagged as potentially hallucinated.
 */
@Component
public class HallucinationGuard {

    private static final double THRESHOLD = 0.7;

    /** Separator placed between chunk texts in the judge prompt. */
    private static final String CHUNK_SEPARATOR = "\n---\n";

    private final ChatClient judge;

    private static final String PROMPT = """
        Rate how faithfully the ANSWER is supported by the CONTEXT.
        1.0 = fully supported  |  0.5 = partially  |  0.0 = contradicted/unsupported
        Respond with ONLY a decimal (e.g. 0.87). No explanation.
        CONTEXT: {context}
        ANSWER:  {answer}
        FAITHFULNESS SCORE:""";

    public HallucinationGuard(ChatClient.Builder builder) {
        this.judge = builder.build();
    }

    /**
     * Asks the judge model how well the answer is supported by the chunks.
     *
     * @param answer the generated answer
     * @param chunks the chunks that were given to the model as context
     * @return the score clamped to 0.0–1.0 and whether it reached the threshold;
     *         a score of 0.0 / not grounded if the call fails or the reply is not a number
     */
    public FaithfulnessResult check(String answer, List<Document> chunks) {
        // joining() puts the separator between chunks only, with none before the first.
        String ctx = chunks.stream()
            .map(Document::getText)
            .collect(java.util.stream.Collectors.joining(CHUNK_SEPARATOR));
        try {
            double score = Double.parseDouble(
                judge.prompt()
                    .user(u -> u.text(PROMPT).param("context", ctx).param("answer", answer))
                    .call().content().trim());
            // "NaN" parses successfully and would survive min/max clamping.
            if (Double.isNaN(score)) {
                return new FaithfulnessResult(0.0, false);
            }
            score = Math.max(0.0, Math.min(1.0, score));
            return new FaithfulnessResult(score, score >= THRESHOLD);
        } catch (Exception e) {
            // Fail closed: an unverifiable answer is reported as not grounded.
            return new FaithfulnessResult(0.0, false);
        }
    }
}

// query/RagQueryService.java
package com.example.rag.query;

import com.example.rag.model.*;
import org.springframework.ai.chat.client.ChatClient;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.SearchRequest;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.stereotype.Service;

import java.util.Comparator;
import java.util.List;

/**
 * Answers a question in five steps: vector search, reranking, context
 * assembly, grounded generation and a faithfulness check.
 */
@Service
public class RagQueryService {

    private static final int TOP_K = 20;   // wide retrieval net
    private static final int TOP_N = 5;    // chunks kept after reranking
    private static final double SIMILARITY_THRESHOLD = 0.60;

    private final VectorStore          vectorStore;
    private final ChatClient           chatClient;
    private final CrossEncoderReranker reranker;
    private final HallucinationGuard   guard;

    public RagQueryService(VectorStore vectorStore,
                           ChatClient.Builder builder,
                           CrossEncoderReranker reranker,
                           HallucinationGuard guard) {
        this.vectorStore = vectorStore;
        this.chatClient  = builder.build();
        this.reranker    = reranker;
        this.guard       = guard;
    }

    /**
     * Runs the full retrieve → rerank → generate → verify pipeline.
     *
     * @param question the user question
     * @param opts     query options; null, or a null/blank filter, means no metadata filter
     * @return a "no_context" response when retrieval finds nothing, otherwise the
     *         generated answer with its sources and faithfulness verdict
     */
    public RagResponse query(String question, RagQueryOptions opts) {

        // 1. Vector search — cast a wide net
        var search = SearchRequest.builder()
            .query(question)
            .topK(TOP_K)
            .similarityThreshold(SIMILARITY_THRESHOLD);

        // NOTE(review): the filter text comes straight from the request body. If it is
        // meant to enforce tenant isolation, derive it server-side — confirm with callers.
        String filter = (opts == null) ? null : opts.metadataFilter();
        if (filter != null && !filter.isBlank()) {
            search.filterExpression(filter);
        }

        List<Document> candidates = vectorStore.similaritySearch(search.build());
        if (candidates == null || candidates.isEmpty()) {
            return RagResponse.noContext("No relevant documents found for your question.");
        }

        // 2. Rerank and prune to top N
        List<Document> top = reranker.rerank(question, candidates).stream()
            .sorted(Comparator.comparingDouble(ScoredDocument::score).reversed())
            .limit(TOP_N)
            .map(ScoredDocument::document)
            .toList();

        // 3. Build context with source attribution
        String context = buildContext(top);

        // 4. Generate answer — strict grounding prompt
        String answer = chatClient.prompt()
            .user(u -> u.text("""
                Answer ONLY from the CONTEXT below. If the answer is not present, say:
                "I don't have enough information in the provided documents."
                Do NOT speculate or use outside knowledge.

                CONTEXT:
                {context}

                QUESTION:
                {question}

                ANSWER:""")
                .param("context",  context)
                .param("question", question))
            .call().content();

        // 5. Faithfulness check
        FaithfulnessResult f = guard.check(answer, top);
        return RagResponse.answered(answer, top, f.score(), f.isGrounded());
    }

    /** Renders the chunks as numbered sections, each with file, page and text on separate lines. */
    private static String buildContext(List<Document> top) {
        var ctx = new StringBuilder();
        for (int i = 0; i < top.size(); i++) {
            var d = top.get(i);
            ctx.append("--- Source ").append(i + 1).append(" ---\n")
               .append("File: ").append(d.getMetadata().get("source_file")).append("\n")
               .append("Page: ").append(d.getMetadata().get("page_number")).append("\n")
               .append(d.getText()).append("\n\n");
        }
        return ctx.toString();
    }
}

// controller/RagController.java
package com.example.rag.controller;

import com.example.rag.ingestion.DocumentIngestionService;
import com.example.rag.model.*;
import com.example.rag.query.RagQueryService;
import org.springframework.core.io.ByteArrayResource;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;

import java.io.IOException;
import java.util.Map;

@RestController
@RequestMapping("/api")
public class RagController {

    private final DocumentIngestionService ingestion;
    private final RagQueryService          query;

    public RagController(DocumentIngestionService ingestion, RagQueryService query) {
        this.ingestion = ingestion;
        this.query     = query;
    }

    /**
     * Upload and ingest a PDF.
     * curl -X POST http://localhost:8080/api/ingest 
     *      -F "[email protected]" -F "docType=policy" -F "tenantId=acme"
     */
    @PostMapping(value = "/ingest", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
    public ResponseEntity<IngestionResult> ingest(
            @RequestPart("file")                        MultipartFile file,
            @RequestParam(defaultValue = "general")     String docType,
            @RequestParam(defaultValue = "default")     String tenantId) throws IOException {

        var resource = new ByteArrayResource(file.getBytes()) {
            @Override public String getFilename() { return file.getOriginalFilename(); }
        };
        return ResponseEntity.ok(ingestion.ingestPdf(resource,
            Map.of("doc_type", docType, "tenant_id", tenantId)));
    }

    /**
     * Query the vector store.
     * curl -X POST http://localhost:8080/api/query 
     *      -H "Content-Type: application/json" 
     *      -d '{"question":"What is the vacation policy?","metadataFilter":null}'
     */
    @PostMapping("/query")
    public ResponseEntity<RagResponse> query(@RequestBody QueryRequest req) {
        return ResponseEntity.ok(
            query.query(req.question(), new RagQueryOptions(req.metadataFilter())));
    }
}

Step 5 — Build and Run

# 1. Start PostgreSQL + pgvector
docker-compose up -d

# Verify pgvector is ready (wait ~5 s on first run)
docker-compose logs postgres | grep "database system is ready"

# 2. Set your OpenAI API key
export OPENAI_API_KEY=sk-...

# 3. Build (skip tests for first run)
# Uses the Maven installation from the prerequisites; the project tree above
# contains no Maven wrapper, so ./mvnw would not exist.
mvn clean package -DskipTests

# 4. Run
java -jar target/spring-ai-rag-1.0.0-SNAPSHOT.jar

# Expected startup output:
# Tomcat started on port 8080
# Started RagApplication in 4.1 seconds (JVM running for 4.7)

# 5. Verify
curl http://localhost:8080/actuator/health
# {"status":"UP"}
# (per-component details such as "db" appear only if
#  management.endpoint.health.show-details is enabled)

Step 6 — End-to-End I/O Demo

Use any PDF with structured text (HR policy, product manual, legal document). The examples below use employee-handbook-2024.pdf — a 34-page policy document.

6a. Ingest the PDF

curl -X POST http://localhost:8080/api/ingest \
     -F "file=@employee-handbook-2024.pdf" \
     -F "docType=policy" \
     -F "tenantId=acme-corp"

{
  "filename": "employee-handbook-2024.pdf",
  "status": "ingested",
  "chunksCreated": 127,
  "message": "127 chunks written to vector store."
}

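Re-uploading the same unchanged file returns "status": "skipped" — no embeddings are recomputed. This holds only within a single application run: the demo IngestionTracker keeps its hashes in memory, so after a restart the file is ingested again.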

6b. Query the PDF

curl -X POST http://localhost:8080/api/query \
     -H "Content-Type: application/json" \
     -d '{
       "question": "How many annual leave days are employees entitled to, and does unused leave carry over?",
       "metadataFilter": "tenant_id == '\''acme-corp'\'' && doc_type == '\''policy'\''"
     }'

{
  "answer": "According to the Employee Handbook 2024 (Section 4.1), full-time employees are entitled to 20 working days of annual leave per calendar year. Part-time employees receive leave on a pro-rata basis. Unused leave may be carried over for a maximum of 5 days into the following calendar year, provided it is used by 31 March.",
  "sources": [
    {
      "filename": "employee-handbook-2024.pdf",
      "page": "9",
      "chunkPreview": "4.1 Annual Leave Entitlement. Full-time employees are entitled to 20 working days…"
    },
    {
      "filename": "employee-handbook-2024.pdf",
      "page": "9",
      "chunkPreview": "4.2 Leave Carryover. Unused annual leave may be carried over for a maximum of 5 days…"
    },
    {
      "filename": "employee-handbook-2024.pdf",
      "page": "10",
      "chunkPreview": "4.3 Requesting Leave. All requests must be submitted via the HR portal…"
    },
    {
      "filename": "employee-handbook-2024.pdf",
      "page": "11",
      "chunkPreview": "4.7 Sick Leave. Sick leave is separate from annual leave and not subject to…"
    },
    {
      "filename": "employee-handbook-2024.pdf",
      "page": "8",
      "chunkPreview": "3.5 Probationary Period. During the 3-month probationary period, annual leave…"
    }
  ],
  "faithfulnessScore": 0.96,
  "isGrounded": true,
  "status": "answered"
}

6c. Query with no matching content

curl -X POST http://localhost:8080/api/query \
     -H "Content-Type: application/json" \
     -d '{"question": "What is the current company share price?", "metadataFilter": null}'

{
  "answer": "No relevant documents found for your question.",
  "sources": [],
  "faithfulnessScore": 0.0,
  "isGrounded": false,
  "status": "no_context"
}

When no stored chunk clears the 0.60 similarity threshold, the service skips generation entirely and returns "status": "no_context" instead of letting the model guess.

6d. Check Prometheus metrics

curl -s http://localhost:8080/actuator/prometheus | grep rag_

# rag_chunks_ingested_total{application="spring-ai-rag"} 127.0
# rag_ingestion_errors_total is listed only after the first failed ingestion,
# because the counter is registered the first time it is incremented.

← Back to the full technical explanation: Production-Grade RAG with Spring AI 1.1.0

@ankurm