This is an automated email from the ASF dual-hosted git repository. jamesnetherton pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/camel-quarkus-examples.git
commit f02340274efaffb14a5b658892faa50a07967585 Author: aldettinger <[email protected]> AuthorDate: Mon Feb 3 10:53:39 2025 +0100 data-extract: switch to ollama structured output --- data-extract-langchain4j/README.adoc | 6 +++--- .../extraction/CustomPojoExtractionService.java | 25 +++++++++++++++------- .../src/main/java/org/acme/extraction/Routes.java | 1 - .../src/main/resources/application.properties | 1 - .../org/acme/extraction/OllamaTestResource.java | 2 +- .../test/java/org/acme/extraction/RouteTest.java | 6 +++--- ..._chat-32621e2b-3757-47c3-81a1-8a4fbe85fa70.json | 24 --------------------- ..._chat-608a9883-c373-4219-a55c-e560e83df720.json | 24 +++++++++++++++++++++ ..._chat-87de6be6-8581-403f-8adf-b5f278616d50.json | 24 +++++++++++++++++++++ ..._chat-9d0bfc1d-80d8-475c-85eb-a72a13e81d06.json | 24 +++++++++++++++++++++ ..._chat-af57842e-561e-4156-876d-75684fabb0a8.json | 24 --------------------- ..._chat-fe938c62-23dc-4814-8d9a-9833afc2be3d.json | 24 --------------------- 12 files changed, 96 insertions(+), 89 deletions(-) diff --git a/data-extract-langchain4j/README.adoc b/data-extract-langchain4j/README.adoc index f0854e7..9b8e0a6 100644 --- a/data-extract-langchain4j/README.adoc +++ b/data-extract-langchain4j/README.adoc @@ -14,11 +14,11 @@ In this example, we'll convert those text conversations into Java Objects that c image::schema.png[] -In order to achieve this extraction, we'll need a https://en.wikipedia.org/wiki/Large_language_model[Large Language Model (LLM)] that natively supports JSON output. +In order to achieve this extraction, we'll need a https://en.wikipedia.org/wiki/Large_language_model[Large Language Model (LLM)] and related serving framework that natively supports https://ollama.com/blog/structured-outputs[JSON structured output]. Here, we choose https://ollama.com/library/granite3-dense[granite3-dense] served through https://ollama.com/[ollama] as it seems compute friendly and under Apache V2 license. In order to request inference to the served model, we'll use the high-level LangChain4j APIs like https://docs.langchain4j.dev/tutorials/ai-services[AiServices]. More precisely, we'll setup the https://docs.quarkiverse.io/quarkus-langchain4j/dev/index.html[Quarkus LangChain4j extension] to register an AiService bean. -Finally, we'll invoke the AiService extraction method via the https://camel.apache.org/camel-quarkus/latest/reference/extensions/bean.html[Camel Quarkus bean extension] . +Finally, we'll invoke the AiService extraction method via the https://camel.apache.org/camel-quarkus/latest/reference/extensions/bean.html[Camel Quarkus bean extension]. === Start the Large Language Model @@ -26,7 +26,7 @@ Let's start a container to serve the LLM with Ollama, in a first shell type: [source,shell] ---- -docker run --rm -it -v cqex-data-extract-ollama:/root/.ollama -p 11434:11434 --name cqex-data-extract-ollama ollama/ollama:0.4.0-rc5 +docker run --rm -it -v cqex-data-extract-ollama:/root/.ollama -p 11434:11434 --name cqex-data-extract-ollama ollama/ollama:0.5.7 ---- After a moment, a log like below should be output: diff --git a/data-extract-langchain4j/src/main/java/org/acme/extraction/CustomPojoExtractionService.java b/data-extract-langchain4j/src/main/java/org/acme/extraction/CustomPojoExtractionService.java index 8a6923e..34f616e 100644 --- a/data-extract-langchain4j/src/main/java/org/acme/extraction/CustomPojoExtractionService.java +++ b/data-extract-langchain4j/src/main/java/org/acme/extraction/CustomPojoExtractionService.java @@ -16,7 +16,7 @@ */ package org.acme.extraction; -import java.time.LocalDate; +import java.time.Month; import java.util.Locale; import dev.langchain4j.service.UserMessage; @@ -24,7 +24,6 @@ import io.quarkiverse.langchain4j.RegisterAiService; import io.quarkus.runtime.annotations.RegisterForReflection; import jakarta.enterprise.context.ApplicationScoped; import org.apache.camel.Handler; -import org.apache.camel.Header; import org.apache.camel.jsonpath.JsonPath; @RegisterAiService @@ -35,24 +34,34 @@ public interface CustomPojoExtractionService { static class CustomPojo { public boolean customerSatisfied; public String customerName; - public LocalDate customerBirthday; + public CustomDate customerBirthday; public String summary; private final static String FORMAT = "\n{\n" + "\t\"customerSatisfied\": \"%s\",\n" + "\t\"customerName\": \"%s\",\n" - + "\t\"customerBirthday\": \"%td %tB %tY\",\n" + + "\t\"customerBirthday\": \"%s\",\n" + "\t\"summary\": \"%s\"\n" + "}\n"; public String toString() { return String.format(Locale.US, FORMAT, this.customerSatisfied, this.customerName, this.customerBirthday, - this.customerBirthday, this.customerBirthday, this.summary); + this.summary); + } + } + + @RegisterForReflection + static class CustomDate { + int year; + int month; + int day; + + public String toString() { + return String.format("%d %s %d", day, Month.of(month), year); } } static final String CUSTOM_POJO_EXTRACT_PROMPT = "Extract information about a customer from the text delimited by triple backticks: ```{text}```." - + "The customerBirthday field should be formatted as {dateFormat}." + "The summary field should concisely relate the customer main ask."; /** @@ -61,11 +70,11 @@ public interface CustomPojoExtractionService { * the pom.xml file. Without -parameters, one would need to use the @V annotation like in the method signature * proposed below: extractFromText(@dev.langchain4j.service.V("text") String text); * - * Notice how Camel maps the incoming exchange to the method parameters with annotations like @JsonPath and @Header. + * Notice how Camel maps the incoming exchange to the method parameters with annotations like @JsonPath. * More information on the Camel bean parameter binding feature could be found here: * https://camel.apache.org/manual/bean-binding.html#_parameter_binding */ @UserMessage(CUSTOM_POJO_EXTRACT_PROMPT) @Handler - CustomPojo extractFromText(@JsonPath("$.content") String text, @Header("expectedDateFormat") String dateFormat); + CustomPojo extractFromText(@JsonPath("$.content") String text); } diff --git a/data-extract-langchain4j/src/main/java/org/acme/extraction/Routes.java b/data-extract-langchain4j/src/main/java/org/acme/extraction/Routes.java index a376aca..f850fed 100644 --- a/data-extract-langchain4j/src/main/java/org/acme/extraction/Routes.java +++ b/data-extract-langchain4j/src/main/java/org/acme/extraction/Routes.java @@ -32,7 +32,6 @@ public class Routes extends RouteBuilder { // Consumes file documents that contain conversation transcripts (JSON format) from("file:target/transcripts?sortBy=file:name") .log("A document has been received by the camel-quarkus-file extension: ${body}") - .setHeader("expectedDateFormat", constant("YYYY-MM-DD")) // The CustomPojoExtractionService transforms the conversation transcript into a CustomPojoExtractionService.CustomPojo .bean(CustomPojoExtractionService.class) // Store extracted CustomPojoExtractionService.CustomPojos objects into the CustomPojoStore for later inspection diff --git a/data-extract-langchain4j/src/main/resources/application.properties b/data-extract-langchain4j/src/main/resources/application.properties index a37b08d..c1e2dcd 100644 --- a/data-extract-langchain4j/src/main/resources/application.properties +++ b/data-extract-langchain4j/src/main/resources/application.properties @@ -22,7 +22,6 @@ quarkus.banner.enabled = false quarkus.langchain4j.ollama.base-url = http://localhost:11434 quarkus.langchain4j.ollama.timeout = 3m quarkus.langchain4j.ollama.chat-model.model-id = granite3-dense -quarkus.langchain4j.ollama.chat-model.format = json quarkus.langchain4j.ollama.chat-model.temperature = 0 # Uncomment lines below to log Ollama client requests and responses #quarkus.langchain4j.ollama.log-requests=true diff --git a/data-extract-langchain4j/src/test/java/org/acme/extraction/OllamaTestResource.java b/data-extract-langchain4j/src/test/java/org/acme/extraction/OllamaTestResource.java index 08ef690..259d2cf 100644 --- a/data-extract-langchain4j/src/test/java/org/acme/extraction/OllamaTestResource.java +++ b/data-extract-langchain4j/src/test/java/org/acme/extraction/OllamaTestResource.java @@ -36,7 +36,7 @@ public class OllamaTestResource implements QuarkusTestResourceLifecycleManager { private static final Logger LOG = LoggerFactory.getLogger(OllamaTestResource.class); - private static final String OLLAMA_IMAGE = "ollama/ollama:0.4.0-rc5"; + private static final String OLLAMA_IMAGE = "ollama/ollama:0.5.7"; private static final int OLLAMA_SERVER_PORT = 11434; private static final String MODE_MOCK = "mock"; diff --git a/data-extract-langchain4j/src/test/java/org/acme/extraction/RouteTest.java b/data-extract-langchain4j/src/test/java/org/acme/extraction/RouteTest.java index fe466c4..b75d695 100644 --- a/data-extract-langchain4j/src/test/java/org/acme/extraction/RouteTest.java +++ b/data-extract-langchain4j/src/test/java/org/acme/extraction/RouteTest.java @@ -67,17 +67,17 @@ public class RouteTest { // Assert values of the first extracted POJO .body("pojos[0].customerSatisfied", is("true")) .body("pojos[0].customerName", is("Sarah London")) - .body("pojos[0].customerBirthday", is("10 July 1986")) + .body("pojos[0].customerBirthday", is("10 JULY 1986")) .body("pojos[0].summary", not(empty())) // Assert values of the second extracted POJO .body("pojos[1].customerSatisfied", is("false")) .body("pojos[1].customerName", is("John Doe")) - .body("pojos[1].customerBirthday", is("01 November 2001")) + .body("pojos[1].customerBirthday", is("1 NOVEMBER 2001")) .body("pojos[1].summary", not(empty())) // Assert values of the third extracted POJO .body("pojos[2].customerSatisfied", is("true")) .body("pojos[2].customerName", is("Kate Boss")) - .body("pojos[2].customerBirthday", is("13 August 1999")) + .body("pojos[2].customerBirthday", is("13 AUGUST 1999")) .body("pojos[2].summary", not(empty())); } diff --git a/data-extract-langchain4j/src/test/resources/mappings/api_chat-32621e2b-3757-47c3-81a1-8a4fbe85fa70.json b/data-extract-langchain4j/src/test/resources/mappings/api_chat-32621e2b-3757-47c3-81a1-8a4fbe85fa70.json deleted file mode 100644 index 1d0e5fa..0000000 --- a/data-extract-langchain4j/src/test/resources/mappings/api_chat-32621e2b-3757-47c3-81a1-8a4fbe85fa70.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "id" : "32621e2b-3757-47c3-81a1-8a4fbe85fa70", - "name" : "api_chat", - "request" : { - "url" : "/api/chat", - "method" : "POST", - "bodyPatterns" : [ { - "equalToJson" : "{\n \"model\" : \"granite3-dense\",\n \"messages\" : [ {\n \"role\" : \"assistant\",\n \"content\" : \"{\\n\\\"customerSatisfied\\\": true,\\n\\\"customerName\\\": \\\"Sarah London\\\",\\n\\\"customerBirthday\\\": \\\"1986-07-10\\\",\\n\\\"summary\\\": \\\"The customer, Sarah London, called to declare an accident on her main vehicle and was informed that all expenses related to the accident would be reimbursed.\\\"\\n}\"\n }, {\n \"role\" : \"user\",\n [...] - "ignoreArrayOrder" : true, - "ignoreExtraElements" : true - } ] - }, - "response" : { - "status" : 200, - "body" : "{\"model\":\"granite3-dense\",\"created_at\":\"2025-01-17T10:38:48.022476739Z\",\"message\":{\"role\":\"assistant\",\"content\":\"{\\n\\\"customerSatisfied\\\": false,\\n\\\"customerName\\\": \\\"John Doe\\\",\\n\\\"customerBirthday\\\": \\\"2001-11-01\\\",\\n\\\"summary\\\": \\\"The customer, John Doe, called to express his dissatisfaction with the insurance company's reimbursement policy and was informed that the full reimbursement option had been automatically cancelled. [...] - "headers" : { - "Date" : "Fri, 17 Jan 2025 10:38:48 GMT", - "Content-Type" : "application/json; charset=utf-8" - } - }, - "uuid" : "32621e2b-3757-47c3-81a1-8a4fbe85fa70", - "persistent" : true, - "insertionIndex" : 119 -} \ No newline at end of file diff --git a/data-extract-langchain4j/src/test/resources/mappings/api_chat-608a9883-c373-4219-a55c-e560e83df720.json b/data-extract-langchain4j/src/test/resources/mappings/api_chat-608a9883-c373-4219-a55c-e560e83df720.json new file mode 100644 index 0000000..60d06af --- /dev/null +++ b/data-extract-langchain4j/src/test/resources/mappings/api_chat-608a9883-c373-4219-a55c-e560e83df720.json @@ -0,0 +1,24 @@ +{ + "id" : "608a9883-c373-4219-a55c-e560e83df720", + "name" : "api_chat", + "request" : { + "url" : "/api/chat", + "method" : "POST", + "bodyPatterns" : [ { + "equalToJson" : "{\n \"model\" : \"granite3-dense\",\n \"messages\" : [ {\n \"role\" : \"assistant\",\n \"content\" : \"{\\n \\\"customerSatisfied\\\": false,\\n \\\"customerName\\\": \\\"John Doe\\\",\\n \\\"customerBirthday\\\": {\\n \\\"year\\\": 2001,\\n \\\"month\\\": 11,\\n \\\"day\\\": 1\\n },\\n \\\"summary\\\": \\\"The customer, John Doe, contacted the operator to express dissatisfaction with the partial reimbursement of accident-related expenses. The [...] + "ignoreArrayOrder" : true, + "ignoreExtraElements" : true + } ] + }, + "response" : { + "status" : 200, + "body" : "{\"model\":\"granite3-dense\",\"created_at\":\"2025-02-03T10:12:43.501203015Z\",\"message\":{\"role\":\"assistant\",\"content\":\"{\\n \\\"customerSatisfied\\\": true,\\n \\\"customerName\\\": \\\"Kate Boss\\\",\\n \\\"customerBirthday\\\": {\\n \\\"year\\\": 1999,\\n \\\"month\\\": 8,\\n \\\"day\\\": 13\\n },\\n \\\"summary\\\": \\\"The customer, Kate Boss, contacted the operator to request a proof of insurance for an accident she was involved in. The operato [...] + "headers" : { + "Date" : "Mon, 03 Feb 2025 10:12:43 GMT", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "608a9883-c373-4219-a55c-e560e83df720", + "persistent" : true, + "insertionIndex" : 4 +} \ No newline at end of file diff --git a/data-extract-langchain4j/src/test/resources/mappings/api_chat-87de6be6-8581-403f-8adf-b5f278616d50.json b/data-extract-langchain4j/src/test/resources/mappings/api_chat-87de6be6-8581-403f-8adf-b5f278616d50.json new file mode 100644 index 0000000..136d9bf --- /dev/null +++ b/data-extract-langchain4j/src/test/resources/mappings/api_chat-87de6be6-8581-403f-8adf-b5f278616d50.json @@ -0,0 +1,24 @@ +{ + "id" : "87de6be6-8581-403f-8adf-b5f278616d50", + "name" : "api_chat", + "request" : { + "url" : "/api/chat", + "method" : "POST", + "bodyPatterns" : [ { + "equalToJson" : "{\n \"model\" : \"granite3-dense\",\n \"messages\" : [ {\n \"role\" : \"user\",\n \"content\" : \"Extract information about a customer from the text delimited by triple backticks: ```Operator: Hello, how may I help you ?\\nCustomer: Hello, I'm calling because I need to declare an accident on my main vehicle.\\nOperator: Ok, can you please give me your name ?\\nCustomer: My name is Sarah London.\\nOperator: Could you please give me your birth date ?\\nCustom [...] + "ignoreArrayOrder" : true, + "ignoreExtraElements" : true + } ] + }, + "response" : { + "status" : 200, + "body" : "{\"model\":\"granite3-dense\",\"created_at\":\"2025-02-03T10:12:20.396157239Z\",\"message\":{\"role\":\"assistant\",\"content\":\"{\\n \\\"customerSatisfied\\\": true,\\n \\\"customerName\\\": \\\"Sarah London\\\",\\n \\\"customerBirthday\\\": {\\n \\\"year\\\": 1986,\\n \\\"month\\\": 7,\\n \\\"day\\\": 10\\n },\\n \\\"summary\\\": \\\"The customer, Sarah London, called to declare an accident on her main vehicle and was informed that the expenses related to t [...] + "headers" : { + "Date" : "Mon, 03 Feb 2025 10:12:20 GMT", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "87de6be6-8581-403f-8adf-b5f278616d50", + "persistent" : true, + "insertionIndex" : 6 +} \ No newline at end of file diff --git a/data-extract-langchain4j/src/test/resources/mappings/api_chat-9d0bfc1d-80d8-475c-85eb-a72a13e81d06.json b/data-extract-langchain4j/src/test/resources/mappings/api_chat-9d0bfc1d-80d8-475c-85eb-a72a13e81d06.json new file mode 100644 index 0000000..6c559af --- /dev/null +++ b/data-extract-langchain4j/src/test/resources/mappings/api_chat-9d0bfc1d-80d8-475c-85eb-a72a13e81d06.json @@ -0,0 +1,24 @@ +{ + "id" : "9d0bfc1d-80d8-475c-85eb-a72a13e81d06", + "name" : "api_chat", + "request" : { + "url" : "/api/chat", + "method" : "POST", + "bodyPatterns" : [ { + "equalToJson" : "{\n \"model\" : \"granite3-dense\",\n \"messages\" : [ {\n \"role\" : \"assistant\",\n \"content\" : \"{\\n \\\"customerSatisfied\\\": true,\\n \\\"customerName\\\": \\\"Sarah London\\\",\\n \\\"customerBirthday\\\": {\\n \\\"year\\\": 1986,\\n \\\"month\\\": 7,\\n \\\"day\\\": 10\\n },\\n \\\"summary\\\": \\\"The customer, Sarah London, called to declare an accident on her main vehicle and was informed that the expenses related to the acciden [...] + "ignoreArrayOrder" : true, + "ignoreExtraElements" : true + } ] + }, + "response" : { + "status" : 200, + "body" : "{\"model\":\"granite3-dense\",\"created_at\":\"2025-02-03T10:12:31.342721938Z\",\"message\":{\"role\":\"assistant\",\"content\":\"{\\n \\\"customerSatisfied\\\": false,\\n \\\"customerName\\\": \\\"John Doe\\\",\\n \\\"customerBirthday\\\": {\\n \\\"year\\\": 2001,\\n \\\"month\\\": 11,\\n \\\"day\\\": 1\\n },\\n \\\"summary\\\": \\\"The customer, John Doe, contacted the operator to express dissatisfaction with the partial reimbursement of accident-related exp [...] + "headers" : { + "Date" : "Mon, 03 Feb 2025 10:12:31 GMT", + "Content-Type" : "application/json; charset=utf-8" + } + }, + "uuid" : "9d0bfc1d-80d8-475c-85eb-a72a13e81d06", + "persistent" : true, + "insertionIndex" : 5 +} \ No newline at end of file diff --git a/data-extract-langchain4j/src/test/resources/mappings/api_chat-af57842e-561e-4156-876d-75684fabb0a8.json b/data-extract-langchain4j/src/test/resources/mappings/api_chat-af57842e-561e-4156-876d-75684fabb0a8.json deleted file mode 100644 index 7c82044..0000000 --- a/data-extract-langchain4j/src/test/resources/mappings/api_chat-af57842e-561e-4156-876d-75684fabb0a8.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "id" : "af57842e-561e-4156-876d-75684fabb0a8", - "name" : "api_chat", - "request" : { - "url" : "/api/chat", - "method" : "POST", - "bodyPatterns" : [ { - "equalToJson" : "{\n \"model\" : \"granite3-dense\",\n \"messages\" : [ {\n \"role\" : \"assistant\",\n \"content\" : \"{\\n\\\"customerSatisfied\\\": false,\\n\\\"customerName\\\": \\\"John Doe\\\",\\n\\\"customerBirthday\\\": \\\"2001-11-01\\\",\\n\\\"summary\\\": \\\"The customer, John Doe, called to express his dissatisfaction with the insurance company's reimbursement policy and was informed that the full reimbursement option had been automatically cancelled.\\\"\\n}\" [...] - "ignoreArrayOrder" : true, - "ignoreExtraElements" : true - } ] - }, - "response" : { - "status" : 200, - "body" : "{\"model\":\"granite3-dense\",\"created_at\":\"2025-01-17T10:39:13.035382934Z\",\"message\":{\"role\":\"assistant\",\"content\":\"{\\n\\\"customerSatisfied\\\": true,\\n\\\"customerName\\\": \\\"Kate Boss\\\",\\n\\\"customerBirthday\\\": \\\"1999-08-13\\\",\\n\\\"summary\\\": \\\"The customer, Kate Boss, called to request a proof of insurance after an accident and was informed that the contract was found and the proof would be sent to the police station.\\\"\\n}\"},\"done_r [...] - "headers" : { - "Date" : "Fri, 17 Jan 2025 10:39:13 GMT", - "Content-Type" : "application/json; charset=utf-8" - } - }, - "uuid" : "af57842e-561e-4156-876d-75684fabb0a8", - "persistent" : true, - "insertionIndex" : 118 -} \ No newline at end of file diff --git a/data-extract-langchain4j/src/test/resources/mappings/api_chat-fe938c62-23dc-4814-8d9a-9833afc2be3d.json b/data-extract-langchain4j/src/test/resources/mappings/api_chat-fe938c62-23dc-4814-8d9a-9833afc2be3d.json deleted file mode 100644 index 6076a8c..0000000 --- a/data-extract-langchain4j/src/test/resources/mappings/api_chat-fe938c62-23dc-4814-8d9a-9833afc2be3d.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "id" : "fe938c62-23dc-4814-8d9a-9833afc2be3d", - "name" : "api_chat", - "request" : { - "url" : "/api/chat", - "method" : "POST", - "bodyPatterns" : [ { - "equalToJson" : "{\n \"model\" : \"granite3-dense\",\n \"messages\" : [ {\n \"role\" : \"user\",\n \"content\" : \"Extract information about a customer from the text delimited by triple backticks: ```Operator: Hello, how may I help you ?\\nCustomer: Hello, I'm calling because I need to declare an accident on my main vehicle.\\nOperator: Ok, can you please give me your name ?\\nCustomer: My name is Sarah London.\\nOperator: Could you please give me your birth date ?\\nCustom [...] - "ignoreArrayOrder" : true, - "ignoreExtraElements" : true - } ] - }, - "response" : { - "status" : 200, - "body" : "{\"model\":\"granite3-dense\",\"created_at\":\"2025-01-17T10:38:23.380035411Z\",\"message\":{\"role\":\"assistant\",\"content\":\"{\\n\\\"customerSatisfied\\\": true,\\n\\\"customerName\\\": \\\"Sarah London\\\",\\n\\\"customerBirthday\\\": \\\"1986-07-10\\\",\\n\\\"summary\\\": \\\"The customer, Sarah London, called to declare an accident on her main vehicle and was informed that all expenses related to the accident would be reimbursed.\\\"\\n}\"},\"done_reason\":\"stop\", [...] - "headers" : { - "Date" : "Fri, 17 Jan 2025 10:38:23 GMT", - "Content-Type" : "application/json; charset=utf-8" - } - }, - "uuid" : "fe938c62-23dc-4814-8d9a-9833afc2be3d", - "persistent" : true, - "insertionIndex" : 120 -} \ No newline at end of file
