This is an automated email from the ASF dual-hosted git repository.
tballison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 63cbdd757f TIKA-4727: improvements for vlm (#2814)
63cbdd757f is described below
commit 63cbdd757f30135936b0f767d8ed5dc9b561acc5
Author: Tim Allison <[email protected]>
AuthorDate: Thu May 14 12:36:54 2026 -0400
TIKA-4727: improvements for vlm (#2814)
---
.../org/apache/tika/parser/vlm/AbstractVLMParser.java | 9 ++++++++-
.../apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java | 6 ++++--
.../tika/parser/vlm/MarkdownToXHTMLEmitterTest.java | 17 +++++++++++------
.../org/apache/tika/parser/vlm/OpenAIVLMParserTest.java | 7 ++++++-
4 files changed, 29 insertions(+), 10 deletions(-)
diff --git a/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/AbstractVLMParser.java b/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/AbstractVLMParser.java
index 6cdae6f6dd..40b1cd1d08 100644
--- a/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/AbstractVLMParser.java
+++ b/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/AbstractVLMParser.java
@@ -159,6 +159,10 @@ public abstract class AbstractVLMParser implements Parser, Initializable {
public void parse(TikaInputStream tis, ContentHandler handler, Metadata
metadata,
ParseContext parseContext) throws IOException,
SAXException, TikaException {
+ if (!serverAvailable) {
+ return;
+ }
+
VLMOCRConfig config = getConfig(parseContext);
if (config.isSkipOcr()) {
@@ -235,7 +239,10 @@ public abstract class AbstractVLMParser implements Parser, Initializable {
return;
}
try {
- httpClient.get(healthUrl, Map.of(), defaultConfig.getTimeoutSeconds());
+ Map<String, String> healthHeaders = defaultConfig.getApiKey() != null
+ ? Map.of("Authorization", "Bearer " + defaultConfig.getApiKey())
+ : Map.of();
+ httpClient.get(healthUrl, healthHeaders, defaultConfig.getTimeoutSeconds());
serverAvailable = true;
LOG.info("VLM server is available at {}", defaultConfig.getBaseUrl());
} catch (TikaException e) {
diff --git a/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java b/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java
index eb36cb4a1e..fee8b36b0b 100644
--- a/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java
+++ b/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java
@@ -77,6 +77,8 @@ import org.xml.sax.helpers.AttributesImpl;
*/
class MarkdownToXHTMLEmitter {
+ private static final String XHTML_NS = "http://www.w3.org/1999/xhtml";
+
private static final List<Extension> EXTENSIONS = Arrays.asList(
TablesExtension.create(),
StrikethroughExtension.create()
@@ -368,7 +370,7 @@ class MarkdownToXHTMLEmitter {
return;
}
try {
- handler.startElement("", localName, localName, attrs);
+ handler.startElement(XHTML_NS, localName, localName, attrs);
} catch (SAXException e) {
saxException = e;
}
@@ -379,7 +381,7 @@ class MarkdownToXHTMLEmitter {
return;
}
try {
- handler.endElement("", localName, localName);
+ handler.endElement(XHTML_NS, localName, localName);
} catch (SAXException e) {
saxException = e;
}
diff --git a/tika-parsers/tika-parsers-ml/tika-vlm/src/test/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitterTest.java b/tika-parsers/tika-parsers-ml/tika-vlm/src/test/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitterTest.java
index fa3181fca9..71ed9548e7 100644
--- a/tika-parsers/tika-parsers-ml/tika-vlm/src/test/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitterTest.java
+++ b/tika-parsers/tika-parsers-ml/tika-vlm/src/test/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitterTest.java
@@ -215,18 +215,23 @@ public class MarkdownToXHTMLEmitterTest {
void testEmptyInput() throws Exception {
String xml = emit("");
// Should produce just the root wrapper, no content elements
- assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?><root/>", xml);
+ assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ + "<root xmlns=\"http://www.w3.org/1999/xhtml\"/>", xml);
}
@Test
void testNullInput() throws Exception {
String xml = emit(null);
- assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?><root/>", xml);
+ assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ + "<root xmlns=\"http://www.w3.org/1999/xhtml\"/>", xml);
}
+ private static final String XHTML_NS = "http://www.w3.org/1999/xhtml";
+
/**
- * Emit markdown through the emitter, wrapping in a root element so
- * the SAX output is well-formed XML we can assert against.
+ * Emit markdown through the emitter, wrapping in a root element in the
+ * XHTML namespace so emitter-emitted elements inherit the namespace and
+ * the serializer doesn't redeclare xmlns on every child.
*/
private String emit(String markdown) throws Exception {
StringWriter sw = new StringWriter();
@@ -238,9 +243,9 @@ public class MarkdownToXHTMLEmitterTest {
th.setResult(new StreamResult(sw));
th.startDocument();
- th.startElement("", "root", "root", new org.xml.sax.helpers.AttributesImpl());
+ th.startElement(XHTML_NS, "root", "root", new org.xml.sax.helpers.AttributesImpl());
MarkdownToXHTMLEmitter.emit(markdown, th);
- th.endElement("", "root", "root");
+ th.endElement(XHTML_NS, "root", "root");
th.endDocument();
return sw.toString();
diff --git a/tika-parsers/tika-parsers-ml/tika-vlm/src/test/java/org/apache/tika/parser/vlm/OpenAIVLMParserTest.java b/tika-parsers/tika-parsers-ml/tika-vlm/src/test/java/org/apache/tika/parser/vlm/OpenAIVLMParserTest.java
index 9eed29a142..f8c9079d95 100644
--- a/tika-parsers/tika-parsers-ml/tika-vlm/src/test/java/org/apache/tika/parser/vlm/OpenAIVLMParserTest.java
+++ b/tika-parsers/tika-parsers-ml/tika-vlm/src/test/java/org/apache/tika/parser/vlm/OpenAIVLMParserTest.java
@@ -161,7 +161,11 @@ public class OpenAIVLMParserTest {
@Test
void testApiKeyHeader() throws Exception {
config.setApiKey("sk-test-key");
+ // Default completions path -> initialize() will probe /v1/models.
+ server.enqueue(new TikaTestHttpServer.MockResponse(200, "{\"object\":\"list\"}"));
parser = new OpenAIVLMParser(config);
+ parser.initialize();
+ server.clearRequests();
server.enqueue(new TikaTestHttpServer.MockResponse(200,
buildChatResponse("text", 10, 5)));
@@ -180,13 +184,14 @@ public class OpenAIVLMParserTest {
@Test
void testAzureStyleAuth() throws Exception {
config.setApiKey("azure-key-123");
- parser = new OpenAIVLMParser(config);
config.setCompletionsPath("/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01");
parser = new OpenAIVLMParser(config);
parser.setApiKeyHeaderName("api-key");
parser.setApiKeyPrefix("");
parser.setCompletionsPath(
"/openai/deployments/gpt-4o/chat/completions?api-version=2024-02-01");
+ // Non-default completions path -> health check is skipped.
+ parser.initialize();
server.enqueue(new TikaTestHttpServer.MockResponse(200,
buildChatResponse("text", 10, 5)));