This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch haystack-vlm-improvements in repository https://gitbox.apache.org/repos/asf/tika.git
commit 2c8be2c8b253fb659a2babad706d5e4ca446fba5 Author: tballison <[email protected]> AuthorDate: Wed May 13 12:37:42 2026 -0400 improvements for vlm --- .../main/java/org/apache/tika/parser/vlm/AbstractVLMParser.java | 9 ++++++++- .../java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java | 6 ++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/AbstractVLMParser.java b/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/AbstractVLMParser.java index 6cdae6f6dd..40b1cd1d08 100644 --- a/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/AbstractVLMParser.java +++ b/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/AbstractVLMParser.java @@ -159,6 +159,10 @@ public abstract class AbstractVLMParser implements Parser, Initializable { public void parse(TikaInputStream tis, ContentHandler handler, Metadata metadata, ParseContext parseContext) throws IOException, SAXException, TikaException { + if (!serverAvailable) { + return; + } + VLMOCRConfig config = getConfig(parseContext); if (config.isSkipOcr()) { @@ -235,7 +239,10 @@ public abstract class AbstractVLMParser implements Parser, Initializable { return; } try { - httpClient.get(healthUrl, Map.of(), defaultConfig.getTimeoutSeconds()); + Map<String, String> healthHeaders = defaultConfig.getApiKey() != null + ? 
Map.of("Authorization", "Bearer " + defaultConfig.getApiKey()) + : Map.of(); + httpClient.get(healthUrl, healthHeaders, defaultConfig.getTimeoutSeconds()); serverAvailable = true; LOG.info("VLM server is available at {}", defaultConfig.getBaseUrl()); } catch (TikaException e) { diff --git a/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java b/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java index eb36cb4a1e..fee8b36b0b 100644 --- a/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java +++ b/tika-parsers/tika-parsers-ml/tika-vlm/src/main/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java @@ -77,6 +77,8 @@ import org.xml.sax.helpers.AttributesImpl; */ class MarkdownToXHTMLEmitter { + private static final String XHTML_NS = "http://www.w3.org/1999/xhtml"; + private static final List<Extension> EXTENSIONS = Arrays.asList( TablesExtension.create(), StrikethroughExtension.create() @@ -368,7 +370,7 @@ class MarkdownToXHTMLEmitter { return; } try { - handler.startElement("", localName, localName, attrs); + handler.startElement(XHTML_NS, localName, localName, attrs); } catch (SAXException e) { saxException = e; } @@ -379,7 +381,7 @@ class MarkdownToXHTMLEmitter { return; } try { - handler.endElement("", localName, localName); + handler.endElement(XHTML_NS, localName, localName); } catch (SAXException e) { saxException = e; }
