This is an automated email from the ASF dual-hosted git repository. tballison pushed a commit to branch TIKA-4747-add-axml-detection in repository https://gitbox.apache.org/repos/asf/tika.git
commit 5671f691f7deb564b78f63171464ba0ffce22b2a Author: tallison <[email protected]> AuthorDate: Wed Jun 3 13:54:46 2026 -0400 TIKA-4747 -- add axml detection --- .../resources/org/apache/tika/mime/tika-mimetypes.xml | 15 +++++++++++++++ .../test/java/org/apache/tika/mime/MimeDetectionTest.java | 5 +++++ 2 files changed, 20 insertions(+) diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml index f447bf3d97..c559a023af 100644 --- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml +++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml @@ -352,6 +352,21 @@ <sub-class-of type="application/java-archive"/> <glob pattern="*.apk"/> </mime-type> + <mime-type type="application/vnd.android.axml"> + <acronym>AXML</acronym> + <_comment>Android Binary XML</_comment> + <tika:link>https://developer.android.com/guide/topics/manifest/manifest-intro</tika:link> + <!-- Compiled binary form of AndroidManifest.xml and res/*.xml inside an APK. + These carry a .xml extension and live inside a zip, so the *.xml glob would + otherwise route them to application/xml and the XML parser, which fails on the + binary header. Detect by the leading ResChunk_header: type=RES_XML_TYPE(0x0003) + + headerSize(0x0008) -> 0x00080003 little-endian. The following 4 bytes are the + per-file chunk size, so the signature is only the first 4 bytes. Deliberately + NOT a sub-class-of application/xml: it is not XML and must not reach an XML parser. 
--> + <magic priority="50"> + <match value="0x03000800" type="string" offset="0"/> + </magic> + </mime-type> <mime-type type="application/x-tika-java-enterprise-archive"> <sub-class-of type="application/java-archive"/> <glob pattern="*.ear"/> diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java index 76268b5fea..7d0fb12f94 100644 --- a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java +++ b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java @@ -85,6 +85,11 @@ public class MimeDetectionTest { // truncated xml should still be detected as xml, See TIKA-3596 testFile("application/xml", "truncated-utf16-xml.xyz"); + + // Android Binary XML (compiled AndroidManifest.xml / res/*.xml inside an APK). + // Carries a .xml extension, so magic must win over the *.xml glob and it must + // NOT be routed to application/xml / the XML parser. See TIKA-4747. + testFile("application/vnd.android.axml", "test-android-binary.xml"); } @Test
