This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch TIKA-4747-add-axml-detection
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 5671f691f7deb564b78f63171464ba0ffce22b2a
Author: tallison <[email protected]>
AuthorDate: Wed Jun 3 13:54:46 2026 -0400

    TIKA-4747 -- add axml detection
---
 .../resources/org/apache/tika/mime/tika-mimetypes.xml     | 15 +++++++++++++++
 .../test/java/org/apache/tika/mime/MimeDetectionTest.java |  5 +++++
 2 files changed, 20 insertions(+)

diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index f447bf3d97..c559a023af 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -352,6 +352,21 @@
     <sub-class-of type="application/java-archive"/>
     <glob pattern="*.apk"/>
   </mime-type>
+  <mime-type type="application/vnd.android.axml">
+    <acronym>AXML</acronym>
+    <_comment>Android Binary XML</_comment>
+    <tika:link>https://developer.android.com/guide/topics/manifest/manifest-intro</tika:link>
+    <!-- Compiled binary form of AndroidManifest.xml and res/*.xml inside an APK.
+         These carry a .xml extension and live inside a zip, so the *.xml glob would
+         otherwise route them to application/xml and the XML parser, which fails on the
+         binary header. Detect by the leading ResChunk_header: type=RES_XML_TYPE(0x0003)
+         + headerSize(0x0008) -> 0x00080003 little-endian. The following 4 bytes are the
+         per-file chunk size, so the signature is only the first 4 bytes. Deliberately
+         NOT a sub-class-of application/xml: it is not XML and must not reach an XML parser. -->
+    <magic priority="50">
+      <match value="0x03000800" type="string" offset="0"/>
+    </magic>
+  </mime-type>
   <mime-type type="application/x-tika-java-enterprise-archive">
     <sub-class-of type="application/java-archive"/>
     <glob pattern="*.ear"/>
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
index 76268b5fea..7d0fb12f94 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
@@ -85,6 +85,11 @@ public class MimeDetectionTest {
 
         // truncated xml should still be detected as xml, See TIKA-3596
         testFile("application/xml", "truncated-utf16-xml.xyz");
+
+        // Android Binary XML (compiled AndroidManifest.xml / res/*.xml inside an APK).
+        // Carries a .xml extension, so magic must win over the *.xml glob and it must
+        // NOT be routed to application/xml / the XML parser. See TIKA-4747.
+        testFile("application/vnd.android.axml", "test-android-binary.xml");
     }
 
     @Test

Reply via email to