alexey-pelykh commented on code in PR #1993:
URL: https://github.com/apache/tika/pull/1993#discussion_r1803480932


##########
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/executable/UniversalExecutableParser.java:
##########
@@ -119,46 +118,62 @@ public void parseMachO(XHTMLContentHandler xhtml, 
EmbeddedDocumentExtractor extr
             if (!unsortedOffsets && archIndex > 0 && offset < (long) 
offsetAndSizePerArch[archIndex - 1].getLeft()) {
                 unsortedOffsets = true;
             }
-            var size = is64
+            long size = is64
                     ? (isLE ? EndianUtils.readLongLE(stream) : 
EndianUtils.readLongBE(stream))
                     : (isLE ? EndianUtils.readIntLE(stream) : 
EndianUtils.readIntBE(stream));
 
             offsetAndSizePerArch[archIndex] = Pair.of(offset, size);
 
             if (is64) {
-                if (stream.skip(8) != 8) {
-                    throw new TikaException("Failed to skip align and 
reserved");
-                }
+                IOUtils.skipFully(stream, 8);
             } else {
-                if (stream.skip(4) != 4) {
-                    throw new TikaException("Failed to skip align");
-                }
+                IOUtils.skipFully(stream, 4);
             }
 
             currentOffset += archStructSize;
         }
         if (unsortedOffsets) {
-            Arrays.sort(offsetAndSizePerArch, Comparator.comparingLong(entry 
-> (long) entry.getLeft()));
+            Arrays.sort(offsetAndSizePerArch, 
Comparator.comparingLong(Pair::getLeft));
         }
 
-        for (var archIndex = 0; archIndex < archsCount; archIndex++) {
-            var skipUntilStart = (long) 
offsetAndSizePerArch[archIndex].getLeft() - currentOffset;
-            if (stream.skip(skipUntilStart) != skipUntilStart) {
-                throw new TikaException("Failed to skip to the start of the 
per-architecture Mach-O");
-            }
+        for (int archIndex = 0; archIndex < archsCount; archIndex++) {
+            long skipUntilStart = offsetAndSizePerArch[archIndex].getLeft() - 
currentOffset;
+            IOUtils.skipFully(stream, skipUntilStart);
             currentOffset += skipUntilStart;
 
-            var perArchMachO = new byte[(int) (long) 
offsetAndSizePerArch[archIndex].getRight()];
-            if (stream.read(perArchMachO) != perArchMachO.length) {
-                throw new TikaException("Failed to read the per-architecture 
Mach-O");
-            }
+            //TODO -- bounds check getRight() value earlier to avoid overflow 
???
+            byte[] perArchMachO = new byte[(int) 
offsetAndSizePerArch[archIndex].getRight()];
+            IOUtils.readFully(stream, perArchMachO);
             currentOffset += perArchMachO.length;
 
             var perArchMetadata = new Metadata();
             var tikaInputStream = TikaInputStream.get(perArchMachO, 
perArchMetadata);
             if (extractor.shouldParseEmbedded(perArchMetadata)) {
-                extractor.parseEmbedded(tikaInputStream, handler, 
perArchMetadata, true);
+                extractor.parseEmbedded(tikaInputStream, xhtml, 
perArchMetadata, true);
             }
         }
     }
+
+    private static class Pair {

Review Comment:
   Why not use `org.apache.commons.lang3.tuple.Pair`, since we're depending 
on that package anyhow?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscr...@tika.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to