alexey-pelykh commented on code in PR #1993: URL: https://github.com/apache/tika/pull/1993#discussion_r1803480932
########## tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-code-module/src/main/java/org/apache/tika/parser/executable/UniversalExecutableParser.java: ########## @@ -119,46 +118,62 @@ public void parseMachO(XHTMLContentHandler xhtml, EmbeddedDocumentExtractor extr if (!unsortedOffsets && archIndex > 0 && offset < (long) offsetAndSizePerArch[archIndex - 1].getLeft()) { unsortedOffsets = true; } - var size = is64 + long size = is64 ? (isLE ? EndianUtils.readLongLE(stream) : EndianUtils.readLongBE(stream)) : (isLE ? EndianUtils.readIntLE(stream) : EndianUtils.readIntBE(stream)); offsetAndSizePerArch[archIndex] = Pair.of(offset, size); if (is64) { - if (stream.skip(8) != 8) { - throw new TikaException("Failed to skip align and reserved"); - } + IOUtils.skipFully(stream, 8); } else { - if (stream.skip(4) != 4) { - throw new TikaException("Failed to skip align"); - } + IOUtils.skipFully(stream, 4); } currentOffset += archStructSize; } if (unsortedOffsets) { - Arrays.sort(offsetAndSizePerArch, Comparator.comparingLong(entry -> (long) entry.getLeft())); + Arrays.sort(offsetAndSizePerArch, Comparator.comparingLong(Pair::getLeft)); } - for (var archIndex = 0; archIndex < archsCount; archIndex++) { - var skipUntilStart = (long) offsetAndSizePerArch[archIndex].getLeft() - currentOffset; - if (stream.skip(skipUntilStart) != skipUntilStart) { - throw new TikaException("Failed to skip to the start of the per-architecture Mach-O"); - } + for (int archIndex = 0; archIndex < archsCount; archIndex++) { + long skipUntilStart = offsetAndSizePerArch[archIndex].getLeft() - currentOffset; + IOUtils.skipFully(stream, skipUntilStart); currentOffset += skipUntilStart; - var perArchMachO = new byte[(int) (long) offsetAndSizePerArch[archIndex].getRight()]; - if (stream.read(perArchMachO) != perArchMachO.length) { - throw new TikaException("Failed to read the per-architecture Mach-O"); - } + //TODO -- bounds check getRight() value earlier to avoid overflow ??? 
+ byte[] perArchMachO = new byte[(int) offsetAndSizePerArch[archIndex].getRight()]; + IOUtils.readFully(stream, perArchMachO); currentOffset += perArchMachO.length; var perArchMetadata = new Metadata(); var tikaInputStream = TikaInputStream.get(perArchMachO, perArchMetadata); if (extractor.shouldParseEmbedded(perArchMetadata)) { - extractor.parseEmbedded(tikaInputStream, handler, perArchMetadata, true); + extractor.parseEmbedded(tikaInputStream, xhtml, perArchMetadata, true); } } } + + private static class Pair { Review Comment: Why not use `org.apache.commons.lang3.tuple.Pair`, since we're depending on that package anyhow? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@tika.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org