This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tika.git
commit 4dd9e935083421cc14fd2d9372b55d64db5d9488 Author: tallison <[email protected]> AuthorDate: Mon Apr 13 08:57:44 2026 -0400 clean up file list pipes iterator --- .../tika/async/cli/FileListPipesIterator.java | 4 +- .../tika/async/cli/FileListPipesIteratorTest.java | 4 ++ .../iterator/filelist/FileListPipesIterator.java | 30 ---------- .../filelist/FileListPipesIteratorTest.java | 68 ---------------------- 4 files changed, 5 insertions(+), 101 deletions(-) diff --git a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/FileListPipesIterator.java b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/FileListPipesIterator.java index 23237bfcaa..1649896ccb 100644 --- a/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/FileListPipesIterator.java +++ b/tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/FileListPipesIterator.java @@ -22,7 +22,6 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Iterator; import java.util.NoSuchElementException; -import java.util.concurrent.atomic.AtomicInteger; import org.apache.tika.pipes.api.FetchEmitTuple; import org.apache.tika.pipes.api.emitter.EmitKey; @@ -58,7 +57,6 @@ class FileListPipesIterator implements PipesIterator { throw new RuntimeException("Failed to open file list: " + fileListPath, e); } - AtomicInteger id = new AtomicInteger(); return new Iterator<>() { private FetchEmitTuple next; private boolean done; @@ -77,7 +75,7 @@ class FileListPipesIterator implements PipesIterator { line = line.trim(); if (!line.isEmpty() && !line.startsWith("#")) { next = new FetchEmitTuple( - String.valueOf(id.getAndIncrement()), + line, new FetchKey(TikaConfigAsyncWriter.FETCHER_NAME, line), new EmitKey(TikaConfigAsyncWriter.EMITTER_NAME, line)); return true; diff --git a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/FileListPipesIteratorTest.java b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/FileListPipesIteratorTest.java index 5a56cabb39..181b3c1c29 100644 --- a/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/FileListPipesIteratorTest.java +++ b/tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/FileListPipesIteratorTest.java @@ -47,6 +47,10 @@ public class FileListPipesIteratorTest { assertEquals("subdir/doc2.txt", tuples.get(1).getFetchKey().getFetchKey()); assertEquals("doc3.html", tuples.get(2).getFetchKey().getFetchKey()); + assertEquals("doc1.pdf", tuples.get(0).getId()); + assertEquals("subdir/doc2.txt", tuples.get(1).getId()); + assertEquals("doc3.html", tuples.get(2).getId()); + assertEquals("fsf", tuples.get(0).getFetchKey().getFetcherId()); assertEquals("fse", tuples.get(0).getEmitKey().getEmitterId()); } diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/main/java/org/apache/tika/pipes/iterator/filelist/FileListPipesIterator.java b/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/main/java/org/apache/tika/pipes/iterator/filelist/FileListPipesIterator.java deleted file mode 100644 index 8836fb5f55..0000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/main/java/org/apache/tika/pipes/iterator/filelist/FileListPipesIterator.java +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.pipes.iterator.filelist; - -/** - * Reads a list of file names/relative paths from a UTF-8 file. - * One file name/relative path per line. This path is used for the fetch key, - * the id and the emit key. If you need more customized control of the keys/ids, - * consider using the jdbc pipes iterator or the csv pipes iterator. - * - * Skips empty lines and lines starting with '#' - * - * TODO: implement this class - */ -public class FileListPipesIterator { -} diff --git a/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/iterator/filelist/FileListPipesIteratorTest.java b/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/iterator/filelist/FileListPipesIteratorTest.java deleted file mode 100644 index 97335523fe..0000000000 --- a/tika-pipes/tika-pipes-plugins/tika-pipes-csv/src/test/java/org/apache/tika/pipes/iterator/filelist/FileListPipesIteratorTest.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.tika.pipes.iterator.filelist; - -public class FileListPipesIteratorTest { -/* - @Test - public void testBasic() throws Exception { - Path p = Paths.get(this.getClass().getResource("/test-documents/file-list.txt").toURI()); - FileListPipesIterator it = new FileListPipesIterator(); - it.setFetcherId("f"); - it.setEmitterId("e"); - it.setFileList(p.toAbsolutePath().toString()); - it.setHasHeader(false); - it.checkInitialization(InitializableProblemHandler.DEFAULT); - List<String> lines = new ArrayList<>(); - - for (FetchEmitTuple t : it) { - assertEquals(t.getFetchKey().getFetchKey(), t.getEmitKey().getEmitKey()); - assertEquals(t.getId(), t.getEmitKey().getEmitKey()); - assertEquals("f", t.getFetchKey().getFetcherId()); - assertEquals("e", t.getEmitKey().getEmitterId()); - lines.add(t.getId()); - } - assertEquals("the", lines.get(0)); - assertEquals(8, lines.size()); - assertFalse(lines.contains("quick")); - } - - @Test - public void testHasHeader() throws Exception { - Path p = Paths.get(this.getClass().getResource("/test-documents/file-list.txt").toURI()); - FileListPipesIterator it = new FileListPipesIterator(); - it.setFetcherId("f"); - it.setEmitterId("e"); - it.setFileList(p.toAbsolutePath().toString()); - it.setHasHeader(true); - it.checkInitialization(InitializableProblemHandler.DEFAULT); - List<String> lines = new ArrayList<>(); - - for (FetchEmitTuple t : it) { - assertEquals(t.getFetchKey().getFetchKey(), t.getEmitKey().getEmitKey()); - assertEquals(t.getId(), t.getEmitKey().getEmitKey()); - assertEquals("f", t.getFetchKey().getFetcherId()); - assertEquals("e", t.getEmitKey().getEmitterId()); - lines.add(t.getId()); - } - assertEquals("brown", lines.get(0)); - assertFalse(lines.contains("quick")); - assertEquals(7, lines.size()); - } - - */ -}
