Hi guys, I am still learning Lucene, and I had a very weird problem yesterday that no matter what I did, I always got multifiles not compound file. By default, Lucene use compound file, but I just got this weird problem, even if I used setUseCompoundFile(true) explicitly... And my code is actually adapted from the book Lucene In Action 2ed and the demo code in lucene.apache.org... How can I fix this?
package act.indexing; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import java.io.File; import java.io.FileFilter; import java.io.IOException; import java.io.FileReader; public class Indexer { public Indexer(String indexDir, boolean create) throws IOException { Directory dir = FSDirectory.open(new File(indexDir)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_30, analyzer); if (create) { iwc.setOpenMode(OpenMode.CREATE); } else { iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } writer = new IndexWriter(dir, iwc); writer.setUseCompoundFile(true); } public int index(String dataDir, FileFilter filter) throws Exception{ File[] files = new File(dataDir).listFiles(); for (File f : files) { if ( !f.isDirectory() && !f.isHidden() && f.exists() && f.canRead() && (filter == null || filter.accept(f)) ) { indexFiles(f); } } return writer.numDocs(); } public void close() throws IOException { writer.optimize(); writer.close(); } private IndexWriter writer; private void indexFiles(File f) throws Exception { System.out.println("Indexing " + f.getCanonicalPath()); Document doc = getDocument(f); writer.addDocument(doc); } private Document getDocument(File f) throws Exception { Document doc = new Document(); doc.add( new Field("contents", new FileReader(f)) ); doc.add( new Field("filename", f.getName(), Field.Store.YES, Field.Index.NOT_ANALYZED) ); doc.add( new Field("fullpath", f.getCanonicalPath(), Field.Store.YES, Field.Index.NOT_ANALYZED) ); return doc; } private static class TextFilesFilter implements FileFilter { public boolean accept(File file) { return file.getName().toLowerCase().endsWith(".txt"); } } public static void main(String[] args) throws Exception { String usage = "Usage: java " + "act.indexing.Indexer" + "[-index INDEX_PATH] " + "[-data DATA_PATH] " + "[-update]"; String index_path = null; String data_path = null; boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { index_path = args[i+1]; } else if ("-data".equals(args[i])) { data_path = args[i+1]; } else if ("-update".equals(args[i])) { create = false; } } if (index_path == null || data_path == null) { System.out.println(usage); System.exit(1); } long start = System.currentTimeMillis(); Indexer indexer = new Indexer(index_path, create); int docsIndexed = 0; try { docsIndexed = indexer.index(data_path, new TextFilesFilter()); } catch(Exception e) { e.printStackTrace(); } finally { indexer.close(); } long end = System.currentTimeMillis(); System.out.println("Indexing " + docsIndexed + " files took " + (end-start) + " milliseconds"); } }
--------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org