Hi guys,
I am still learning Lucene, and yesterday I ran into a very weird problem:
no matter what I did, I always got multiple index files instead of a
compound file. By default, Lucene uses the compound file format, but I
still get multiple files even when I call setUseCompoundFile(true)
explicitly. My code is adapted from the book Lucene in Action, 2nd ed.,
and the demo code on lucene.apache.org. How can I fix this?

package act.indexing;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.FileFilter;
import java.io.FileReader;
import java.io.IOException;

public class Indexer {

        public Indexer(String indexDir, boolean create) throws IOException {
                Directory dir = FSDirectory.open(new File(indexDir));
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
                IndexWriterConfig iwc = new 
IndexWriterConfig(Version.LUCENE_30, analyzer);
                
                if (create) {
                        iwc.setOpenMode(OpenMode.CREATE);
                } else {
                        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
                }
                
                writer = new IndexWriter(dir, iwc);
                writer.setUseCompoundFile(true);
        }

        public int index(String dataDir, FileFilter filter) 
                throws Exception{

                File[] files = new File(dataDir).listFiles();

                for (File f : files) {
                        if ( !f.isDirectory() && !f.isHidden() 
                                        && f.exists() && f.canRead() 
                                        && (filter == null || filter.accept(f)) 
) {
                                indexFiles(f);
                        }
                }
                return writer.numDocs();
        }

        public void close() throws IOException {
                writer.optimize();
                writer.close();
        }

        private IndexWriter writer;

        private void indexFiles(File f) throws Exception {
                System.out.println("Indexing " + f.getCanonicalPath());
                Document doc = getDocument(f);
                writer.addDocument(doc);
        }

        private Document getDocument(File f) throws Exception {
                Document doc = new Document();
                doc.add( new Field("contents", new FileReader(f)) );
                doc.add( new Field("filename", f.getName(), Field.Store.YES, 
Field.Index.NOT_ANALYZED) );
                doc.add( new Field("fullpath", f.getCanonicalPath(), 
Field.Store.YES, Field.Index.NOT_ANALYZED) );
                return doc;
        }

        private static class TextFilesFilter implements FileFilter {
                public boolean accept(File file) {
                        return file.getName().toLowerCase().endsWith(".txt");
                }
        }

        public static void main(String[] args) throws Exception {

                String usage = "Usage: java " + "act.indexing.Indexer" 
                                        + "[-index INDEX_PATH] " + "[-data 
DATA_PATH] " + "[-update]";
                String index_path = null;
                String data_path = null;
                boolean create = true;

                for (int i = 0; i < args.length; i++) {
                        if ("-index".equals(args[i])) {
                                index_path = args[i+1];
                        }
                        else if ("-data".equals(args[i])) {
                                data_path = args[i+1];
                        }
                        else if ("-update".equals(args[i])) {
                                create = false;
                        }
                }

                if (index_path == null || data_path == null) {
                        System.out.println(usage);
                        System.exit(1);
                }

                long start = System.currentTimeMillis();

                Indexer indexer = new Indexer(index_path, create);
                int docsIndexed = 0;

                try {
                        docsIndexed = indexer.index(data_path, new 
TextFilesFilter());
                } catch(Exception e) {
                        e.printStackTrace();
                } finally {
                        indexer.close();
                }
                long end = System.currentTimeMillis();

                System.out.println("Indexing " + docsIndexed + " files took " + 
(end-start) + " milliseconds");
        }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org

Reply via email to