I have a problem with this line: AtomicReader reader = indexReader.leaves().get(0).reader(); Is it valid in Lucene 4.0.0?
> Date: Fri, 24 May 2013 13:41:05 -0400 > Subject: Re: lucene 4.0.0 > From: brendan.grain...@gmail.com > To: java-user@lucene.apache.org > > Hi Mary, > > I've been out of the loop with Lucene and java for a bit so this is might > not be too correct, but here is an example of how it might be accomplished > (also you can see it in this gist: https://gist.github.com/rainkinz/5645139). > The output looks like this: > > ** Also note I'm using Lucene 4.3, however I set the version to be > Version.LUCENE_40 for you. I don't think the APIs are different in this > case. > > --------------------------------------------------- > Term 'mary' appears 5 in the index > in doc 0 the term mary appears 1 times at positions 1 > in doc 2 the term mary appears 1 times at positions 3 > in doc 4 the term mary appears 1 times at positions 1 > in doc 8 the term mary appears 1 times at positions 3 > in doc 9 the term mary appears 1 times at positions 6 > etc > > > > import org.apache.lucene.analysis.core.WhitespaceAnalyzer; > import org.apache.lucene.document.Document; > import org.apache.lucene.document.Field; > import org.apache.lucene.document.TextField; > import org.apache.lucene.index.*; > import org.apache.lucene.store.Directory; > import org.apache.lucene.store.RAMDirectory; > import org.apache.lucene.util.BytesRef; > import org.apache.lucene.util.Version; > > import java.io.IOException; > import java.util.Random; > > public class CountingTerms { > > private static final Version VERSION = Version.LUCENE_40; > > private static final String[] terms = "hi am mary and i have a problem > with lucene".split(" "); > > private final Directory indexDir = new RAMDirectory(); > > private String randomTerms() { > Random rand = new Random(); > StringBuilder sb = new StringBuilder(); > int numTerms = rand.nextInt(terms.length); > for (int i = 0; i < numTerms; i++) { > sb.append(terms[rand.nextInt(terms.length)]).append(" "); > } > return sb.toString(); > } > > private void addDocs(IndexWriter 
writer) throws IOException { > for (int i = 0; i < 10; i++) { > Document doc = new Document(); > String randomStr = randomTerms(); > puts("Adding random str: " + randomStr); > IndexableField field = new TextField("text", randomStr, > Field.Store.YES); > doc.add(field); > writer.addDocument(doc); > } > } > > private void countTerms() throws IOException { > DirectoryReader indexReader = DirectoryReader.open(indexDir); > AtomicReader reader = indexReader.leaves().get(0).reader(); > > Fields fields = reader.fields(); > Terms terms = fields.terms("text"); > TermsEnum termsEnum = terms.iterator(null); > BytesRef term; > > while ((term = termsEnum.next()) != null) { > puts("---------------------------------------------------"); > puts("Term '" + term.utf8ToString() + "' appears " + > termsEnum.totalTermFreq() + " in the index"); > DocsAndPositionsEnum docPosEnum = > termsEnum.docsAndPositions(reader.getLiveDocs(), > null, > DocsAndPositionsEnum.FLAG_OFFSETS); > int docid; > while ((docid = docPosEnum.nextDoc()) != > DocsAndPositionsEnum.NO_MORE_DOCS) { > > int freq = docPosEnum.freq(); > int[] positions = new int[freq]; > for (int i = 0; i < freq; i++) { > int position = docPosEnum.nextPosition(); > positions[i]=position; > } > > puts("in doc " + docid + " the term " + term.utf8ToString() + " > appears " + freq + " times at positions " + ppArray(positions)); > } > > } > > indexReader.close(); > } > > private String ppArray(int[] arr) { > StringBuilder sb = new StringBuilder(); > for (int i = 0; i < arr.length; i++) { > sb.append(arr[i]); > if (i + 1 < arr.length) sb.append(", "); > } > return sb.toString(); > } > > private void puts(Object msg) { > System.out.println(msg); > } > > private void index() throws IOException { > IndexWriter indexWriter = new IndexWriter(indexDir, > new IndexWriterConfig(VERSION, new > WhitespaceAnalyzer(VERSION))); > addDocs(indexWriter); > indexWriter.commit(); > indexWriter.close(); > } > > public static void main(String[] args) throws 
Exception { > CountingTerms ct = new CountingTerms(); > ct.index(); > ct.countTerms(); > } > > } > > > > On Fri, May 24, 2013 at 12:14 PM, mary meriem <mel-mer...@hotmail.fr> wrote: > > > Hi, I am Mary and I have a problem with Lucene. Actually, I work with Lucene > > 4.0.0; my problem is: how can I list all the terms and display the > > position of each term in each document and their frequency? Please help. > > > > > > > -- > Brendan Grainger > www.kuripai.com