Author: koji
Date: Mon Sep 27 16:10:29 2010
New Revision: 1001796
URL: http://svn.apache.org/viewvc?rev=1001796&view=rev
Log:
LUCENE-2529, LUCENE-2668: always apply position increment gap and offset gap
between values
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL:
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Sep 27 16:10:29 2010
@@ -108,6 +108,11 @@ Changes in backwards compatibility polic
* LUCENE-2600: Remove IndexReader.isDeleted in favor of
IndexReader.getDeletedDocs(). (Mike McCandless)
+* LUCENE-2529, LUCENE-2668: Position increment gap and offset gap of empty
+ values in multi-valued fields have been changed for some cases in the index.
+ If you index empty fields and use positions/offsets information on those
+ fields, reindexing is recommended. (David Smiley, Koji Sekiguchi)
+
Changes in Runtime Behavior
* LUCENE-2650: The behavior of FSDirectory.open has changed. On 64-bit
Modified:
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
URL:
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
---
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
(original)
+++
lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
Mon Sep 27 16:10:29 2010
@@ -156,8 +156,7 @@ public abstract class BaseFragmentsBuild
int startOffset, int endOffset ){
while( buffer.length()< endOffset&& index[0]< values.length ){
buffer.append( values[index[0]] );
- if( values[index[0]].length()> 0&& index[0] + 1< values.length )
- buffer.append( multiValuedSeparator );
+ buffer.append( multiValuedSeparator );
index[0]++;
}
int eo = buffer.length()< endOffset ? buffer.length() : endOffset;
@@ -168,7 +167,7 @@ public abstract class BaseFragmentsBuild
int startOffset, int endOffset ){
while( buffer.length()< endOffset&& index[0]< values.length ){
buffer.append( values[index[0]].stringValue() );
- if( values[index[0]].isTokenized()&& values[index[0]].stringValue().length()>
0&& index[0] + 1< values.length )
+ if( values[index[0]].isTokenized() )
buffer.append( multiValuedSeparator );
index[0]++;
}
Modified:
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL:
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
---
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
(original)
+++
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
Mon Sep 27 16:10:29 2010
@@ -355,16 +355,20 @@ public abstract class AbstractTestCase e
protected void makeIndexShortMV() throws Exception {
+ // 0
// ""
+ // 1
// ""
- // 012345
+ // 234567
// "a b c"
// 0 1 2
-
+
+ // 8
// ""
- // 6789
+ // 111
+ // 9012
// "d e"
// 3 4
make1dmfIndex( shortMVValues );
Modified:
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
URL:
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
---
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
(original)
+++
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldPhraseListTest.java
Mon Sep 27 16:10:29 2010
@@ -165,7 +165,7 @@ public class FieldPhraseListTest extends
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
FieldPhraseList fpl = new FieldPhraseList( stack, fq );
assertEquals( 1, fpl.phraseList.size() );
- assertEquals( "d(1.0)((6,7))", fpl.phraseList.get( 0 ).toString() );
+ assertEquals( "d(1.0)((9,10))", fpl.phraseList.get( 0 ).toString() );
}
public void test1PhraseLongMV() throws Exception {
Modified:
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
URL:
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
---
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
(original)
+++
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/FieldTermStackTest.java
Mon Sep 27 16:10:29 2010
@@ -132,7 +132,7 @@ public class FieldTermStackTest extends
FieldQuery fq = new FieldQuery( tq( "d" ), true, true );
FieldTermStack stack = new FieldTermStack( reader, 0, F, fq );
assertEquals( 1, stack.termList.size() );
- assertEquals( "d(6,7,3)", stack.pop().toString() );
+ assertEquals( "d(9,10,3)", stack.pop().toString() );
}
public void test1PhraseLongMV() throws Exception {
Modified:
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
URL:
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
---
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
(original)
+++
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/ScoreOrderFragmentsBuilderTest.java
Mon Sep 27 16:10:29 2010
@@ -27,7 +27,7 @@ public class ScoreOrderFragmentsBuilderT
String[] f = sofb.createFragments( reader, 0, F, ffl, 3 );
assertEquals( 3, f.length );
// check score order
- assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b", f[0] );
+ assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b ", f[0] );
assertEquals( "b b<b>a</b> b<b>a</b> b b b b b ", f[1] );
assertEquals( "<b>a</b> b b b b b b b b b ", f[2] );
}
Modified:
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
URL:
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
---
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
(original)
+++
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragListBuilderTest.java
Mon Sep 27 16:10:29 2010
@@ -143,7 +143,7 @@ public class SimpleFragListBuilderTest e
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
assertEquals( 1, ffl.fragInfos.size() );
- assertEquals( "subInfos=(d((6,7)))/1.0(0,100)", ffl.fragInfos.get( 0
).toString() );
+ assertEquals( "subInfos=(d((9,10)))/1.0(3,103)", ffl.fragInfos.get( 0
).toString() );
}
public void test1PhraseLongMV() throws Exception {
Modified:
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
URL:
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
---
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
(original)
+++
lucene/dev/trunk/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
Mon Sep 27 16:10:29 2010
@@ -34,11 +34,11 @@ public class SimpleFragmentsBuilderTest
public void test1TermIndex() throws Exception {
FieldFragList ffl = ffl( "a", "a" );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
- assertEquals( "<b>a</b>", sfb.createFragment( reader, 0, F, ffl ) );
+ assertEquals( "<b>a</b> ", sfb.createFragment( reader, 0, F, ffl ) );
// change tags
sfb = new SimpleFragmentsBuilder( new String[]{ "[" }, new String[]{ "]"
} );
- assertEquals( "[a]", sfb.createFragment( reader, 0, F, ffl ) );
+ assertEquals( "[a] ", sfb.createFragment( reader, 0, F, ffl ) );
}
public void test2Frags() throws Exception {
@@ -48,7 +48,7 @@ public class SimpleFragmentsBuilderTest
// 3 snippets requested, but should be 2
assertEquals( 2, f.length );
assertEquals( "<b>a</b> b b b b b b b b b ", f[0] );
- assertEquals( "b b<b>a</b> b<b>a</b> b", f[1] );
+ assertEquals( "b b<b>a</b> b<b>a</b> b ", f[1] );
}
public void test3Frags() throws Exception {
@@ -58,7 +58,7 @@ public class SimpleFragmentsBuilderTest
assertEquals( 3, f.length );
assertEquals( "<b>a</b> b b b b b b b b b ", f[0] );
assertEquals( "b b<b>a</b> b<b>a</b> b b b b b ", f[1] );
- assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b", f[2] );
+ assertEquals( "<b>c</b> <b>a</b> <b>a</b> b b ", f[2] );
}
public void testTagsAndEncoder() throws Exception {
@@ -66,7 +66,7 @@ public class SimpleFragmentsBuilderTest
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
String[] preTags = { "[" };
String[] postTags = { "]" };
- assertEquals( "<h1> [a]</h1>",
+ assertEquals( "<h1> [a]</h1> ",
sfb.createFragment( reader, 0, F, ffl, preTags, postTags, new
SimpleHTMLEncoder() ) );
}
@@ -88,7 +88,7 @@ public class SimpleFragmentsBuilderTest
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
- assertEquals( "a b c<b>d</b> e", sfb.createFragment( reader, 0, F, ffl )
);
+ assertEquals( " b c<b>d</b> e ", sfb.createFragment( reader, 0, F, ffl )
);
}
public void test1PhraseLongMV() throws Exception {
@@ -113,7 +113,7 @@ public class SimpleFragmentsBuilderTest
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
- assertEquals( "ssing<b>speed</b>, the", sfb.createFragment( reader, 0, F,
ffl ) );
+ assertEquals( "ssing<b>speed</b>, the ", sfb.createFragment( reader, 0, F,
ffl ) );
}
public void testUnstoredField() throws Exception {
@@ -163,6 +163,6 @@ public class SimpleFragmentsBuilderTest
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
sfb.setMultiValuedSeparator( '/' );
- assertEquals( "a b c/<b>d</b> e", sfb.createFragment( reader, 0, F, ffl )
);
+ assertEquals( " b c//<b>d</b> e/", sfb.createFragment( reader, 0, F, ffl
) );
}
}
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
URL:
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
---
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
(original)
+++
lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
Mon Sep 27 16:10:29 2010
@@ -75,10 +75,8 @@ final class DocInverterPerField extends
// consumer if it wants to see this particular field
// tokenized.
if (field.isIndexed()&& doInvert) {
-
- final boolean anyToken;
- if (fieldState.length> 0)
+ if (i> 0)
fieldState.position +=
docState.analyzer.getPositionIncrementGap(fieldInfo.name);
if (!field.isTokenized()) { // un-tokenized field
@@ -99,7 +97,6 @@ final class DocInverterPerField extends
fieldState.offset += valueLength;
fieldState.length++;
fieldState.position++;
- anyToken = valueLength> 0;
} else { // tokenized field
final TokenStream stream;
final TokenStream streamValue = field.tokenStreamValue();
@@ -189,14 +186,12 @@ final class DocInverterPerField extends
stream.end();
fieldState.offset += offsetAttribute.endOffset();
- anyToken = fieldState.length> startLength;
} finally {
stream.close();
}
}
- if (anyToken)
- fieldState.offset += docState.analyzer.getOffsetGap(field);
+ fieldState.offset += docState.analyzer.getOffsetGap(field);
fieldState.boost *= field.getBoost();
}
Modified:
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java
URL:
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
---
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java
(original)
+++
lucene/dev/trunk/lucene/src/test/org/apache/lucene/analysis/MockAnalyzer.java
Mon Sep 27 16:10:29 2010
@@ -30,6 +30,7 @@ public final class MockAnalyzer extends
private final boolean lowerCase;
private final CharacterRunAutomaton filter;
private final boolean enablePositionIncrements;
+ private int positionIncrementGap;
/**
* Creates a new MockAnalyzer.
@@ -89,4 +90,13 @@ public final class MockAnalyzer extends
return saved.filter;
}
}
+
+ public void setPositionIncrementGap(int positionIncrementGap){
+ this.positionIncrementGap = positionIncrementGap;
+ }
+
+ @Override
+ public int getPositionIncrementGap(String fieldName){
+ return positionIncrementGap;
+ }
}
\ No newline at end of file
Modified:
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
URL:
http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java?rev=1001796&r1=1001795&r2=1001796&view=diff
==============================================================================
---
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
(original)
+++
lucene/dev/trunk/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java
Mon Sep 27 16:10:29 2010
@@ -4266,11 +4266,11 @@ public class TestIndexWriter extends Luc
TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0,
"field"));
TermVectorOffsetInfo[] termOffsets = tpv.getOffsets(0);
assertEquals(1, termOffsets.length);
- assertEquals(0, termOffsets[0].getStartOffset());
- assertEquals(6, termOffsets[0].getEndOffset());
+ assertEquals(1, termOffsets[0].getStartOffset());
+ assertEquals(7, termOffsets[0].getEndOffset());
termOffsets = tpv.getOffsets(1);
- assertEquals(7, termOffsets[0].getStartOffset());
- assertEquals(10, termOffsets[0].getEndOffset());
+ assertEquals(8, termOffsets[0].getStartOffset());
+ assertEquals(11, termOffsets[0].getEndOffset());
r.close();
dir.close();
}
@@ -4301,8 +4301,37 @@ public class TestIndexWriter extends Luc
assertEquals(0, termOffsets[0].getStartOffset());
assertEquals(4, termOffsets[0].getEndOffset());
termOffsets = tpv.getOffsets(1);
- assertEquals(5, termOffsets[0].getStartOffset());
- assertEquals(11, termOffsets[0].getEndOffset());
+ assertEquals(6, termOffsets[0].getStartOffset());
+ assertEquals(12, termOffsets[0].getEndOffset());
+ r.close();
+ dir.close();
+ }
+
+ // LUCENE-2529
+ public void testPositionIncrementGapEmptyField() throws Exception {
+ Directory dir = newDirectory();
+ MockAnalyzer analyzer = new MockAnalyzer();
+ analyzer.setPositionIncrementGap( 100 );
+ IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(
+ TEST_VERSION_CURRENT, analyzer));
+ Document doc = new Document();
+ Field f = newField("field", "", Field.Store.NO,
+ Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
+ Field f2 = newField("field", "crunch man", Field.Store.NO,
+ Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS);
+ doc.add(f);
+ doc.add(f2);
+ w.addDocument(doc);
+ w.close();
+
+ IndexReader r = IndexReader.open(dir, true);
+ TermPositionVector tpv = ((TermPositionVector) r.getTermFreqVector(0,
"field"));
+ int[] poss = tpv.getTermPositions(0);
+ assertEquals(1, poss.length);
+ assertEquals(100, poss[0]);
+ poss = tpv.getTermPositions(1);
+ assertEquals(1, poss.length);
+ assertEquals(101, poss[0]);
r.close();
dir.close();
}