[ 
https://issues.apache.org/jira/browse/LUCENE-7805?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16176371#comment-16176371
 ] 

Steve Rowe commented on LUCENE-7805:
------------------------------------

Another failure, from 
[https://builds.apache.org/job/Lucene-Solr-NightlyTests-7.x/49] (reproduces for 
me on master):

{noformat}
Checking out Revision dd59822ac94ee0b5cc2bedcac38f60442c1af5e6 
(refs/remotes/origin/branch_7x)
[...]
   [junit4] Suite: org.apache.lucene.analysis.core.TestRandomChains
   [junit4]   2> TEST FAIL: useCharFilter=false text=' \ubc25 
\ud834\udc6e\ud834\udc76\ud834\udc3e \u467e\ued9e\u0003#\ufb63\u032d 
\u2c61\u2c62\u2c61\u2c76\u2c60\u2c7f \u30da\u30dd\u30f2\u30cf\u30d4 
\u2182\u2150\u218f\u215a\u2166\u216b\u2157\u215b\u216a\u2172\u215c\u2165 udb 
\ua835\ua830\ua833\ua833\ua83b\ua836\ua834\ua83b\ua83a \udb51\udff0\ud85a\udfde'
   [junit4]   2> Exception from random analyzer: 
   [junit4]   2> charfilters=
   [junit4]   2>   
org.apache.lucene.analysis.pattern.PatternReplaceCharFilter(a, <KATAKANA>, 
java.io.StringReader@6a8a3a98)
   [junit4]   2> tokenizer=
   [junit4]   2>   org.apache.lucene.analysis.standard.StandardTokenizer()
   [junit4]   2> filters=
   [junit4]   2>   
org.apache.lucene.analysis.shingle.ShingleFilter(ValidatingTokenFilter@3d7cdb0 
term=,bytes=[],startOffset=0,endOffset=0,positionIncrement=1,positionLength=1,type=word,termFrequency=1)
   [junit4]   2>   
org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter(ValidatingTokenFilter@7001ffb8
 
term=,bytes=[],startOffset=0,endOffset=0,positionIncrement=1,positionLength=1,type=word,termFrequency=1,keyword=false,
 a)
   [junit4]   2>   
org.apache.lucene.analysis.cjk.CJKBigramFilter(ValidatingTokenFilter@7398e9e7 
term=,bytes=[],startOffset=0,endOffset=0,positionIncrement=1,positionLength=1,type=word,termFrequency=1,keyword=false,
 -43)
   [junit4]   2>   
org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter(ValidatingTokenFilter@6503b497
 
term=,bytes=[],startOffset=0,endOffset=0,positionIncrement=1,positionLength=1,type=word,termFrequency=1,keyword=false)
   [junit4]   2> offsetsAreCorrect=false
   [junit4]   2> NOTE: download the large Jenkins line-docs file by running 
'ant get-jenkins-line-docs' in the lucene directory.
   [junit4]   2> NOTE: reproduce with: ant test  -Dtestcase=TestRandomChains 
-Dtests.method=testRandomChainsWithLargeStrings -Dtests.seed=5700948A30BC1917 
-Dtests.multiplier=2 -Dtests.nightly=true -Dtests.slow=true 
-Dtests.linedocsfile=/home/jenkins/jenkins-slave/workspace/Lucene-Solr-NightlyTests-7.x/test-data/enwiki.random.lines.txt
 -Dtests.locale=it-CH -Dtests.timezone=Africa/Luanda -Dtests.asserts=true 
-Dtests.file.encoding=ISO-8859-1
   [junit4] ERROR   8.89s J0 | 
TestRandomChains.testRandomChainsWithLargeStrings <<<
   [junit4]    > Throwable #1: java.lang.IllegalArgumentException: startOffset 
must be non-negative, and endOffset must be >= startOffset, and offsets must 
not go backwards startOffset=24,endOffset=31,lastStartOffset=27 for field 
'dummy'
   [junit4]    >        at 
__randomizedtesting.SeedInfo.seed([5700948A30BC1917:3D5B2B9B69F239E4]:0)
   [junit4]    >        at 
org.apache.lucene.index.DefaultIndexingChain$PerField.invert(DefaultIndexingChain.java:767)
   [junit4]    >        at 
org.apache.lucene.index.DefaultIndexingChain.processField(DefaultIndexingChain.java:430)
   [junit4]    >        at 
org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:392)
   [junit4]    >        at 
org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:239)
   [junit4]    >        at 
org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:481)
   [junit4]    >        at 
org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1717)
   [junit4]    >        at 
org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:1462)
   [junit4]    >        at 
org.apache.lucene.index.RandomIndexWriter.addDocument(RandomIndexWriter.java:171)
   [junit4]    >        at 
org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData(BaseTokenStreamTestCase.java:650)
   [junit4]    >        at 
org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData(BaseTokenStreamTestCase.java:540)
   [junit4]    >        at 
org.apache.lucene.analysis.core.TestRandomChains.testRandomChainsWithLargeStrings(TestRandomChains.java:883)
   [junit4]    >        at java.lang.Thread.run(Thread.java:748)
[...]
   [junit4]   2> NOTE: test params are: codec=Lucene70, 
sim=RandomSimilarity(queryNorm=false): {dummy=DFR I(n)L1}, locale=it-CH, 
timezone=Africa/Luanda
   [junit4]   2> NOTE: Linux 3.13.0-88-generic amd64/Oracle Corporation 
1.8.0_144 (64-bit)/cpus=4,threads=1,free=185203120,total=275775488
{noformat}

> TestRandomChains.testRandomChainsWithLargeStrings() failures
> ------------------------------------------------------------
>
>                 Key: LUCENE-7805
>                 URL: https://issues.apache.org/jira/browse/LUCENE-7805
>             Project: Lucene - Core
>          Issue Type: Bug
>            Reporter: Steve Rowe
>
> My Jenkins found a reproducing master seed; it looks like FlattenGraphFilter 
> is where the problem happens:
> {noformat}
> Checking out Revision 680f4d7fd378868254786107de92a894758f667c 
> (refs/remotes/origin/master)
> [...]
>    [junit4] Suite: org.apache.lucene.analysis.core.TestRandomChains
>    [junit4]   2> TEST FAIL: useCharFilter=false text='\u0003J\u522f  nwqbl  
> uwtps  ob zdyokom ){0'
>    [junit4]   2> Exception from random analyzer: 
>    [junit4]   2> charfilters=
>    [junit4]   2>   
> org.apache.lucene.analysis.charfilter.HTMLStripCharFilter(java.io.StringReader@3ab617ae,
>  [])
>    [junit4]   2>   
> org.apache.lucene.analysis.charfilter.HTMLStripCharFilter(org.apache.lucene.analysis.charfilter.HTMLStripCharFilter@23e3c717)
>    [junit4]   2> tokenizer=
>    [junit4]   2>   org.apache.lucene.analysis.ngram.NGramTokenizer(9, 43)
>    [junit4]   2> filters=
>    [junit4]   2>   
> org.apache.lucene.analysis.miscellaneous.CodepointCountFilter(ValidatingTokenFilter@6b4708ea
>  
> term=,bytes=[],startOffset=0,endOffset=0,positionIncrement=1,positionLength=1,type=word,
>  33, 44)
>    [junit4]   2>   
> org.apache.lucene.analysis.shingle.ShingleFilter(ValidatingTokenFilter@5533fb25
>  
> term=,bytes=[],startOffset=0,endOffset=0,positionIncrement=1,positionLength=1,type=word,
>  <EMAIL>)
>    [junit4]   2>   
> org.apache.lucene.analysis.core.FlattenGraphFilter(ValidatingTokenFilter@4ef4c44
>  
> term=,bytes=[],startOffset=0,endOffset=0,positionIncrement=1,positionLength=1,type=word)
>    [junit4]   2>   
> org.apache.lucene.analysis.miscellaneous.KeepWordFilter(ValidatingTokenFilter@15baa1c7
>  
> term=,bytes=[],startOffset=0,endOffset=0,positionIncrement=1,positionLength=1,type=word,
>  [akbnucwt, vrkwm, jtomhk, jxgmfalr])
>    [junit4]   2> offsetsAreCorrect=true
>    [junit4]   2> NOTE: reproduce with: ant test  -Dtestcase=TestRandomChains 
> -Dtests.method=testRandomChainsWithLargeStrings -Dtests.seed=E9460213902F2F82 
> -Dtests.slow=true -Dtests.locale=fi-FI -Dtests.timezone=Europe/Malta 
> -Dtests.asserts=true -Dtests.file.encoding=US-ASCII
>    [junit4] FAILURE 0.19s J7 | 
> TestRandomChains.testRandomChainsWithLargeStrings <<<
>    [junit4]    > Throwable #1: java.lang.AssertionError: outputEndNode=3 vs 
> inputTo=2
>    [junit4]    >      at 
> __randomizedtesting.SeedInfo.seed([E9460213902F2F82:831DBD02C9610F71]:0)
>    [junit4]    >      at 
> org.apache.lucene.analysis.core.FlattenGraphFilter.incrementToken(FlattenGraphFilter.java:335)
>    [junit4]    >      at 
> org.apache.lucene.analysis.ValidatingTokenFilter.incrementToken(ValidatingTokenFilter.java:67)
>    [junit4]    >      at 
> org.apache.lucene.analysis.FilteringTokenFilter.incrementToken(FilteringTokenFilter.java:51)
>    [junit4]    >      at 
> org.apache.lucene.analysis.ValidatingTokenFilter.incrementToken(ValidatingTokenFilter.java:67)
>    [junit4]    >      at 
> org.apache.lucene.analysis.BaseTokenStreamTestCase.checkAnalysisConsistency(BaseTokenStreamTestCase.java:731)
>    [junit4]    >      at 
> org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData(BaseTokenStreamTestCase.java:642)
>    [junit4]    >      at 
> org.apache.lucene.analysis.BaseTokenStreamTestCase.checkRandomData(BaseTokenStreamTestCase.java:540)
>    [junit4]    >      at 
> org.apache.lucene.analysis.core.TestRandomChains.testRandomChainsWithLargeStrings(TestRandomChains.java:880)
>    [junit4]    >      at java.lang.Thread.run(Thread.java:745)
>    [junit4]   2> NOTE: test params are: codec=Asserting(Lucene70): 
> {dummy=PostingsFormat(name=LuceneVarGapFixedInterval)}, docValues:{}, 
> maxPointsInLeafNode=807, maxMBSortInHeap=5.007333045299232, 
> sim=RandomSimilarity(queryNorm=true): {}, locale=fi-FI, timezone=Europe/Malta
>    [junit4]   2> NOTE: Linux 4.1.0-custom2-amd64 amd64/Oracle Corporation 
> 1.8.0_77 (64-bit)/cpus=16,threads=1,free=492062472,total=525336576
> {noformat}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to