You are right - when you search across multiple collections, whether 
through an alias or explicitly, Solr no longer guarantees the uniqueness of 
IDs for you, as uniqueness is only enforced per collection.
Meaning, you need to enforce ID uniqueness yourself. And if using routed 
aliases, ..."It’s extremely important with all routed aliases that the route 
values NOT change."

So if this is outside your control, the question becomes - are documents with 
the same ID really duplicates that should not be counted twice? Or are they 
distinct docs which happen to have the same ID?
If they are indeed duplicates, you may attempt to do duplicate removal in your 
query by e.g. adding fq={!collapse field=id} to your query.

Jan

> 24. mar. 2021 kl. 18:09 skrev Eran Buchnick <buchni...@gmail.com>:
> 
> Hi,
> I've noticed the following warning in the *aliases documentation*:
> *"...Reindexing a document with a different route value for the same ID*
> *produces two distinct documents with the same ID accessible via the*
> *alias..."*
> When I tested such a case, it seems that only one doc is actually retrieved, but
> when turning on *facets, they aren't aligned with the result set.*
> 
> Expected behavior or bug?
> If expected - how should I avoid dups and implement upserts without the
> overhead of preliminary queries?
> 
> My test:
> 1) create two collections test1 and test2 and alias named test for both
> 2) index docs with the same id to both of the collections
> {"id":123}
> 3) querying the alias as follows, with explain debug enabled:
> http://localhost:8983/solr/test/select?debug.explain.structured=true&debugQuery=on&facet.field=id&facet=on&q=*%3A*
> {
>  "responseHeader":{
>    "zkConnected":true,
>    "status":0,
>    "QTime":25,
>    "params":{
>      "q":"*:*",
>      "facet.field":"id",
>      "debug.explain.structured":"true",
>      "facet":"on",
>      "debugQuery":"on",
>      "_":"1616269705741"}},
> 
> "response":{*"numFound":1*
> ,"start":0,"maxScore":1.0,"numFoundExact":true,"docs":[
>      {
>        "id":"123",
>        "_version_":1694670492462481408}]
>  },
>  "facet_counts":{
>    "facet_queries":{},
>    "facet_fields":{
>      *"id":[*
> *        "123",2*]},
>    "facet_ranges":{},
>    "facet_intervals":{},
>    "facet_heatmaps":{}},
>  "debug":{
>    "track":{
>      "rid":"-31",
>      "EXECUTE_QUERY":{
>        "http://some_ip:8983/solr/test2_shard1_replica_n1/":{
>          "QTime":"3",
>          "ElapsedTime":"10",
>          "RequestPurpose":"GET_TOP_IDS,GET_FACETS,SET_TERM_STATS",
>          "NumFound":"1",
> 
> "Response":"{responseHeader={zkConnected=true,status=0,QTime=3,params={df=_text_,distrib=false,fl=[id,
> score],shards.purpose=16404,fsv=true,shard.url=
> http://some_ip:8983/solr/test2_shard1_replica_n1/,rid=-31,wt=javabin,_=1616269705741,facet.field=id,f.id.facet.mincount=0,debug=[false
> ,
> timing,
> track],start=0,f.id.facet.limit=160,collection=test1,test2,rows=10,debug.explain.structured=true,version=2,q=*:*,omitHeader=false,requestPurpose=GET_TOP_IDS,GET_FACETS,SET_TERM_STATS,NOW=1616270594521,isShard=true,facet=on,debugQuery=false}},response={numFound=1,numFoundExact=true,start=0,maxScore=1.0,docs=[SolrDocument{id=123,
> score=1.0}]},sort_values={},facet_counts={facet_queries={},facet_fields={id={123=1}},facet_ranges={},facet_intervals={},facet_heatmaps={}},debug={facet-debug={elapse=0,sub-facet=[{processor=SimpleFacets,elapse=0,action=field
> facet,maxThreads=0,sub-facet=[{elapse=0,requestedMethod=not
> specified,appliedMethod=FC,inputDocSetSize=1,field=id,numBuckets=2}]}]},timing={time=2.0,prepare={time=0.0,query={time=0.0},facet={time=0.0},facet_module={time=0.0},mlt={time=0.0},highlight={time=0.0},stats={time=0.0},expand={time=0.0},terms={time=0.0},debug={time=0.0}},process={time=2.0,query={time=0.0},facet={time=1.0},facet_module={time=0.0},mlt={time=0.0},highlight={time=0.0},stats={time=0.0},expand={time=0.0},terms={time=0.0},debug={time=0.0}}}}}"},
>        "http://some_ip:8983/solr/test1_shard1_replica_n1/":{
>          "QTime":"2",
>          "ElapsedTime":"12",
>          "RequestPurpose":"GET_TOP_IDS,GET_FACETS,SET_TERM_STATS",
>          "NumFound":"1",
> 
> "Response":"{responseHeader={zkConnected=true,status=0,QTime=2,params={df=_text_,distrib=false,fl=[id,
> score],shards.purpose=16404,fsv=true,shard.url=
> http://some_ip:8983/solr/test1_shard1_replica_n1/,rid=-31,wt=javabin,_=1616269705741,facet.field=id,f.id.facet.mincount=0,debug=[false
> ,
> timing,
> track],start=0,f.id.facet.limit=160,collection=test1,test2,rows=10,debug.explain.structured=true,version=2,q=*:*,omitHeader=false,requestPurpose=GET_TOP_IDS,GET_FACETS,SET_TERM_STATS,NOW=1616270594521,isShard=true,facet=on,debugQuery=false}},response={numFound=1,numFoundExact=true,start=0,maxScore=1.0,docs=[SolrDocument{id=123,
> score=1.0}]},sort_values={},facet_counts={facet_queries={},facet_fields={id={123=1}},facet_ranges={},facet_intervals={},facet_heatmaps={}},debug={facet-debug={elapse=0,sub-facet=[{processor=SimpleFacets,elapse=0,action=field
> facet,maxThreads=0,sub-facet=[{elapse=0,requestedMethod=not
> specified,appliedMethod=FC,inputDocSetSize=1,field=id,numBuckets=2}]}]},timing={time=2.0,prepare={time=0.0,query={time=0.0},facet={time=0.0},facet_module={time=0.0},mlt={time=0.0},highlight={time=0.0},stats={time=0.0},expand={time=0.0},terms={time=0.0},debug={time=0.0}},process={time=2.0,query={time=0.0},facet={time=1.0},facet_module={time=0.0},mlt={time=0.0},highlight={time=0.0},stats={time=0.0},expand={time=0.0},terms={time=0.0},debug={time=0.0}}}}}"}},
>      "GET_FIELDS":{
>        "http://some_ip:8983/solr/test2_shard1_replica_n1/":{
>          "QTime":"5",
>          "ElapsedTime":"8",
>          "RequestPurpose":"GET_FIELDS,GET_DEBUG,SET_TERM_STATS",
>          "NumFound":"1",
> 
> "Response":"{responseHeader={zkConnected=true,status=0,QTime=5,params={facet.field=id,df=_text_,distrib=false,debug=[timing,
> track],shards.purpose=16704,collection=test1,test2,shard.url=
> http://some_ip:8983/solr/test2_shard1_replica_n1/,rows=10,rid=-31,debug.explain.structured=true,version=2,q=*:*,omitHeader=false,requestPurpose=GET_FIELDS,GET_DEBUG,SET_TERM_STATS,NOW=1616270594521,ids=123,isShard=true,facet=false,wt=javabin,debugQuery=true,_=1616269705741}
> },response={numFound=1,numFoundExact=true,start=0,docs=[SolrDocument{id=123,
> _version_=1694670492462481408}]},debug={rawquerystring=*:*,querystring=*:*,parsedquery=MatchAllDocsQuery(*:*),parsedquery_toString=*:*,explain={123={match=true,value=1.0,description=*:*}},QParser=LuceneQParser,timing={time=4.0,prepare={time=0.0,query={time=0.0},facet={time=0.0},facet_module={time=0.0},mlt={time=0.0},highlight={time=0.0},stats={time=0.0},expand={time=0.0},terms={time=0.0},debug={time=0.0}},process={time=4.0,query={time=0.0},facet={time=0.0},facet_module={time=0.0},mlt={time=0.0},highlight={time=0.0},stats={time=0.0},expand={time=0.0},terms={time=0.0},debug={time=4.0}}}}}"}}},
>    "facet-debug":{
>      "elapse":0,
>      "sub-facet":[{
>          "processor":"SimpleFacets",
>          "elapse":0,
>          "action":"field facet",
>          "maxThreads":0,
>          "sub-facet":[{
>              "elapse":0,
>              "requestedMethod":"not specified",
>              "appliedMethod":"FC",
>              "inputDocSetSize":1,
>              "field":"id",
>              "numBuckets":2}]}]},
>    "timing":{
>      "time":8.0,
>      "prepare":{
>        "time":0.0,
>        "query":{
>          "time":0.0},
>        "facet":{
>          "time":0.0},
>        "facet_module":{
>          "time":0.0},
>        "mlt":{
>          "time":0.0},
>        "highlight":{
>          "time":0.0},
>        "stats":{
>          "time":0.0},
>        "expand":{
>          "time":0.0},
>        "terms":{
>          "time":0.0},
>        "debug":{
>          "time":0.0}},
>      "process":{
>        "time":8.0,
>        "query":{
>          "time":0.0},
>        "facet":{
>          "time":2.0},
>        "facet_module":{
>          "time":0.0},
>        "mlt":{
>          "time":0.0},
>        "highlight":{
>          "time":0.0},
>        "stats":{
>          "time":0.0},
>        "expand":{
>          "time":0.0},
>        "terms":{
>          "time":0.0},
>        "debug":{
>          "time":4.0}}},
>    "rawquerystring":"*:*",
>    "querystring":"*:*",
>    "parsedquery":"MatchAllDocsQuery(*:*)",
>    "parsedquery_toString":"*:*",
>    "QParser":"LuceneQParser",
>    "explain":{
>      "123":{
>        "match":true,
>        "value":1.0,
>        "description":"*:*"}}}}
> 
> Thanks.

Reply via email to