Hi, All you need to know is in the docs https://solr.apache.org/guide/solr/latest/indexing-guide/indexing-with-tika.html In particular * Your <lib> config references "contribs" folder which is gone in 9.x, see docs * Enable the extraction module with e.g. SOLR_MODULES=extraction We encourage you to take the step to 9.7.0, since earlier versions such as 9.3 will not get patched. You can also compare your configuration with the configuration samples in the latest release to discover other things you might need to change. A good tip is to study the upgrade notes before each version upgrade, they do contain much vital information! Jan
|
<?xml version="1.0" encoding="UTF-8" ?> <config> <luceneMatchVersion>8.2.0</luceneMatchVersion> <lib dir="${solr.install.dir:../../../..}/contrib/extraction/lib" regex=".*\.jar" /> <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-cell-\d.*\.jar" />
<lib dir="${solr.install.dir:../../../..}/contrib/clustering/lib/" regex=".*\.jar" /> <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-clustering-\d.*\.jar" /> <lib dir="${solr.install.dir:../../../..}/contrib/langid/lib/" regex=".*\.jar" /> <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-langid-\d.*\.jar" /> <lib dir="${solr.install.dir:../../../..}/contrib/velocity/lib" regex=".*\.jar" /> <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-velocity-\d.*\.jar" /> <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-ltr-\d.*\.jar" /> <dataDir>${solr.data.dir:}</dataDir> <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> <codecFactory class="solr.SchemaCodecFactory"/> <indexConfig> <lockType>${solr.lock.type:native}</lockType> </indexConfig> <jmx /> <updateHandler class="solr.DirectUpdateHandler2"> <updateLog> <str name="dir">${solr.ulog.dir:}</str> <int name="numVersionBuckets">${solr.ulog.numVersionBuckets:65536}</int> </updateLog> <autoCommit> <maxTime>${solr.autoCommit.maxTime:15000}</maxTime> <openSearcher>false</openSearcher> </autoCommit> <autoSoftCommit> <maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime> </autoSoftCommit> </updateHandler> <query> <maxBooleanClauses>${solr.max.booleanClauses:1024}</maxBooleanClauses> <filterCache class="solr.FastLRUCache" size="512" initialSize="512" autowarmCount="0"/> <queryResultCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/> <documentCache class="solr.LRUCache" size="512" initialSize="512" autowarmCount="0"/> <cache name="perSegFilter" class="solr.search.LRUCache" size="10" initialSize="0" autowarmCount="10" regenerator="solr.NoOpRegenerator" /> <enableLazyFieldLoading>true</enableLazyFieldLoading> <queryResultWindowSize>20</queryResultWindowSize> <queryResultMaxDocsCached>200</queryResultMaxDocsCached> <listener event="newSearcher" class="solr.QuerySenderListener"> <arr name="queries"> </arr> </listener> <listener event="firstSearcher" class="solr.QuerySenderListener"> <arr name="queries"> </arr> </listener> <useColdSearcher>false</useColdSearcher> </query> <requestDispatcher> <httpCaching never304="true" /> </requestDispatcher> <requestHandler name="/select" class="solr.SearchHandler"> <lst name="defaults"> <str name="echoParams">explicit</str> <int name="rows">10</int> </lst> </requestHandler> <requestHandler name="/query" class="solr.SearchHandler"> <lst name="defaults"> <str name="echoParams">explicit</str> <str name="wt">json</str> <str name="indent">true</str> </lst> </requestHandler> <requestHandler name="/browse" class="solr.SearchHandler" useParams="query,facets,velocity,browse"> <lst name="defaults"> <str name="echoParams">explicit</str> </lst> </requestHandler> <initParams path="/update/**,/query,/select,/tvrh,/elevate,/spell,/browse"> <lst name="defaults"> <str name="df">_text_</str> </lst> </initParams> <requestHandler name="/update/extract" startup="lazy" class="solr.extraction.ExtractingRequestHandler" > <lst name="defaults"> <str name="lowernames">true</str> <str name="fmap.content">_text_</str> </lst> </requestHandler> <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> <str name="queryAnalyzerFieldType">text_general</str> <lst name="spellchecker"> <str name="name">default</str> <str name="field">_text_</str> <str name="classname">solr.DirectSolrSpellChecker</str> <str name="distanceMeasure">internal</str> <float name="accuracy">0.5</float> <int name="maxEdits">2</int> <int name="minPrefix">1</int> <int name="maxInspections">5</int> <int name="minQueryLength">4</int> <float name="maxQueryFrequency">0.01</float> </lst> </searchComponent> <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy"> <lst name="defaults"> <str name="spellcheck.dictionary">default</str> <str name="spellcheck">on</str> <str name="spellcheck.extendedResults">true</str> <str name="spellcheck.count">10</str> <str name="spellcheck.alternativeTermCount">5</str> <str name="spellcheck.maxResultsForSuggest">5</str> <str name="spellcheck.collate">true</str> <str name="spellcheck.collateExtendedResults">true</str> <str name="spellcheck.maxCollationTries">10</str> <str name="spellcheck.maxCollations">5</str> </lst> <arr name="last-components"> <str>spellcheck</str> </arr> </requestHandler> <searchComponent name="tvComponent" class="solr.TermVectorComponent"/> <requestHandler name="/tvrh" class="solr.SearchHandler" startup="lazy"> <lst name="defaults"> <bool name="tv">true</bool> </lst> <arr name="last-components"> <str>tvComponent</str> </arr> </requestHandler> <searchComponent name="terms" class="solr.TermsComponent"/> <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> <lst name="defaults"> <bool name="terms">true</bool> <bool name="distrib">false</bool> </lst> <arr name="components"> <str>terms</str> </arr> </requestHandler> <searchComponent name="elevator" class="solr.QueryElevationComponent" > <str name="queryFieldType">string</str> </searchComponent> <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy"> <lst name="defaults"> <str name="echoParams">explicit</str> </lst> <arr name="last-components"> <str>elevator</str> </arr> </requestHandler> <searchComponent class="solr.HighlightComponent" name="highlight"> <highlighting> <fragmenter name="gap" default="true" class="solr.highlight.GapFragmenter"> <lst name="defaults"> <int name="hl.fragsize">100</int> </lst> </fragmenter> <fragmenter name="regex" class="solr.highlight.RegexFragmenter"> <lst name="defaults"> <int name="hl.fragsize">70</int> <float name="hl.regex.slop">0.5</float> <str name="hl.regex.pattern">[-\w ,/\n\"']{20,200}</str> </lst> </fragmenter> <formatter name="html" default="true" class="solr.highlight.HtmlFormatter"> <lst name="defaults"> <str name="hl.simple.pre"><![CDATA[<em>]]></str> <str name="hl.simple.post"><![CDATA[</em>]]></str> </lst> </formatter> <encoder name="html" class="solr.highlight.HtmlEncoder" /> <fragListBuilder name="simple" class="solr.highlight.SimpleFragListBuilder"/> <fragListBuilder name="single" class="solr.highlight.SingleFragListBuilder"/> <fragListBuilder name="weighted" default="true" class="solr.highlight.WeightedFragListBuilder"/> <fragmentsBuilder name="default" default="true" class="solr.highlight.ScoreOrderFragmentsBuilder"> </fragmentsBuilder> <fragmentsBuilder name="colored" class="solr.highlight.ScoreOrderFragmentsBuilder"> <lst name="defaults"> <str name="hl.tag.pre"><![CDATA[ <b style="background:yellow">,<b style="background:lawgreen">, <b style="background:aquamarine">,<b style="background:magenta">, <b style="background:palegreen">,<b style="background:coral">, <b style="background:wheat">,<b style="background:khaki">, <b style="background:lime">,<b style="background:deepskyblue">]]></str> <str name="hl.tag.post"><![CDATA[</b>]]></str> </lst> </fragmentsBuilder> <boundaryScanner name="default" default="true" class="solr.highlight.SimpleBoundaryScanner"> <lst name="defaults"> <str name="hl.bs.maxScan">10</str> <str name="hl.bs.chars">.,!? 	 </str> </lst> </boundaryScanner> <boundaryScanner name="breakIterator" class="solr.highlight.BreakIteratorBoundaryScanner"> <lst name="defaults"> <str name="hl.bs.type">WORD</str> <str name="hl.bs.language">en</str> <str name="hl.bs.country">US</str> </lst> </boundaryScanner> </highlighting> </searchComponent> <updateProcessor class="solr.UUIDUpdateProcessorFactory" name="uuid"/> <updateProcessor class="solr.RemoveBlankFieldUpdateProcessorFactory" name="remove-blank"/> <updateProcessor class="solr.FieldNameMutatingUpdateProcessorFactory" name="field-name-mutating"> <str name="pattern">[^\w-\.]</str> <str name="replacement">_</str> </updateProcessor> <updateProcessor class="solr.ParseBooleanFieldUpdateProcessorFactory" name="parse-boolean"/> <updateProcessor class="solr.ParseLongFieldUpdateProcessorFactory" name="parse-long"/> <updateProcessor class="solr.ParseDoubleFieldUpdateProcessorFactory" name="parse-double"/> <updateProcessor class="solr.ParseDateFieldUpdateProcessorFactory" name="parse-date"> <arr name="format"> <str>yyyy-MM-dd['T'[HH:mm[:ss[.SSS]][z</str> <str>yyyy-MM-dd['T'[HH:mm[:ss[,SSS]][z</str> <str>yyyy-MM-dd HH:mm[:ss[.SSS]][z</str> <str>yyyy-MM-dd HH:mm[:ss[,SSS]][z</str> <str>[EEE, ]dd MMM yyyy HH:mm[:ss] z</str> <str>EEEE, dd-MMM-yy HH:mm:ss z</str> <str>EEE MMM ppd HH:mm:ss [z ]yyyy</str> </arr> </updateProcessor> <updateProcessor class="solr.AddSchemaFieldsUpdateProcessorFactory" name="add-schema-fields"> <lst name="typeMapping"> <str name="valueClass">java.lang.String</str> <str name="fieldType">text_general</str> <lst name="copyField"> <str name="dest">*_str</str> <int name="maxChars">256</int> </lst> <!-- Use as default mapping instead of defaultFieldType --> <bool name="default">true</bool> </lst> <lst name="typeMapping"> <str name="valueClass">java.lang.Boolean</str> <str name="fieldType">booleans</str> </lst> <lst name="typeMapping"> <str name="valueClass">java.util.Date</str> <str name="fieldType">pdates</str> </lst> <lst name="typeMapping"> <str name="valueClass">java.lang.Long</str> <str name="valueClass">java.lang.Integer</str> <str name="fieldType">plongs</str> </lst> <lst name="typeMapping"> <str name="valueClass">java.lang.Number</str> <str name="fieldType">pdoubles</str> </lst> </updateProcessor> <updateRequestProcessorChain name="add-unknown-fields-to-the-schema" default="${update.autoCreateFields:true}" processor="uuid,remove-blank,field-name-mutating,parse-boolean,parse-long,parse-double,parse-date,add-schema-fields"> <processor class="solr.LogUpdateProcessorFactory"/> <processor class="solr.DistributedUpdateProcessorFactory"/> <processor class="solr.RunUpdateProcessorFactory"/> </updateRequestProcessorChain> <queryResponseWriter name="json" class="solr.JSONResponseWriter"> <str name="content-type">text/plain; charset=UTF-8</str> </queryResponseWriter> <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"> <str name="template.base.dir">${velocity.template.base.dir:}</str> <str name="solr.resource.loader.enabled">${velocity.solr.resource.loader.enabled:true}</str> <str name="params.resource.loader.enabled">${velocity.params.resource.loader.enabled:false}</str> </queryResponseWriter> </config>