fabriziofortino commented on code in PR #2234:
URL: https://github.com/apache/jackrabbit-oak/pull/2234#discussion_r2066475863


##########
oak-query-spi/src/main/java/org/apache/jackrabbit/oak/spi/query/fulltext/InferenceQuery.java:
##########
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.jackrabbit.oak.spi.query.fulltext;
+
+import org.apache.jackrabbit.oak.json.JsonUtils;
+import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class InferenceQuery {
+    private static final Logger LOG = 
LoggerFactory.getLogger(InferenceQuery.class);
+    private static final String DEFAULT_INFERENCE_QUERY_CONFIG_PREFIX = "?";
+    private static final String INFERENCE_QUERY_CONFIG_PREFIX_KEY = 
"org.apache.jackrabbit.oak.search.inference.query.prefix";
+    public static final String INFERENCE_QUERY_CONFIG_PREFIX = 
System.getProperty(
+            INFERENCE_QUERY_CONFIG_PREFIX_KEY, 
DEFAULT_INFERENCE_QUERY_CONFIG_PREFIX);
+
+    private final String queryInferenceConfig;
+    private final String queryText;
+
+    public InferenceQuery(@NotNull String text) {
+        String[] components = parseText(text);
+        this.queryInferenceConfig = components[0];
+        this.queryText = components[1];
+    }
+
+    private String[] parseText(String inputText) {
+        String text = inputText.trim();
+        // Remove the first delimiter
+        if (text.startsWith(INFERENCE_QUERY_CONFIG_PREFIX) && 
text.charAt(INFERENCE_QUERY_CONFIG_PREFIX.length()) == '{') {
+            text = text.substring(INFERENCE_QUERY_CONFIG_PREFIX.length());
+
+            // Try to find the end of the JSON part by parsing incrementally
+            int possibleEndIndex = 0;
+            String jsonPart = null;
+            String queryTextPart = null;
+            int jsonEndDelimiterIndex = -1;
+
+            while (possibleEndIndex < text.length()) {
+                possibleEndIndex = text.indexOf(INFERENCE_QUERY_CONFIG_PREFIX, 
possibleEndIndex + 1);
+                if (possibleEndIndex == -1) {
+                    // If we reach here, it means we couldn't find a valid 
JSON part
+                    jsonPart = "";
+                    queryTextPart = text;
+                    LOG.warn("Query starts with inference prefix {}, but 
without valid json part," +
+                                    " if case this prefix is a valid fulltext 
query prefix, please update system property {} with different prefix value",
+                            INFERENCE_QUERY_CONFIG_PREFIX, 
INFERENCE_QUERY_CONFIG_PREFIX_KEY);
+                    break;
+                }
+                String candidateJson = text.substring(0, possibleEndIndex);
+                // Verify if this is valid JSON using Oak's JsopTokenizer
+                if (JsonUtils.isValidJson(candidateJson, false)) {
+                    jsonPart = candidateJson;
+                    jsonEndDelimiterIndex = possibleEndIndex;
+                    break;
+                } else {
+                    continue;
+                }

Review Comment:
   this block is not needed
   ```suggestion
                   }
   ```



##########
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/InferenceConfig.java:
##########
@@ -0,0 +1,231 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elastic.query.inference;
+
+import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.index.IndexName;
+import org.apache.jackrabbit.oak.query.QueryEngineSettings;
+import org.apache.jackrabbit.oak.spi.query.QueryLimits;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.file.Path;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.function.Function;
+
+import static 
org.apache.jackrabbit.oak.plugins.index.search.util.ConfigUtil.getOptionalValue;
+
+/**
+ * Data model class representing the inference configuration stored under 
/oak:index/:inferenceConfig (default path)
+ */
+public class InferenceConfig {
+    private static final Logger LOG = 
LoggerFactory.getLogger(InferenceConfig.class.getName());
+    private static final ReadWriteLock lock = new ReentrantReadWriteLock(true);
+    private static final InferenceConfig INSTANCE = new InferenceConfig();
+    public static final String TYPE = "inferenceConfig";
+    /**
+     * Semantic search is enabled if this flag is true
+     */
+    private boolean enabled;
+    /**
+     * Map of index names to their respective inference configurations
+     */
+    private Map<String, InferenceIndexConfig> indexConfigs;
+    private NodeStore nodeStore;
+    private String inferenceConfigPath;
+    private String currentInferenceConfig;
+    private volatile String activeInferenceConfig;
+    private boolean isInferenceEnabled;
+
+    public boolean isInferenceEnabled() {
+        return isInferenceEnabled;
+    }
+
+    /**
+     * Loads configuration from the given NodeState
+     *
+     * @return InferenceConfiguration instance

Review Comment:
   `@return` is not allowed here
   ```suggestion
   ```



##########
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/InferenceConfig.java:
##########
@@ -0,0 +1,231 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elastic.query.inference;
+
+import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.index.IndexName;
+import org.apache.jackrabbit.oak.query.QueryEngineSettings;
+import org.apache.jackrabbit.oak.spi.query.QueryLimits;
+import org.apache.jackrabbit.oak.spi.state.NodeState;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.jetbrains.annotations.NotNull;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.nio.file.Path;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.UUID;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.function.Function;
+
+import static 
org.apache.jackrabbit.oak.plugins.index.search.util.ConfigUtil.getOptionalValue;
+
+/**
+ * Data model class representing the inference configuration stored under 
/oak:index/:inferenceConfig (default path)
+ */
+public class InferenceConfig {
+    private static final Logger LOG = 
LoggerFactory.getLogger(InferenceConfig.class.getName());
+    private static final ReadWriteLock lock = new ReentrantReadWriteLock(true);
+    private static final InferenceConfig INSTANCE = new InferenceConfig();
+    public static final String TYPE = "inferenceConfig";
+    /**
+     * Semantic search is enabled if this flag is true
+     */
+    private boolean enabled;
+    /**
+     * Map of index names to their respective inference configurations
+     */
+    private Map<String, InferenceIndexConfig> indexConfigs;
+    private NodeStore nodeStore;
+    private String inferenceConfigPath;
+    private String currentInferenceConfig;
+    private volatile String activeInferenceConfig;
+    private boolean isInferenceEnabled;
+
+    public boolean isInferenceEnabled() {
+        return isInferenceEnabled;
+    }
+
+    /**
+     * Loads configuration from the given NodeState
+     *
+     * @return InferenceConfiguration instance
+     */
+
+    private InferenceConfig() {
+        lock.writeLock().lock();
+        try {
+            if (INSTANCE == null) {

Review Comment:
   I don't think `INSTANCE` can ever be true. In general, I see a lot of 
complexity in the initialization/reInit. Can we simplify it?



##########
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java:
##########
@@ -97,9 +107,48 @@ private static ObjectBuilder<TypeMapping> 
loadMappings(@NotNull TypeMapping.Buil
         if (indexDefinition.inferenceDefinition != null) {
             mapInferenceDefinition(builder, 
indexDefinition.inferenceDefinition);
         }
+        // We only add mappings if both the inference config (in 
queryEngineSettings config) and the inference index config are enabled.
+        if (InferenceConfig.getInstance().isInferenceEnabled() && 
InferenceConfig.getInstance().isEnabled()) {
+            mapInferenceConfig(builder, indexDefinition, 
InferenceConfig.getInstance());
+        }
         return builder;
     }
 
+    private static void mapInferenceConfig(TypeMapping.Builder builder, 
@NotNull ElasticIndexDefinition indexDefinition, @NotNull InferenceConfig 
inferenceConfig) {
+        String indexName = PathUtils.getName(indexDefinition.getIndexName());
+
+        InferenceIndexConfig inferenceIndexConfig = 
inferenceConfig.getInferenceIndexConfig(indexName);
+        if (InferenceIndexConfig.NOOP.equals(inferenceIndexConfig)) {
+            return;
+        }
+        try {
+            // We are already validating the enricherConfigJson in the 
InferenceIndexConfig constructor
+            Map<String, Object> enricherConfigJson = 
mapper.readValue(inferenceConfig.getInferenceIndexConfig(indexName).getEnricherConfig(),
+                    new TypeReference<Map<String, Object>>() {
+                    });
+            // Store the enricher configuration in the index metadata so that 
it can be used by the enricher service
+            enricherConfigJson.forEach((k, v) -> {
+                builder.meta(k, JsonData.of(v));
+            });
+        } catch (JsonProcessingException e) {
+            throw new RuntimeException("enricherConfig parsing should never 
fail as it is validated in InferenceIndexConfig" + e.getMessage());
+        }
+
+        builder.properties(InferenceConstants.VECTOR_SPACES, b -> 
b.object(spaces -> {
+            for (var inferenceModelConfig : 
inferenceIndexConfig.getInferenceModelConfigs().entrySet()) {
+                if (inferenceModelConfig.getValue().isEnabled()) {
+                    spaces.properties(inferenceModelConfig.getKey(), v -> 
v.nested(vb -> {
+                        vb.properties("id", p -> p.keyword(k -> k));
+                        vb.properties("vector", p -> p.denseVector(dv -> dv));
+                        vb.properties("metadata", p -> p.object(o -> 
o.enabled(false)));
+                        return vb;
+                    }));
+                }
+            }
+            return spaces;
+        }));
+    }

Review Comment:
   I don't think we should do this. We need a way to abstract the mapping and 
make it configurable.
   One possibility is to have an elastic-specific index definition property 
that contains the actual mapping that will be sent to ES at index creation. The 
same approach can be used for the :enrich field. In that case, we can should 
also specify the default value (eg: PENDING) that will be sent out every time a 
doc gets updated. These changes are actually not related to the inference 
support but are generic. Can we create a separate PR for that?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: oak-dev-unsubscr...@jackrabbit.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to