dsmiley commented on code in PR #2809: URL: https://github.com/apache/solr/pull/2809#discussion_r1845101285
########## settings.gradle: ########## @@ -48,6 +48,7 @@ include "solr:modules:hadoop-auth" include "solr:modules:hdfs" include "solr:modules:jwt-auth" include "solr:modules:langid" +include "solr:modules:llm" Review Comment: Is LLM the best name for this package? Will it expand to other LLM stuff? ########## solr/modules/llm/src/java/org/apache/solr/llm/embedding/SolrEmbeddingModel.java: ########## @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.llm.embedding; + +import dev.langchain4j.data.embedding.Embedding; +import dev.langchain4j.model.embedding.EmbeddingModel; +import java.lang.reflect.Method; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Map; +import java.util.Objects; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.solr.llm.store.EmbeddingModelException; + +public class SolrEmbeddingModel implements Accountable { + private static final long BASE_RAM_BYTES = + RamUsageEstimator.shallowSizeOfInstance(SolrEmbeddingModel.class); + private static final String TIMEOUT_PARAM = "timeout"; + private static final String MAX_SEGMENTS_PER_BATCH_PARAM = "maxSegmentsPerBatch"; + private static final String MAX_RETRIES_PARAM = "maxRetries"; + + private final String name; + private final Map<String, Object> params; + private final EmbeddingModel embedder; + private final Integer hashCode; + + public static SolrEmbeddingModel getInstance( + String className, String name, Map<String, Object> params) throws EmbeddingModelException { + try { + EmbeddingModel embedder; + Class<?> modelClass = Class.forName(className); + var builder = modelClass.getMethod("builder").invoke(null); + if (params != null) { + for (String paramName : params.keySet()) { + switch (paramName) { + case TIMEOUT_PARAM: + Duration timeOut = Duration.ofSeconds((Long) params.get(paramName)); + builder.getClass().getMethod(paramName, Duration.class).invoke(builder, timeOut); + break; + case MAX_SEGMENTS_PER_BATCH_PARAM: + builder + .getClass() + .getMethod(paramName, Integer.class) + .invoke(builder, ((Long) params.get(paramName)).intValue()); + break; + case MAX_RETRIES_PARAM: + builder + .getClass() + .getMethod(paramName, Integer.class) + .invoke(builder, ((Long) params.get(paramName)).intValue()); + break; + default: + ArrayList<Method> methods = new ArrayList<>(); Review Comment: a comment would be useful explaining ########## solr/modules/llm/src/java/org/apache/solr/llm/store/rest/ManagedEmbeddingModelStore.java: ########## @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.llm.store.rest; + +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.llm.embedding.SolrEmbeddingModel; +import org.apache.solr.llm.store.EmbeddingModelException; +import org.apache.solr.llm.store.EmbeddingModelStore; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.rest.BaseSolrResource; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.rest.ManagedResourceStorage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Managed resource for storing a model */ +public class ManagedEmbeddingModelStore extends ManagedResource + implements ManagedResource.ChildResourceSupport { + + public static void registerManagedEmbeddingModelStore( + SolrResourceLoader solrResourceLoader, ManagedResourceObserver managedResourceObserver) { + solrResourceLoader + .getManagedResourceRegistry() + .registerManagedResource( + REST_END_POINT, ManagedEmbeddingModelStore.class, managedResourceObserver); + } + + public static ManagedEmbeddingModelStore getManagedModelStore(SolrCore core) { + return (ManagedEmbeddingModelStore) core.getRestManager().getManagedResource(REST_END_POINT); + } + + /** the model store rest endpoint */ + public static final String REST_END_POINT = "/schema/embedding-model-store"; + + /** Managed model store: the name of the attribute containing all the models of a model store */ + private static final String MODELS_JSON_FIELD = "models"; + + /** name of the attribute containing a class */ + static final String CLASS_KEY = "class"; + + /** name of the attribute containing a name */ + static final String NAME_KEY = "name"; + + /** name of the attribute containing parameters */ + static final String PARAMS_KEY = "params"; + + private final EmbeddingModelStore store; + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + public ManagedEmbeddingModelStore( + String resourceId, SolrResourceLoader loader, ManagedResourceStorage.StorageIO storageIO) + throws SolrException { + super(resourceId, loader, storageIO); + store = new EmbeddingModelStore(); + } + + @Override + protected ManagedResourceStorage createStorage( + ManagedResourceStorage.StorageIO storageIO, SolrResourceLoader loader) throws SolrException { + return new ManagedResourceStorage.JsonStorage(storageIO, loader, -1); + } + + private Object managedData; + + @Override + protected void onManagedDataLoadedFromStorage(NamedList<?> managedInitArgs, Object managedData) + throws SolrException { + store.clear(); + this.managedData = managedData; + } + + public void loadStoredModels() { + log.info("------ managed models ~ loading ------"); + + if ((managedData != null) && (managedData instanceof List)) { + @SuppressWarnings({"unchecked"}) + final List<Map<String, Object>> embeddingModels = (List<Map<String, Object>>) managedData; + for (final Map<String, Object> embeddingModel : embeddingModels) { + addModelFromMap(embeddingModel); + } + } + } + + private void addModelFromMap(Map<String, Object> modelMap) { + try { + final SolrEmbeddingModel embedder = fromEmbeddingModelMap(modelMap); + addModel(embedder); + } catch (final EmbeddingModelException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); + } + } + + public synchronized void addModel(SolrEmbeddingModel model) throws EmbeddingModelException { Review Comment: instead of synchronized, maybe rely on the store's thread-safety? ########## solr/modules/llm/src/java/org/apache/solr/llm/store/EmbeddingModelStore.java: ########## @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.llm.store; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.llm.embedding.SolrEmbeddingModel; + +public class EmbeddingModelStore { + + private final Map<String, SolrEmbeddingModel> availableModels; + + public EmbeddingModelStore() { + availableModels = new LinkedHashMap<>(); + } + + public synchronized SolrEmbeddingModel getModel(String name) { Review Comment: use of synchronized seems haphazard in this class; shows an intention to be thread-safe but not actually achieved. It might be sufficient to ensure that the underlying Map is Synchronized and leave it at that. ########## solr/modules/llm/src/java/org/apache/solr/llm/store/rest/ManagedEmbeddingModelStore.java: ########## @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.llm.store.rest; + +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.llm.embedding.SolrEmbeddingModel; +import org.apache.solr.llm.store.EmbeddingModelException; +import org.apache.solr.llm.store.EmbeddingModelStore; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.rest.BaseSolrResource; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.rest.ManagedResourceStorage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Managed resource for storing a model */ +public class ManagedEmbeddingModelStore extends ManagedResource + implements ManagedResource.ChildResourceSupport { + + public static void registerManagedEmbeddingModelStore( + SolrResourceLoader solrResourceLoader, ManagedResourceObserver managedResourceObserver) { + solrResourceLoader + .getManagedResourceRegistry() + .registerManagedResource( + REST_END_POINT, ManagedEmbeddingModelStore.class, managedResourceObserver); + } + + public static ManagedEmbeddingModelStore getManagedModelStore(SolrCore core) { + return (ManagedEmbeddingModelStore) core.getRestManager().getManagedResource(REST_END_POINT); + } + + /** the model store rest endpoint */ + public static final String REST_END_POINT = "/schema/embedding-model-store"; + + /** Managed model store: the name of the attribute containing all the models of a model store */ + private static final String MODELS_JSON_FIELD = "models"; + + /** name of the attribute containing a class */ + static final String CLASS_KEY = "class"; + + /** name of the attribute containing a name */ + static final String NAME_KEY = "name"; + + /** name of the attribute containing parameters */ + static final String PARAMS_KEY = "params"; + + private final EmbeddingModelStore store; + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + public ManagedEmbeddingModelStore( + String resourceId, SolrResourceLoader loader, ManagedResourceStorage.StorageIO storageIO) + throws SolrException { + super(resourceId, loader, storageIO); + store = new EmbeddingModelStore(); + } + + @Override + protected ManagedResourceStorage createStorage( + ManagedResourceStorage.StorageIO storageIO, SolrResourceLoader loader) throws SolrException { + return new ManagedResourceStorage.JsonStorage(storageIO, loader, -1); + } + + private Object managedData; + + @Override + protected void onManagedDataLoadedFromStorage(NamedList<?> managedInitArgs, Object managedData) + throws SolrException { + store.clear(); + this.managedData = managedData; + } + + public void loadStoredModels() { + log.info("------ managed models ~ loading ------"); + + if ((managedData != null) && (managedData instanceof List)) { + @SuppressWarnings({"unchecked"}) + final List<Map<String, Object>> embeddingModels = (List<Map<String, Object>>) managedData; + for (final Map<String, Object> embeddingModel : embeddingModels) { + addModelFromMap(embeddingModel); + } + } + } + + private void addModelFromMap(Map<String, Object> modelMap) { + try { + final SolrEmbeddingModel embedder = fromEmbeddingModelMap(modelMap); + addModel(embedder); + } catch (final EmbeddingModelException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); + } + } + + public synchronized void addModel(SolrEmbeddingModel model) throws EmbeddingModelException { + try { + if (log.isInfoEnabled()) { + log.info("adding model {}", model.getName()); + } + store.addModel(model); + } catch (final EmbeddingModelException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); + } + } + + @SuppressWarnings("unchecked") + @Override + protected Object applyUpdatesToManagedData(Object updates) { + if (updates instanceof List) { + final List<Map<String, Object>> embeddingModels = (List<Map<String, Object>>) updates; + for (final Map<String, Object> embeddingModel : embeddingModels) { + addModelFromMap(embeddingModel); + } + } + + if (updates instanceof Map) { + final Map<String, Object> map = (Map<String, Object>) updates; + addModelFromMap(map); + } + + return modelsAsManagedResources(store.getModels()); + } + + @Override + public synchronized void doDeleteChild(BaseSolrResource endpoint, String childId) { + store.delete(childId); + storeManagedData(applyUpdatesToManagedData(null)); + } + + /** + * Called to retrieve a named part (the given childId) of the resource at the given endpoint. + * Note: since we have a unique child managed store we ignore the childId. + */ + @Override + public void doGet(BaseSolrResource endpoint, String childId) { + + final SolrQueryResponse response = endpoint.getSolrResponse(); + response.add(MODELS_JSON_FIELD, modelsAsManagedResources(store.getModels())); + } + + public SolrEmbeddingModel getModel(String modelName) { + return store.getModel(modelName); + } + + @Override + public String toString() { + return "ManagedModelStore [store=" + store + "]"; + } + + /** + * Returns the available models as a list of Maps objects. After an update the managed resources + * needs to return the resources in this format in order to store in json somewhere (zookeeper, + * disk...) + * + * @return the available models as a list of Maps objects + */ + private static List<Object> modelsAsManagedResources(List<SolrEmbeddingModel> models) { + final List<Object> list = new ArrayList<>(models.size()); + for (final SolrEmbeddingModel model : models) { + list.add(toEmbeddingModelMap(model)); + } + return list; + } + + @SuppressWarnings("unchecked") + public static SolrEmbeddingModel fromEmbeddingModelMap(Map<String, Object> embeddingModel) { + final SolrEmbeddingModel embedder = Review Comment: no need for this var (and for other methods similarly); just return it directly ########## solr/modules/llm/src/java/org/apache/solr/llm/search/TextEmbedderQParserPlugin.java: ########## @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.llm.search; + +import java.io.IOException; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.search.KnnFloatVectorQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.ResourceLoader; +import org.apache.lucene.util.ResourceLoaderAware; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.llm.embedding.SolrEmbeddingModel; +import org.apache.solr.llm.store.rest.ManagedEmbeddingModelStore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.schema.DenseVectorField; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.QParser; +import org.apache.solr.search.QParserPlugin; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.search.neural.KnnQParser; + +/** + * A neural query parser that embed the query and then run K-nearest neighbors search on Dense Review Comment: What does "embed the query" mean? ########## solr/modules/llm/src/java/org/apache/solr/llm/store/rest/ManagedEmbeddingModelStore.java: ########## @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.llm.store.rest; + +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.llm.embedding.SolrEmbeddingModel; +import org.apache.solr.llm.store.EmbeddingModelException; +import org.apache.solr.llm.store.EmbeddingModelStore; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.rest.BaseSolrResource; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.rest.ManagedResourceStorage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Managed resource for storing a model */ +public class ManagedEmbeddingModelStore extends ManagedResource + implements ManagedResource.ChildResourceSupport { + + public static void registerManagedEmbeddingModelStore( + SolrResourceLoader solrResourceLoader, ManagedResourceObserver managedResourceObserver) { + solrResourceLoader + .getManagedResourceRegistry() + .registerManagedResource( + REST_END_POINT, ManagedEmbeddingModelStore.class, managedResourceObserver); + } + + public static ManagedEmbeddingModelStore getManagedModelStore(SolrCore core) { + return (ManagedEmbeddingModelStore) core.getRestManager().getManagedResource(REST_END_POINT); + } + + /** the model store rest endpoint */ + public static final String REST_END_POINT = "/schema/embedding-model-store"; + + /** Managed model store: the name of the attribute containing all the models of a model store */ + private static final String MODELS_JSON_FIELD = "models"; + + /** name of the attribute containing a class */ + static final String CLASS_KEY = "class"; + + /** name of the attribute containing a name */ + static final String NAME_KEY = "name"; + + /** name of the attribute containing parameters */ + static final String PARAMS_KEY = "params"; + + private final EmbeddingModelStore store; + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + public ManagedEmbeddingModelStore( + String resourceId, SolrResourceLoader loader, ManagedResourceStorage.StorageIO storageIO) + throws SolrException { + super(resourceId, loader, storageIO); + store = new EmbeddingModelStore(); + } + + @Override + protected ManagedResourceStorage createStorage( + ManagedResourceStorage.StorageIO storageIO, SolrResourceLoader loader) throws SolrException { + return new ManagedResourceStorage.JsonStorage(storageIO, loader, -1); + } + + private Object managedData; + + @Override + protected void onManagedDataLoadedFromStorage(NamedList<?> managedInitArgs, Object managedData) + throws SolrException { + store.clear(); + this.managedData = managedData; + } + + public void loadStoredModels() { + log.info("------ managed models ~ loading ------"); + + if ((managedData != null) && (managedData instanceof List)) { + @SuppressWarnings({"unchecked"}) + final List<Map<String, Object>> embeddingModels = (List<Map<String, Object>>) managedData; + for (final Map<String, Object> embeddingModel : embeddingModels) { + addModelFromMap(embeddingModel); + } + } + } + + private void addModelFromMap(Map<String, Object> modelMap) { + try { + final SolrEmbeddingModel embedder = fromEmbeddingModelMap(modelMap); + addModel(embedder); + } catch (final EmbeddingModelException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); + } + } + + public synchronized void addModel(SolrEmbeddingModel model) throws EmbeddingModelException { + try { + if (log.isInfoEnabled()) { + log.info("adding model {}", model.getName()); + } + store.addModel(model); + } catch (final EmbeddingModelException e) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); + } + } + + @SuppressWarnings("unchecked") + @Override + protected Object applyUpdatesToManagedData(Object updates) { + if (updates instanceof List) { + final List<Map<String, Object>> embeddingModels = (List<Map<String, Object>>) updates; + for (final Map<String, Object> embeddingModel : embeddingModels) { + addModelFromMap(embeddingModel); + } + } + + if (updates instanceof Map) { + final Map<String, Object> map = (Map<String, Object>) updates; + addModelFromMap(map); + } + + return modelsAsManagedResources(store.getModels()); + } + + @Override + public synchronized void doDeleteChild(BaseSolrResource endpoint, String childId) { + store.delete(childId); + storeManagedData(applyUpdatesToManagedData(null)); + } + + /** + * Called to retrieve a named part (the given childId) of the resource at the given endpoint. + * Note: since we have a unique child managed store we ignore the childId. + */ + @Override + public void doGet(BaseSolrResource endpoint, String childId) { + + final SolrQueryResponse response = endpoint.getSolrResponse(); + response.add(MODELS_JSON_FIELD, modelsAsManagedResources(store.getModels())); + } + + public SolrEmbeddingModel getModel(String modelName) { + return store.getModel(modelName); + } + + @Override + public String toString() { + return "ManagedModelStore [store=" + store + "]"; + } + + /** + * Returns the available models as a list of Maps objects. After an update the managed resources + * needs to return the resources in this format in order to store in json somewhere (zookeeper, + * disk...) + * + * @return the available models as a list of Maps objects + */ + private static List<Object> modelsAsManagedResources(List<SolrEmbeddingModel> models) { + final List<Object> list = new ArrayList<>(models.size()); + for (final SolrEmbeddingModel model : models) { + list.add(toEmbeddingModelMap(model)); + } + return list; Review Comment: is more elegant with Java Stream ########## solr/modules/llm/src/java/org/apache/solr/llm/embedding/SolrEmbeddingModel.java: ########## @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.llm.embedding; + +import dev.langchain4j.data.embedding.Embedding; +import dev.langchain4j.model.embedding.EmbeddingModel; +import java.lang.reflect.Method; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Map; +import java.util.Objects; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.solr.llm.store.EmbeddingModelException; + +public class SolrEmbeddingModel implements Accountable { + private static final long BASE_RAM_BYTES = + RamUsageEstimator.shallowSizeOfInstance(SolrEmbeddingModel.class); + private static final String TIMEOUT_PARAM = "timeout"; + private static final String MAX_SEGMENTS_PER_BATCH_PARAM = "maxSegmentsPerBatch"; + private static final String MAX_RETRIES_PARAM = "maxRetries"; + + private final String name; + private final Map<String, Object> params; + private final EmbeddingModel embedder; + private final Integer hashCode; + + public static SolrEmbeddingModel getInstance( + String className, String name, Map<String, Object> params) throws EmbeddingModelException { + try { + EmbeddingModel embedder; + Class<?> modelClass = Class.forName(className); Review Comment: In Solr, we load classes with SolrResourceLoader. We ought to make Class.forName forbidden; I'll pursue that separately.. ########## solr/modules/llm/src/java/org/apache/solr/llm/store/rest/ManagedEmbeddingModelStore.java: ########## @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.llm.store.rest; + +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.llm.embedding.SolrEmbeddingModel; +import org.apache.solr.llm.store.EmbeddingModelException; +import org.apache.solr.llm.store.EmbeddingModelStore; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.rest.BaseSolrResource; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.rest.ManagedResourceStorage; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** Managed resource for storing a model */ +public class ManagedEmbeddingModelStore extends ManagedResource + implements ManagedResource.ChildResourceSupport { + + public static void registerManagedEmbeddingModelStore( + SolrResourceLoader solrResourceLoader, ManagedResourceObserver managedResourceObserver) { + solrResourceLoader + .getManagedResourceRegistry() + .registerManagedResource( + REST_END_POINT, ManagedEmbeddingModelStore.class, managedResourceObserver); + } + + public static ManagedEmbeddingModelStore getManagedModelStore(SolrCore core) { + return (ManagedEmbeddingModelStore) core.getRestManager().getManagedResource(REST_END_POINT); + } + + /** the model store rest endpoint */ + public static final String REST_END_POINT = "/schema/embedding-model-store"; + + /** Managed model store: the name of the attribute containing all the models of a model store */ + private static final String MODELS_JSON_FIELD = "models"; + + /** name of the attribute containing a class */ + static final String CLASS_KEY = "class"; + + /** name of the attribute containing a name */ + static final String NAME_KEY = "name"; + + /** name of the attribute containing parameters */ + static final String PARAMS_KEY = "params"; + + private final EmbeddingModelStore store; + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); Review Comment: please keep static fields together above non-static fields ########## solr/modules/llm/src/test/org/apache/solr/llm/TestLlmBase.java: ########## @@ -0,0 +1,297 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.llm; + +import java.lang.invoke.MethodHandles; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import org.apache.commons.io.file.PathUtils; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.util.Utils; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.llm.embedding.SolrEmbeddingModel; +import org.apache.solr.llm.store.EmbeddingModelException; +import org.apache.solr.llm.store.rest.ManagedEmbeddingModelStore; +import org.apache.solr.util.RestTestBase; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class TestLlmBase extends RestTestBase { Review Comment: It's used a ton so I sympathize with continuing to use it until there's renewed energy in more test renovation. Even then, it would likely stay. It's not deprecated even though its parent class is. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org