epugh commented on code in PR #2809:
URL: https://github.com/apache/solr/pull/2809#discussion_r1822451338


##########
solr/modules/llm/src/test/org/apache/solr/llm/TestLlmBase.java:
##########
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.llm;
+
+import java.lang.invoke.MethodHandles;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.io.file.PathUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.llm.embedding.SolrEmbeddingModel;
+import org.apache.solr.llm.store.EmbeddingModelException;
+import org.apache.solr.llm.store.rest.ManagedEmbeddingModelStore;
+import org.apache.solr.util.RestTestBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestLlmBase extends RestTestBase {
+
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  protected static final SolrResourceLoader solrResourceLoader =
+      new SolrResourceLoader(Path.of("").toAbsolutePath());
+
+  protected static Path tmpSolrHome;
+  protected static Path tmpConfDir;
+
+  public static final String MODEL_FILE_NAME = 
"_schema_embedding-model-store.json";
+  protected static final String COLLECTION = "collection1";
+  protected static final String CONF_DIR = COLLECTION + "/conf";
+
+  protected static Path embeddingModelStoreFile = null;
+
+  protected static String IDField = "id";
+  protected static String stringField = "string_field";
+  protected static String vectorField = "vector";
+  protected static String vectorField2 = "vector2";
+  protected static String vectorFieldByteEncoding = "vector_byte_encoding";
+
+  protected static void setuptest(boolean bulkIndex) throws Exception {
+    setuptest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  protected static void setupPersistenttest(boolean bulkIndex) throws 
Exception {
+    setupPersistentTest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  public static ManagedEmbeddingModelStore getManagedModelStore() {
+    try (SolrCore core = 
solrClientTestRule.getCoreContainer().getCore(DEFAULT_TEST_CORENAME)) {
+      return ManagedEmbeddingModelStore.getManagedModelStore(core);
+    }
+  }
+
+  protected static void setupTestInit(String solrconfig, String schema, 
boolean isPersistent)
+      throws Exception {
+    tmpSolrHome = createTempDir();
+    tmpConfDir = tmpSolrHome.resolve(CONF_DIR);
+    tmpConfDir.toFile().deleteOnExit();
+    PathUtils.copyDirectory(TEST_PATH(), tmpSolrHome.toAbsolutePath());
+
+    final Path mstore = tmpConfDir.resolve(MODEL_FILE_NAME);
+
+    if (isPersistent) {
+      embeddingModelStoreFile = mstore;
+    }
+
+    if (Files.exists(mstore)) {
+      if (log.isInfoEnabled()) {
+        log.info("remove model store config file in {}", 
mstore.toAbsolutePath());
+      }
+      Files.delete(mstore);
+    }
+    if (!solrconfig.equals("solrconfig-llm.xml")) {
+      Files.copy(
+          tmpSolrHome.resolve(CONF_DIR).resolve(solrconfig),
+          tmpSolrHome.resolve(CONF_DIR).resolve("solrconfig-llm.xml"));
+    }
+    if (!schema.equals("schema.xml")) {
+      Files.copy(
+          tmpSolrHome.resolve(CONF_DIR).resolve(schema),
+          tmpSolrHome.resolve(CONF_DIR).resolve("schema.xml"));
+    }
+
+    System.setProperty("managed.schema.mutable", "true");
+  }
+
+  public static void setuptest(String solrconfig, String schema) throws 
Exception {

Review Comment:
   setupTest?



##########
solr/modules/llm/src/test-files/solr/collection1/conf/stopwords.txt:
##########


Review Comment:
   do we really want to model "hey, use stopwords"...   Is this really part of 
the tests?



##########
solr/modules/llm/src/test-files/solr/collection1/conf/synonyms.txt:
##########


Review Comment:
   is this really key?  honestly, part of my excitement around vectors and llms 
is to never use synonyms again!   I suspect this is just copy-pasta!



##########
solr/solr-ref-guide/modules/query-guide/pages/embedding-text.adoc:
##########
@@ -0,0 +1,280 @@
+= Embedding Text
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+With the *Large Language Model* (or *LLM* for short) module you can interact 
with Large Language Models in Solr to encode text to vectors at indexing and 
query time.
+
+
+== Text Embedding Concepts
+
+=== From Text to Vector
+
+The task of sentence similarity aims to encode text to vector in a way that 
sentences semantically similar are encoded to vectors close in a vector space 
(using a vector distance metric).
+
+
+=== Large Language Models 
+
+Large Language Models can be fine-tuned for such a task.
+The resulting model is able to encode text to a numerical vector.
+
+For additional information you can refer to this 
https://sease.io/2021/12/using-bert-to-improve-search-relevance.html[blog post].
+
+==== Embedding Services
+
+Training, fine-tuning and operating such Large Language Models is expensive.
+
+Many companies focus on this aspect and let users access APIs to encode the 
text (at the price of a license fee).
+
+Apache Solr uses LangChain4j (add link here) to connect to such apis.
+
+[IMPORTANT]
+====
+At the moment a subset of the embedding models supported by LangChain4j is 
supported by Solr.
+
+*Disclaimer*: Apache Solr is *in no way* affiliated to any of these 
corporations or services.
+
+If you want to add support for additional services or improve the support for 
the existing ones, feel free to contribute:
+
+* https://github.com/apache/solr/blob/main/CONTRIBUTING.md[Contributing to 
Solr]
+====
+
+== Module
+
+This is provided via the `llm` xref:configuration-guide:solr-modules.adoc[Solr 
Module] that needs to be enabled before use.
+
+At the moment the only supported way to interact with Large Language Models is 
via embedding text.
+
+In the future additional components to empower Solr with LLM will be added.
+
+== Installation of LLM
+
+The llm module requires the `modules/llm/lib/solr-llm-*.jar` JARs.
+
+== LLM Configuration
+
+Large-Language-Model is a module and therefore its plugins must be configured 
in `solrconfig.xml`.
+
+=== Minimum Requirements
+
+* Include the required module JARs.
+Note that by default paths are relative to the Solr core, so they may need 
adjustments to your configuration, or an explicit specification of the 
`$solr.install.dir`.
++
+[source,xml]
+----
+<lib dir="${solr.install.dir:../../../..}/modules/llm/lib/" regex=".*\.jar" />
+----
+
+* Declaration of the `embed` query parser.
++
+[source,xml]
+----
+<queryParser name="embed" 
class="org.apache.solr.llm.search.TextEmbedderQParserPlugin"/>
+----
+
+== Text Embedding Lifecycle
+
+
+=== Models
+
+* A model encodes text to a vector.
+* A model in Solr is a reference to an external API that runs the Large 
Language Model responsible for text embedding.
+
+*N.B.* the Solr embedding model specifies the parameters to access the APIs, 
the model doesn't run internally in Solr

Review Comment:
   for now!   Wait till I get my way ;-)...    SLM (Small Language Models) are 
coming!



##########
solr/modules/llm/src/test/org/apache/solr/llm/TestLlmBase.java:
##########
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.llm;
+
+import java.lang.invoke.MethodHandles;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.io.file.PathUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.llm.embedding.SolrEmbeddingModel;
+import org.apache.solr.llm.store.EmbeddingModelException;
+import org.apache.solr.llm.store.rest.ManagedEmbeddingModelStore;
+import org.apache.solr.util.RestTestBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestLlmBase extends RestTestBase {
+
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  protected static final SolrResourceLoader solrResourceLoader =
+      new SolrResourceLoader(Path.of("").toAbsolutePath());
+
+  protected static Path tmpSolrHome;
+  protected static Path tmpConfDir;
+
+  public static final String MODEL_FILE_NAME = 
"_schema_embedding-model-store.json";
+  protected static final String COLLECTION = "collection1";
+  protected static final String CONF_DIR = COLLECTION + "/conf";
+
+  protected static Path embeddingModelStoreFile = null;
+
+  protected static String IDField = "id";
+  protected static String stringField = "string_field";
+  protected static String vectorField = "vector";
+  protected static String vectorField2 = "vector2";
+  protected static String vectorFieldByteEncoding = "vector_byte_encoding";
+
+  protected static void setuptest(boolean bulkIndex) throws Exception {
+    setuptest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  protected static void setupPersistenttest(boolean bulkIndex) throws 
Exception {
+    setupPersistentTest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  public static ManagedEmbeddingModelStore getManagedModelStore() {
+    try (SolrCore core = 
solrClientTestRule.getCoreContainer().getCore(DEFAULT_TEST_CORENAME)) {
+      return ManagedEmbeddingModelStore.getManagedModelStore(core);
+    }
+  }
+
+  protected static void setupTestInit(String solrconfig, String schema, 
boolean isPersistent)
+      throws Exception {
+    tmpSolrHome = createTempDir();
+    tmpConfDir = tmpSolrHome.resolve(CONF_DIR);
+    tmpConfDir.toFile().deleteOnExit();
+    PathUtils.copyDirectory(TEST_PATH(), tmpSolrHome.toAbsolutePath());
+
+    final Path mstore = tmpConfDir.resolve(MODEL_FILE_NAME);
+
+    if (isPersistent) {
+      embeddingModelStoreFile = mstore;
+    }
+
+    if (Files.exists(mstore)) {
+      if (log.isInfoEnabled()) {
+        log.info("remove model store config file in {}", 
mstore.toAbsolutePath());
+      }
+      Files.delete(mstore);
+    }
+    if (!solrconfig.equals("solrconfig-llm.xml")) {
+      Files.copy(
+          tmpSolrHome.resolve(CONF_DIR).resolve(solrconfig),
+          tmpSolrHome.resolve(CONF_DIR).resolve("solrconfig-llm.xml"));
+    }
+    if (!schema.equals("schema.xml")) {
+      Files.copy(
+          tmpSolrHome.resolve(CONF_DIR).resolve(schema),
+          tmpSolrHome.resolve(CONF_DIR).resolve("schema.xml"));
+    }
+
+    System.setProperty("managed.schema.mutable", "true");
+  }
+
+  public static void setuptest(String solrconfig, String schema) throws 
Exception {
+
+    setupTestInit(solrconfig, schema, false);
+    System.setProperty("enable.update.log", "false");
+
+    createJettyAndHarness(
+        tmpSolrHome.toAbsolutePath().toString(), solrconfig, schema, "/solr", 
true, null);
+  }
+
+  public static void setupPersistentTest(String solrconfig, String schema) 
throws Exception {

Review Comment:
   I would love some javadocs...  why do we need a setupPersistentTest?   Some 
clue on what's going on here...



##########
solr/modules/llm/src/test/org/apache/solr/llm/store/rest/TestModelManager.java:
##########
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.llm.store.rest;
+
+import dev.langchain4j.model.cohere.CohereEmbeddingModel;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.llm.TestLlmBase;
+import org.apache.solr.llm.search.TextEmbedderQParserPlugin;
+import org.apache.solr.rest.ManagedResource;
+import org.apache.solr.rest.ManagedResourceStorage;
+import org.apache.solr.rest.RestManager;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestModelManager extends TestLlmBase {
+
+  @BeforeClass
+  public static void init() throws Exception {
+    setuptest(false);
+  }
+
+  @Test
+  public void test() throws Exception {
+    final SolrResourceLoader loader = new SolrResourceLoader(tmpSolrHome);
+
+    final RestManager.Registry registry = loader.getManagedResourceRegistry();
+    assertNotNull(
+        "Expected a non-null RestManager.Registry from the 
SolrResourceLoader!", registry);
+
+    final String resourceId = "/schema/mstore1";
+    registry.registerManagedResource(
+        resourceId, ManagedEmbeddingModelStore.class, new 
TextEmbedderQParserPlugin());
+
+    final NamedList<String> initArgs = new NamedList<>();
+
+    final RestManager restManager = new RestManager();
+    restManager.init(loader, initArgs, new 
ManagedResourceStorage.InMemoryStorageIO());
+
+    final ManagedResource res = restManager.getManagedResource(resourceId);
+    assertTrue(res instanceof ManagedEmbeddingModelStore);
+    assertEquals(res.getResourceId(), resourceId);
+  }
+
+  @Test
+  public void testRestManagerEndpoints() throws Exception {
+    assertJQ("/schema/managed", "/responseHeader/status==0");
+
+    final String cohereModelClassName = CohereEmbeddingModel.class.getName();
+
+    // Add models
+    String model = "{ \"name\":\"testModel1\", \"class\":\"" + 
cohereModelClassName + "\"}";
+    // fails since it does not have params
+    assertJPut(ManagedEmbeddingModelStore.REST_END_POINT, model, 
"/responseHeader/status==400");
+    // success
+    model =
+        "{ name:\"testModel2\", class:\""
+            + cohereModelClassName
+            + "\","
+            + "params:{"
+            + "baseUrl:\"https://api.cohere.ai/v1/\","
+            + "apiKey:\"cohereApiKey2\","
+            + "modelName:\"embed-english-light-v3.0\","
+            + "inputType:\"search_document\","
+            + "logRequests:true,"
+            + "logResponses:false"
+            + "}}";
+    assertJPut(ManagedEmbeddingModelStore.REST_END_POINT, model, 
"/responseHeader/status==0");
+    // success
+    final String multipleModels =
+        "[{ name:\"testModel3\", class:\""

Review Comment:
   here we do string building for json, which I *think* is our dominant 
pattern...   personally not having all the `.appends` makes it more readable, 
even with the escaping around the double quotes!



##########
solr/solr-ref-guide/modules/query-guide/pages/dense-vector-search.adoc:
##########
@@ -242,9 +242,9 @@ client.add(Arrays.asList(d1, d2));
 
 == Query Time
 
-Apache Solr provides two query parsers that work with dense vector fields, 
that each support different ways of matching documents based on vector 
similarity: The `knn` query parser, and the `vectorSimilarity` query parser.
+Apache Solr provides three query parsers that work with dense vector fields, 
that each support different ways of matching documents based on vector 
similarity: The `knn` query parser, the `vectorSimilarity` query parser and the 
`embed` query parser.

Review Comment:
   We should be thinking about adding some call out text of "Here is when you 
use knn, here is when you use vectorSimilarity, and here is when you use 
embed".    I'd personally like to see our ref guide move beyond being just a 
reference to look things up and also explain more the "why".



##########
solr/modules/llm/src/test/org/apache/solr/llm/TestLlmBase.java:
##########
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.llm;
+
+import java.lang.invoke.MethodHandles;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.io.file.PathUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.llm.embedding.SolrEmbeddingModel;
+import org.apache.solr.llm.store.EmbeddingModelException;
+import org.apache.solr.llm.store.rest.ManagedEmbeddingModelStore;
+import org.apache.solr.util.RestTestBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestLlmBase extends RestTestBase {
+
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  protected static final SolrResourceLoader solrResourceLoader =
+      new SolrResourceLoader(Path.of("").toAbsolutePath());
+
+  protected static Path tmpSolrHome;
+  protected static Path tmpConfDir;
+
+  public static final String MODEL_FILE_NAME = 
"_schema_embedding-model-store.json";
+  protected static final String COLLECTION = "collection1";
+  protected static final String CONF_DIR = COLLECTION + "/conf";
+
+  protected static Path embeddingModelStoreFile = null;
+
+  protected static String IDField = "id";
+  protected static String stringField = "string_field";
+  protected static String vectorField = "vector";
+  protected static String vectorField2 = "vector2";
+  protected static String vectorFieldByteEncoding = "vector_byte_encoding";
+
+  protected static void setuptest(boolean bulkIndex) throws Exception {
+    setuptest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  protected static void setupPersistenttest(boolean bulkIndex) throws 
Exception {
+    setupPersistentTest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  public static ManagedEmbeddingModelStore getManagedModelStore() {
+    try (SolrCore core = 
solrClientTestRule.getCoreContainer().getCore(DEFAULT_TEST_CORENAME)) {
+      return ManagedEmbeddingModelStore.getManagedModelStore(core);
+    }
+  }
+
+  protected static void setupTestInit(String solrconfig, String schema, 
boolean isPersistent)
+      throws Exception {
+    tmpSolrHome = createTempDir();
+    tmpConfDir = tmpSolrHome.resolve(CONF_DIR);
+    tmpConfDir.toFile().deleteOnExit();
+    PathUtils.copyDirectory(TEST_PATH(), tmpSolrHome.toAbsolutePath());
+
+    final Path mstore = tmpConfDir.resolve(MODEL_FILE_NAME);
+
+    if (isPersistent) {
+      embeddingModelStoreFile = mstore;
+    }
+
+    if (Files.exists(mstore)) {
+      if (log.isInfoEnabled()) {
+        log.info("remove model store config file in {}", 
mstore.toAbsolutePath());
+      }
+      Files.delete(mstore);
+    }
+    if (!solrconfig.equals("solrconfig-llm.xml")) {

Review Comment:
   this appears to be some magic?   isn't the way we set things up in a way so 
that you control which solrconfig you are using?   Maybe I don't know the 
RestTestBase enough, but generally I just specify what configs etc to use...



##########
solr/modules/llm/src/test/org/apache/solr/llm/TestLlmBase.java:
##########
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.llm;
+
+import java.lang.invoke.MethodHandles;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.io.file.PathUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.llm.embedding.SolrEmbeddingModel;
+import org.apache.solr.llm.store.EmbeddingModelException;
+import org.apache.solr.llm.store.rest.ManagedEmbeddingModelStore;
+import org.apache.solr.util.RestTestBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestLlmBase extends RestTestBase {
+
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  protected static final SolrResourceLoader solrResourceLoader =
+      new SolrResourceLoader(Path.of("").toAbsolutePath());
+
+  protected static Path tmpSolrHome;
+  protected static Path tmpConfDir;
+
+  public static final String MODEL_FILE_NAME = 
"_schema_embedding-model-store.json";
+  protected static final String COLLECTION = "collection1";
+  protected static final String CONF_DIR = COLLECTION + "/conf";
+
+  protected static Path embeddingModelStoreFile = null;
+
+  protected static String IDField = "id";
+  protected static String stringField = "string_field";
+  protected static String vectorField = "vector";
+  protected static String vectorField2 = "vector2";
+  protected static String vectorFieldByteEncoding = "vector_byte_encoding";
+
+  protected static void setuptest(boolean bulkIndex) throws Exception {
+    setuptest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  protected static void setupPersistenttest(boolean bulkIndex) throws 
Exception {
+    setupPersistentTest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  public static ManagedEmbeddingModelStore getManagedModelStore() {
+    try (SolrCore core = 
solrClientTestRule.getCoreContainer().getCore(DEFAULT_TEST_CORENAME)) {
+      return ManagedEmbeddingModelStore.getManagedModelStore(core);
+    }
+  }
+
+  protected static void setupTestInit(String solrconfig, String schema, 
boolean isPersistent)
+      throws Exception {
+    tmpSolrHome = createTempDir();
+    tmpConfDir = tmpSolrHome.resolve(CONF_DIR);
+    tmpConfDir.toFile().deleteOnExit();
+    PathUtils.copyDirectory(TEST_PATH(), tmpSolrHome.toAbsolutePath());
+
+    final Path mstore = tmpConfDir.resolve(MODEL_FILE_NAME);
+
+    if (isPersistent) {
+      embeddingModelStoreFile = mstore;
+    }
+
+    if (Files.exists(mstore)) {
+      if (log.isInfoEnabled()) {
+        log.info("remove model store config file in {}", 
mstore.toAbsolutePath());
+      }
+      Files.delete(mstore);
+    }
+    if (!solrconfig.equals("solrconfig-llm.xml")) {
+      Files.copy(
+          tmpSolrHome.resolve(CONF_DIR).resolve(solrconfig),
+          tmpSolrHome.resolve(CONF_DIR).resolve("solrconfig-llm.xml"));
+    }
+    if (!schema.equals("schema.xml")) {
+      Files.copy(
+          tmpSolrHome.resolve(CONF_DIR).resolve(schema),
+          tmpSolrHome.resolve(CONF_DIR).resolve("schema.xml"));
+    }
+
+    System.setProperty("managed.schema.mutable", "true");
+  }
+
+  public static void setuptest(String solrconfig, String schema) throws 
Exception {
+
+    setupTestInit(solrconfig, schema, false);
+    System.setProperty("enable.update.log", "false");
+
+    createJettyAndHarness(
+        tmpSolrHome.toAbsolutePath().toString(), solrconfig, schema, "/solr", 
true, null);
+  }
+
+  public static void setupPersistentTest(String solrconfig, String schema) 
throws Exception {
+
+    setupTestInit(solrconfig, schema, true);
+
+    createJettyAndHarness(
+        tmpSolrHome.toAbsolutePath().toString(), solrconfig, schema, "/solr", 
true, null);
+  }
+
+  protected static void aftertest() throws Exception {
+    if (null != restTestHarness) {
+      restTestHarness.close();
+      restTestHarness = null;
+    }
+    solrClientTestRule.reset();
+    if (null != tmpSolrHome) {
+      PathUtils.deleteDirectory(tmpSolrHome);
+      tmpSolrHome = null;
+    }
+    System.clearProperty("managed.schema.mutable");
+  }
+
+  public static void makeRestTestHarnessNull() {
+    restTestHarness = null;
+  }
+
+  /** produces a model encoded in json * */
+  public static String getModelInJson(String name, String className, String 
params) {

Review Comment:
   don't we use different approaches to building JSON elsewhere?    One of my 
goals for Solr code bases is to have more consistency across all the areas...   
I know that isn't as helpful to the creator of the new code, like in this case, 
but in two years, when someone else has to come along and understand things, 
then it really pays off!



##########
solr/modules/llm/src/test/org/apache/solr/llm/TestLlmBase.java:
##########
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.llm;
+
+import java.lang.invoke.MethodHandles;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.io.file.PathUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.llm.embedding.SolrEmbeddingModel;
+import org.apache.solr.llm.store.EmbeddingModelException;
+import org.apache.solr.llm.store.rest.ManagedEmbeddingModelStore;
+import org.apache.solr.util.RestTestBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestLlmBase extends RestTestBase {

Review Comment:
   Is `RestTestBase` our future?    @dsmiley didn't you have somewhat of a 
vision for where we are going with our hierarchy of testing classes?   I am 
intrigued to learn more about `RestTestBase`, it's new to me.   However, if it 
isn't part of our vision for how we handle testing, then I am nervous about 
depending on it for another new module!



##########
solr/modules/llm/src/test/org/apache/solr/llm/TestLlmBase.java:
##########
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.llm;
+
+import java.lang.invoke.MethodHandles;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.io.file.PathUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.llm.embedding.SolrEmbeddingModel;
+import org.apache.solr.llm.store.EmbeddingModelException;
+import org.apache.solr.llm.store.rest.ManagedEmbeddingModelStore;
+import org.apache.solr.util.RestTestBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestLlmBase extends RestTestBase {
+
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  protected static final SolrResourceLoader solrResourceLoader =
+      new SolrResourceLoader(Path.of("").toAbsolutePath());
+
+  protected static Path tmpSolrHome;
+  protected static Path tmpConfDir;
+
+  public static final String MODEL_FILE_NAME = 
"_schema_embedding-model-store.json";
+  protected static final String COLLECTION = "collection1";
+  protected static final String CONF_DIR = COLLECTION + "/conf";
+
+  protected static Path embeddingModelStoreFile = null;
+
+  protected static String IDField = "id";
+  protected static String stringField = "string_field";
+  protected static String vectorField = "vector";
+  protected static String vectorField2 = "vector2";
+  protected static String vectorFieldByteEncoding = "vector_byte_encoding";
+
+  protected static void setuptest(boolean bulkIndex) throws Exception {
+    setuptest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  protected static void setupPersistenttest(boolean bulkIndex) throws 
Exception {
+    setupPersistentTest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  public static ManagedEmbeddingModelStore getManagedModelStore() {
+    try (SolrCore core = 
solrClientTestRule.getCoreContainer().getCore(DEFAULT_TEST_CORENAME)) {
+      return ManagedEmbeddingModelStore.getManagedModelStore(core);
+    }
+  }
+
+  protected static void setupTestInit(String solrconfig, String schema, 
boolean isPersistent)
+      throws Exception {
+    tmpSolrHome = createTempDir();
+    tmpConfDir = tmpSolrHome.resolve(CONF_DIR);
+    tmpConfDir.toFile().deleteOnExit();
+    PathUtils.copyDirectory(TEST_PATH(), tmpSolrHome.toAbsolutePath());
+
+    final Path mstore = tmpConfDir.resolve(MODEL_FILE_NAME);
+
+    if (isPersistent) {
+      embeddingModelStoreFile = mstore;
+    }
+
+    if (Files.exists(mstore)) {
+      if (log.isInfoEnabled()) {
+        log.info("remove model store config file in {}", 
mstore.toAbsolutePath());
+      }
+      Files.delete(mstore);
+    }
+    if (!solrconfig.equals("solrconfig-llm.xml")) {
+      Files.copy(
+          tmpSolrHome.resolve(CONF_DIR).resolve(solrconfig),
+          tmpSolrHome.resolve(CONF_DIR).resolve("solrconfig-llm.xml"));
+    }
+    if (!schema.equals("schema.xml")) {
+      Files.copy(
+          tmpSolrHome.resolve(CONF_DIR).resolve(schema),
+          tmpSolrHome.resolve(CONF_DIR).resolve("schema.xml"));
+    }
+
+    System.setProperty("managed.schema.mutable", "true");
+  }
+
+  public static void setuptest(String solrconfig, String schema) throws 
Exception {
+
+    setupTestInit(solrconfig, schema, false);
+    System.setProperty("enable.update.log", "false");
+
+    createJettyAndHarness(
+        tmpSolrHome.toAbsolutePath().toString(), solrconfig, schema, "/solr", 
true, null);
+  }
+
+  public static void setupPersistentTest(String solrconfig, String schema) 
throws Exception {
+
+    setupTestInit(solrconfig, schema, true);
+
+    createJettyAndHarness(
+        tmpSolrHome.toAbsolutePath().toString(), solrconfig, schema, "/solr", 
true, null);
+  }
+
+  protected static void aftertest() throws Exception {

Review Comment:
   Likewise, is this a pattern we use elsewhere in Solr? "aftertest"?



##########
solr/modules/llm/src/test/org/apache/solr/llm/TestLlmBase.java:
##########
@@ -0,0 +1,297 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.llm;
+
+import java.lang.invoke.MethodHandles;
+import java.net.URL;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.io.file.PathUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.util.Utils;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.llm.embedding.SolrEmbeddingModel;
+import org.apache.solr.llm.store.EmbeddingModelException;
+import org.apache.solr.llm.store.rest.ManagedEmbeddingModelStore;
+import org.apache.solr.util.RestTestBase;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestLlmBase extends RestTestBase {
+
+  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  protected static final SolrResourceLoader solrResourceLoader =
+      new SolrResourceLoader(Path.of("").toAbsolutePath());
+
+  protected static Path tmpSolrHome;
+  protected static Path tmpConfDir;
+
+  public static final String MODEL_FILE_NAME = 
"_schema_embedding-model-store.json";
+  protected static final String COLLECTION = "collection1";
+  protected static final String CONF_DIR = COLLECTION + "/conf";
+
+  protected static Path embeddingModelStoreFile = null;
+
+  protected static String IDField = "id";
+  protected static String stringField = "string_field";
+  protected static String vectorField = "vector";
+  protected static String vectorField2 = "vector2";
+  protected static String vectorFieldByteEncoding = "vector_byte_encoding";
+
+  protected static void setuptest(boolean bulkIndex) throws Exception {
+    setuptest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  protected static void setupPersistenttest(boolean bulkIndex) throws 
Exception {
+    setupPersistentTest("solrconfig-llm.xml", "schema.xml");
+    if (bulkIndex) prepareIndex();
+  }
+
+  public static ManagedEmbeddingModelStore getManagedModelStore() {
+    try (SolrCore core = 
solrClientTestRule.getCoreContainer().getCore(DEFAULT_TEST_CORENAME)) {
+      return ManagedEmbeddingModelStore.getManagedModelStore(core);
+    }
+  }
+
+  protected static void setupTestInit(String solrconfig, String schema, 
boolean isPersistent)
+      throws Exception {
+    tmpSolrHome = createTempDir();
+    tmpConfDir = tmpSolrHome.resolve(CONF_DIR);
+    tmpConfDir.toFile().deleteOnExit();
+    PathUtils.copyDirectory(TEST_PATH(), tmpSolrHome.toAbsolutePath());
+
+    final Path mstore = tmpConfDir.resolve(MODEL_FILE_NAME);

Review Comment:
   `modelStore`? I don't love it when we save a few characters in our variable 
names ;-)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org


Reply via email to