branch: externals/vecdb
commit 1d614ad36db09dfc035e912cac345164e0eff76a
Merge: 7362e46ae3 e28ccf4be4
Author: Andrew Hyatt <ahy...@gmail.com>
Commit: GitHub <nore...@github.com>

    Merge pull request #1 from ahyatt/chroma-integration-tests
    
    Chroma integration tests and various fixes
---
 README.org                |  11 +--
 vecdb-chroma.el           |  60 ++++++++-----
 vecdb-integration-test.el | 217 ++++++++++++++++++++++++++++++++++++++++++++++
 vecdb-qdrant.el           |  21 +++--
 vecdb.el                  |   3 +-
 5 files changed, 277 insertions(+), 35 deletions(-)

diff --git a/README.org b/README.org
index 7c010e6410..2c9fbb2a25 100644
--- a/README.org
+++ b/README.org
@@ -30,15 +30,16 @@ Collections must be created before they can be used with 
~vecdb-create~, and ~ve
 They can also be deleted with ~vecdb-delete~.
 
 * Adding and replacing data
-Before data is queried, it must be added.  This is done via a batch operation 
on
-a group of data, ~vecdb-upsert-items~.  This either creates an item in the 
collection
-or replaces it, based on the =id= of the item.  Here's an example of adding or
-replacing one item:
+Before data is queried, it must be added. This is done via a batch operation on
+a group of data, ~vecdb-upsert-items~. This either creates an item in the
+collection or replaces it, based on the =id= of the item. =id= should be an 
integer
+or a string UUID. Seeing as how emacs does not provide a UUID library, probably
+an integer is the best choice.
 
 #+begin_src emacs-lisp
 (vecdb-upsert-items my-vecdb-provider my-vecdb-collection
                  (list (make-vecdb-item
-                        :id "example-id"
+                        :id 91
                         :vector [0.1 0.2 0.3 0.4]
                         :payload '(:my-id "235913926"))))
 #+end_src
diff --git a/vecdb-chroma.el b/vecdb-chroma.el
index 3c9807bc3c..3343089214 100644
--- a/vecdb-chroma.el
+++ b/vecdb-chroma.el
@@ -103,14 +103,16 @@ SYNC indicates whether the request should be synchronous."
      (format "/api/v2/tenants/%s/databases"
              (vecdb-chroma-provider-tenant provider))
      `(:name ,(vecdb-chroma-provider-database provider)) t))
-  (vecdb-chroma-call
-   provider
-   'post
-   (format "/api/v2/tenants/%s/databases/%s/collections"
-           (vecdb-chroma-provider-tenant provider)
-           (vecdb-chroma-provider-database provider))
-   `(:name ,(vecdb-collection-name collection))
-   t))
+  (let ((result (vecdb-chroma-call
+                 provider
+                 'post
+                 (format "/api/v2/tenants/%s/databases/%s/collections"
+                         (vecdb-chroma-provider-tenant provider)
+                         (vecdb-chroma-provider-database provider))
+                 `(:name ,(vecdb-collection-name collection))
+                 t)))
+    (puthash (vecdb-collection-name collection)
+             (plist-get result :id) vecdb-chroma-collection-id-cache)))
 
 (cl-defmethod vecdb-delete ((provider vecdb-chroma-provider)
                             (collection vecdb-collection))
@@ -145,17 +147,21 @@ SYNC indicates whether the request should be synchronous."
   (and (vecdb-chroma-has-tenant-p provider)
        (vecdb-chroma-has-database-p provider
                                     (vecdb-chroma-provider-database provider))
-       (condition-case nil
+       (condition-case err
            (vecdb-chroma-call
             provider
             'get
             (format "/api/v2/tenants/%s/databases/%s/collections/%s"
                     (vecdb-chroma-provider-tenant provider)
                     (vecdb-chroma-provider-database provider)
-                    (vecdb-chroma-collection-id provider collection))
+                    ;; Although the docs say this could be the ID, what
+                    ;; actually works in practice is only the name.
+                    (vecdb-collection-name collection))
             nil
             t)
-         (plz-error nil))))
+         (plz-error (if (eq 404 (plz-response-status (plz-error-response (nth 
2 err))))
+                        nil
+                      (error "Error checking tenant: %s" (plz-error-message 
err)))))))
 
 (cl-defmethod vecdb-upsert-items ((provider vecdb-chroma-provider)
                                   (collection vecdb-collection)
@@ -170,7 +176,7 @@ SYNC indicates whether the request should be synchronous."
      'post
      url
      `(:embeddings ,(apply #'vector (mapcar #'vecdb-item-vector items))
-                   :ids ,(apply #'vector (mapcar #'vecdb-item-id items))
+                   :ids ,(apply #'vector (mapcar (lambda (i) (format "%s" 
(vecdb-item-id i))) items))
                    :metadatas ,(apply #'vector (mapcar #'vecdb-item-payload 
items)))
      sync)))
 
@@ -182,15 +188,18 @@ SYNC indicates whether the request should be synchronous."
                       (vecdb-chroma-provider-tenant provider)
                       (vecdb-chroma-provider-database provider)
                       (vecdb-chroma-collection-id provider collection)))
-         (result (vecdb-chroma-call provider 'get url
-                                    `(:ids ,(vector item-id)
-                                           :limit 1)
+         (result (vecdb-chroma-call provider 'post url
+                                    `(:ids ,(vector (format "%s" item-id))
+                                           :limit 1
+                                           :include ["embeddings" "metadatas"])
                                     t)))
-    (unless (= (length (plist-get result :items)) 1)
-      (error "Expected exactly one item, got %d"
-             (length (plist-get result :items)))
+    (when (> (length (plist-get result :ids)) 1)
+      (error "Chroma returned multiple items for ID %s: %s"
+             item-id (plist-get result :ids)))
+    (when (= (length (plist-get result :ids)) 1)
       (make-vecdb-item
-       :id (aref (plist-get result :ids) 0)
+       :id (vecdb-chroma--normalize-stored-id
+            (aref (plist-get result :ids) 0))
        :vector (aref (plist-get result :embeddings) 0)
        :payload (aref (plist-get result :metadatas) 0)))))
 
@@ -206,9 +215,17 @@ SYNC indicates whether the request should be synchronous."
      provider
      'post
      url
-     `(:ids ,(apply #'vector item-ids))
+     `(:ids ,(apply #'vector (mapcar (lambda (id) (format "%s" id))
+                                     item-ids)))
      sync)))
 
+(defun vecdb-chroma--normalize-stored-id (id)
+  "From a string ID, return an integer ID if it is numeric.
+If the the ID is a uuid, return it as a string."
+  (if (string-match-p "^[0-9]+$" id)
+      (string-to-number id)
+    id))
+
 (cl-defmethod vecdb-search-by-vector ((provider vecdb-chroma-provider)
                                       (collection vecdb-collection)
                                       vector &optional limit)
@@ -224,7 +241,8 @@ SYNC indicates whether the request should be synchronous."
                                     t)))
     (cl-loop for i from 0 below (length (aref (plist-get result :ids) 0))
              collect (make-vecdb-item ;
-                      :id (aref (aref (plist-get result :ids) 0) i)
+                      :id (vecdb-chroma--normalize-stored-id
+                           (aref (aref (plist-get result :ids) 0) i))
                       :vector (aref (aref (plist-get result :embeddings) 0) i)
                       :payload (aref (aref (plist-get result :metadatas) 0) 
i)))))
 
diff --git a/vecdb-integration-test.el b/vecdb-integration-test.el
new file mode 100644
index 0000000000..755e0702e1
--- /dev/null
+++ b/vecdb-integration-test.el
@@ -0,0 +1,217 @@
+;;; vecdb-integration-test.el --- Integration tests for vecdb -*- 
lexical-binding: t; -*-
+
+;; Copyright (C) 2025 Google LLC
+;;
+;; Author: Andrew Hyatt <ahy...@gmail.com>
+;; Maintainer: Andrew Hyatt <ahy...@gmail.com>
+;;
+;; This file is not part of GNU Emacs.
+;;
+;; Licensed under the Apache License, Version 2.0 (the "License");
+;; you may not use this file except in compliance with the License.
+;; You may obtain a copy of the License at
+;;
+;;     http://www.apache.org/licenses/LICENSE-2.0
+;;
+;; Unless required by applicable law or agreed to in writing, software
+;; distributed under the License is distributed on an "AS IS" BASIS,
+;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+;; See the License for the specific language governing permissions and
+;; limitations under the License.
+;;
+;;; Commentary:
+;;
+;;  This file contains integration tests for the `vecdb' package,
+;;  supporting multiple backends like Chroma and Qdrant.
+;;  Tests for specific backends run if corresponding environment variables are 
set.
+;;
+;;  To run these tests manually:
+;;  1. Ensure the relevant vector database (Chroma and/or Qdrant) is running.
+;;  2. Set the necessary environment variables:
+;;     For Chroma: CHROMA_URL (e.g., "http://localhost:8000";)
+;;                 CHROMA_TENANT (optional, defaults to "default")
+;;                 CHROMA_DATABASE (optional, defaults to "default")
+;;     For Qdrant: QDRANT_URL (e.g., "http://localhost:6333";)
+;;                 QDRANT_API_KEY (e.g., "your-api-key")
+;;  3. Execute from the command line:
+;;     emacs -batch -l ert -l vecdb-integration-test.el -f 
ert-run-tests-batch-and-exit
+;;
+;;  The tests for a specific provider will be skipped if its required
+;;  environment variables are not set. If no providers are configured,
+;;  all tests will be skipped.
+;;
+;;; Code:
+
+(require 'ert)
+(require 'vecdb)
+(require 'vecdb-chroma)
+(require 'vecdb-qdrant)
+(require 'cl-lib) ;; For cl-remove-if-not, cl-every
+
+(declare-function chroma-ext--tmp-project-dir "ext-chroma")
+
+;;; Helper Functions
+
+(defun vecdb-test--get-providers ()
+  "Initialize and return a list of available `vecdb-provider' structs.
+Reads configuration from environment variables.
+Skips tests if no providers are configured."
+  (let ((providers (list)))
+    ;; ChromaDB Configuration
+    (let ((chroma-url (getenv "CHROMA_URL")))
+      (when chroma-url
+        (add-to-list 'providers
+                     (make-vecdb-chroma-provider
+                      :url chroma-url
+                      :tenant (or (getenv "CHROMA_TENANT") "default")
+                      :database (or (getenv "CHROMA_DATABASE") "default"))
+                     t)))
+
+    ;; Qdrant Configuration
+    (let ((qdrant-url (getenv "QDRANT_URL")))
+      (when qdrant-url
+        (let ((qdrant-api-key (getenv "QDRANT_API_KEY")))
+          (if qdrant-api-key
+              (add-to-list 'providers
+                           (make-vecdb-qdrant-provider
+                            :url qdrant-url
+                            :api-key qdrant-api-key)
+                           t)
+            (warn "QDRANT_URL is set, but QDRANT_API_KEY is missing. Qdrant 
provider will not be configured.")))))
+
+    (when (null providers)
+      (ert-skip "No vector database provider environment variables set. 
(CHROMA_URL or QDRANT_URL must be set)"))
+    providers))
+
+(defmacro vecdb-test--deftest-for-providers (base-name body-function &optional 
docstring)
+  "Define `ert-deftest` forms for BASE-NAME against Chroma and Qdrant 
providers.
+BODY-FUNCTION is a symbol of a function that takes one argument (the provider).
+Docstring is optional, a default will be generated.
+
+Each generated test will individually skip if its specific provider
+is not found in the list returned by `vecdb-test--get-providers` (which
+itself might globally skip if no providers at all are configured)."
+  (declare (indent defun))
+  (let ((chroma-test-name (intern (format "%s-chroma" base-name)))
+        (qdrant-test-name (intern (format "%s-qdrant" base-name)))
+        (base-doc (or docstring (format "Test %s for a vector database 
provider." base-name))))
+    `(progn
+       (ert-deftest ,chroma-test-name ()
+         ,(format "%s (Chroma)" base-doc)
+         (interactive) ; Keep interactive for manual runs if desired
+         (let ((current-provider (cl-find-if (lambda (p) (eq (type-of p) 
'vecdb-chroma-provider))
+                                             (vecdb-test--get-providers))))
+           (if current-provider
+               (funcall ,body-function current-provider)
+             (ert-skip (format "Chroma provider not configured for %s" 
',chroma-test-name)))))
+
+       (ert-deftest ,qdrant-test-name ()
+         ,(format "%s (Qdrant)" base-doc)
+         (interactive)
+         (let ((current-provider (cl-find-if (lambda (p) (eq (type-of p) 
'vecdb-qdrant-provider))
+                                             (vecdb-test--get-providers))))
+           (if current-provider
+               (funcall ,body-function current-provider)
+             (ert-skip (format "Qdrant provider not configured for %s" 
',qdrant-test-name))))))))
+
+(defmacro with-test-collection (current-provider collection-var 
collection-name-base options &rest body)
+  "Execute BODY with COLLECTION-VAR bound to a new collection.
+CURRENT-PROVIDER is the provider instance.
+COLLECTION-VAR is the symbol to bind the collection struct to.
+COLLECTION-NAME-BASE is the base string for the collection name.
+OPTIONS is a plist, e.g., '(:vector-size 3).
+The full collection name is generated by appending the provider's name.
+The collection is created before BODY and deleted afterwards."
+  (declare (indent 1) (debug t))
+  (let ((full-collection-name (gensym "full-collection-name-"))
+        (vector-size-val (gensym "vector-size-"))
+        (default-vector-size 3))
+    `(let* ((,full-collection-name (format "%s-%s" ,collection-name-base 
(vecdb-provider-name ,current-provider)))
+            (,vector-size-val (or (plist-get ,options :vector-size) 
,default-vector-size))
+            (,collection-var (make-vecdb-collection :name 
,full-collection-name :vector-size ,vector-size-val)))
+       (unwind-protect
+           (progn
+             (vecdb-create ,current-provider ,collection-var)
+             ,@body)
+         ;; Ensure cleanup even if vecdb-create fails or body errors
+         (if (vecdb-exists ,current-provider ,collection-var)
+             (vecdb-delete ,current-provider ,collection-var)
+           (message "Collection %s did not exist or was already deleted during 
cleanup." ,full-collection-name))))))
+
+;;; Test cases Body Functions
+
+(defun vecdb-test-create-exists-delete-collection-body (current-provider)
+  "Core logic for testing create, exists, and delete collection."
+  (let* ((collection-name-base "test-collection-ced")
+         (collection-name (format "%s-%s" collection-name-base 
(vecdb-provider-name current-provider)))
+         (collection (make-vecdb-collection :name collection-name :vector-size 
3)))
+    (unwind-protect
+        (progn
+          (vecdb-create current-provider collection)
+          (should (vecdb-exists current-provider collection))
+          (vecdb-delete current-provider collection)
+          (should-not (vecdb-exists current-provider collection)))
+      ;; Cleanup in case of error during assertions
+      (when (vecdb-exists current-provider collection)
+        (vecdb-delete current-provider collection)))))
+
+(vecdb-test--deftest-for-providers vecdb-test-create-exists-delete-collection
+  #'vecdb-test-create-exists-delete-collection-body
+  "Test `vecdb-create', `vecdb-exists', and `vecdb-delete'.")
+
+(defun vecdb-test-upsert-get-delete-items-body (current-provider)
+  "Core logic for testing upsert and get items."
+  (let* ((collection-name "test-collection-ug")
+         (vector-size 3)
+         (items (list
+                 (make-vecdb-item :id 1 :vector [0 1 2] :payload '(:val 1))
+                 (make-vecdb-item :id 2 :vector [0 1 2] :payload '(:val 2))
+                 (make-vecdb-item :id 3 :vector [0 1 2] :payload '(:val 3)))))
+    (with-test-collection current-provider current-collection collection-name 
`(:vector-size ,vector-size)
+                          (vecdb-upsert-items current-provider 
current-collection items t)
+                          (dolist (item items)
+                            (let ((retrieved-item (vecdb-get-item 
current-provider current-collection (vecdb-item-id item))))
+                              (should retrieved-item)
+                              (should (equal (vecdb-item-id item) 
(vecdb-item-id retrieved-item)))
+                              ;; We don't test to see if the vector is equal,
+                              ;; because it could be normalized.
+                              (should (equal (vecdb-item-payload item) 
(vecdb-item-payload retrieved-item)))))
+                          (vecdb-delete-items current-provider 
current-collection (mapcar #'vecdb-item-id items) t)
+                          (dolist (item items)
+                            (should-not (vecdb-get-item current-provider 
current-collection (vecdb-item-id item)))))))
+
+(vecdb-test--deftest-for-providers vecdb-test-upsert-get-delete-items
+  #'vecdb-test-upsert-get-delete-items-body
+  "Test `vecdb-upsert-items', `vecdb-get-item' and `vecdb-delete-items'.")
+
+(defun vecdb-test-search-by-vector-body (current-provider)
+  "Core logic for testing search by vector."
+  (let* ((collection-name "test-collection-sv")
+         (vector-size 3)
+         (item1 (make-vecdb-item :id 1 :vector [0.1 0.2 0.3] :payload '(:val 
1)))
+         (item2 (make-vecdb-item :id 2 :vector [0.4 0.5 0.6] :payload '(:val 
2)))
+         (item3 (make-vecdb-item :id 3 :vector [0.7 0.8 0.9] :payload '(:val 
3)))
+         (items (list item1 item2 item3)))
+    (with-test-collection current-provider current-collection collection-name 
`(:vector-size ,vector-size)
+                          (vecdb-upsert-items current-provider 
current-collection items t)
+                          ;; Search for a vector similar to item2
+                          (let ((results (vecdb-search-by-vector 
current-provider current-collection [0.41 0.51 0.61] 3)))
+                            (should (= (length results) 3))
+                            ;; Item2 should be the closest
+                            (let ((first-result (car results)))
+                              (should (equal (vecdb-item-id first-result) 
(vecdb-item-id item2)))
+                              (should (equal (vecdb-item-payload first-result) 
(vecdb-item-payload item2))))
+                            ;; Check if all original items are present in 
results (order might vary beyond the first)
+                            (should (cl-every (lambda (item)
+                                                (cl-find-if (lambda (res-item) 
(equal (vecdb-item-id item) (vecdb-item-id res-item)))
+                                                            results))
+                                              items))))))
+
+(vecdb-test--deftest-for-providers
+  vecdb-test-search-by-vector
+  #'vecdb-test-search-by-vector-body
+  "Test `vecdb-search-by-vector'.")
+
+(provide 'vecdb-integration-test)
+
+;;; vecdb-integration-test.el ends here
diff --git a/vecdb-qdrant.el b/vecdb-qdrant.el
index 65cdf38656..d6e35b0a14 100644
--- a/vecdb-qdrant.el
+++ b/vecdb-qdrant.el
@@ -106,14 +106,19 @@ SYNC indicates whether the request should be synchronous."
                               (collection vecdb-collection)
                               id)
   "Get items with ID from the COLLECTION with PROVIDER."
-  (let ((result (vecdb-qdrant-call provider 'get
-                                   (concat "/collections/" 
(vecdb-collection-name collection) "/points/" id))))
-    (when result
-      (let ((point (plist-get result :result)))
-        (when point
-          (make-vecdb-item :id (plist-get point :id)
-                           :vector (plist-get point :vector)
-                           :payload (plist-get point :payload)))))))
+  (condition-case err
+      (let ((result (vecdb-qdrant-call provider 'get
+                                       (format "/collections/%s/points/%s" 
(vecdb-collection-name collection) id) nil t)))
+        (when result
+          (let ((point (plist-get result :result)))
+            (when point
+              (make-vecdb-item :id (plist-get point :id)
+                               :vector (plist-get point :vector)
+                               :payload (plist-get point :payload))))))
+    (plz-error
+     (if (eq 404 (plz-response-status (plz-error-response (nth 2 err))))
+         nil
+       (error "Error retrieving item: %s" (plz-error-message err))))))
 
 (cl-defmethod vecdb-delete-items ((provider vecdb-qdrant-provider)
                                   (collection vecdb-collection)
diff --git a/vecdb.el b/vecdb.el
index 6c94bb7d63..78179c804b 100644
--- a/vecdb.el
+++ b/vecdb.el
@@ -75,7 +75,8 @@ An ID may be an integer or a string, and is used to uniquely 
identify the item."
           (list "vecdb-upsert not implemented for" (vecdb-provider-name 
provider))))
 
 (cl-defgeneric vecdb-get-item (provider collection id)
-  "Get items with ID from the COLLECTION with PROVIDER."
+  "Get items with ID from the COLLECTION with PROVIDER.
+If the item does not exist, return `nil'."
   (ignore collection id)
   (signal 'not-implemented
           (list "vecdb-get not implemented for" (vecdb-provider-name 
provider))))

Reply via email to