branch: externals/vecdb commit b8a604f64df181cc788986dbce6c05073f9a18a1 Author: Andrew Hyatt <ahy...@continua.ai> Commit: Andrew Hyatt <ahy...@continua.ai>
Fix tests, and fix qdrant integration to get tests to pass --- README.org | 11 +-- vecdb-integration-test.el | 170 +++++++++++++--------------------------------- vecdb-qdrant.el | 21 +++--- vecdb.el | 3 +- 4 files changed, 70 insertions(+), 135 deletions(-) diff --git a/README.org b/README.org index 7c010e6410..2c9fbb2a25 100644 --- a/README.org +++ b/README.org @@ -30,15 +30,16 @@ Collections must be created before they can be used with ~vecdb-create~, and ~ve They can also be deleted with ~vecdb-delete~. * Adding and replacing data -Before data is queried, it must be added. This is done via a batch operation on -a group of data, ~vecdb-upsert-items~. This either creates an item in the collection -or replaces it, based on the =id= of the item. Here's an example of adding or -replacing one item: +Before data is queried, it must be added. This is done via a batch operation on +a group of data, ~vecdb-upsert-items~. This either creates an item in the +collection or replaces it, based on the =id= of the item. =id= should be an integer +or a string UUID. Seeing as how emacs does not provide a UUID library, probably +an integer is the best choice. #+begin_src emacs-lisp (vecdb-upsert-items my-vecdb-provider my-vecdb-collection (list (make-vecdb-item - :id "example-id" + :id 91 :vector [0.1 0.2 0.3 0.4] :payload '(:my-id "235913926")))) #+end_src diff --git a/vecdb-integration-test.el b/vecdb-integration-test.el index e16d9f1701..90ea5f2ac7 100644 --- a/vecdb-integration-test.el +++ b/vecdb-integration-test.el @@ -1,14 +1,9 @@ -;;; vecdb-integration-test.el --- Integration tests for vecdb-chroma -*- lexical-binding: t; -*- +;;; vecdb-integration-test.el --- Integration tests for vecdb -*- lexical-binding: t; -*- -;; Copyright (C) 2023 Google LLC +;; Copyright (C) 2025 Google LLC ;; -;; Author: Google LLC -;; Maintainer: Google LLC -;; Created: November 2023 -;; Modified: November 2023 -;; Version: 0.1 -;; Keywords: tools, ai -;; Package-Requires: ((emacs "29.1") (vecdb "0.1") (chroma-ext "0.1")) +;; Author: Andrew Hyatt <ahy...@gmail.com> +;; Maintainer: Andrew Hyatt <ahy...@gmail.com> ;; ;; This file is not part of GNU Emacs. ;; @@ -96,6 +91,7 @@ Docstring is optional, a default will be generated. Each generated test will individually skip if its specific provider is not found in the list returned by `vecdb-test--get-providers` (which itself might globally skip if no providers at all are configured)." + (declare (indent defun)) (let ((chroma-test-name (intern (format "%s-chroma" base-name))) (qdrant-test-name (intern (format "%s-qdrant" base-name))) (base-doc (or docstring (format "Test %s for a vector database provider." base-name)))) @@ -118,21 +114,6 @@ itself might globally skip if no providers at all are configured)." (funcall ,body-function current-provider) (ert-skip (format "Qdrant provider not configured for %s" ',qdrant-test-name)))))))) -(defun vecdb-test--make-collection (name vector-size) - "Return a `vecdb-collection' struct with NAME and VECTOR-SIZE." - (make-vecdb-collection :name name :vector-size vector-size)) - -(defun vecdb-test--make-item (id vector payload) - "Return a `vecdb-item' struct with ID, VECTOR, and PAYLOAD (plist)." - (make-vecdb-item :id id :vector vector :payload payload)) - -(defun vecdb-test--random-vector (dimension) - "Return a vector of DIMENSION with random float numbers." - (let ((vec (make-vector dimension 0.0))) - (dotimes (i dimension) - (setf (aref vec i) (random 1.0))) - vec)) - (defmacro with-test-collection (current-provider collection-var collection-name-base options &rest body) "Execute BODY with COLLECTION-VAR bound to a new collection. CURRENT-PROVIDER is the provider instance. @@ -141,12 +122,13 @@ COLLECTION-NAME-BASE is the base string for the collection name. OPTIONS is a plist, e.g., '(:vector-size 3). The full collection name is generated by appending the provider's name. The collection is created before BODY and deleted afterwards." + (declare (indent 1) (debug t)) (let ((full-collection-name (gensym "full-collection-name-")) (vector-size-val (gensym "vector-size-")) (default-vector-size 3)) `(let* ((,full-collection-name (format "%s-%s" ,collection-name-base (vecdb-provider-name ,current-provider))) (,vector-size-val (or (plist-get ,options :vector-size) ,default-vector-size)) - (,collection-var (vecdb-test--make-collection ,full-collection-name ,vector-size-val))) + (,collection-var (make-vecdb-collection :name ,full-collection-name :vector-size ,vector-size-val))) (unwind-protect (progn (vecdb-create ,current-provider ,collection-var) @@ -156,20 +138,13 @@ The collection is created before BODY and deleted afterwards." (vecdb-delete ,current-provider ,collection-var) (message "Collection %s did not exist or was already deleted during cleanup." ,full-collection-name)))))) -;;; Test Suite Definition -;; This will need to be updated with the generated test names. -;; For now, removing the explicit list and :require, relying on ERT to find tests. -;; Or, it could be removed if tests are not run via suite selection. -(def-ert-suite vecdb-integration-suite - "Integration tests for the `vecdb' package against various backends.") - ;;; Test cases Body Functions (defun vecdb-test-create-exists-delete-collection-body (current-provider) "Core logic for testing create, exists, and delete collection." (let* ((collection-name-base "test-collection-ced") (collection-name (format "%s-%s" collection-name-base (vecdb-provider-name current-provider))) - (collection (vecdb-test--make-collection collection-name 3))) + (collection (make-vecdb-collection :name collection-name :vector-size 3))) (unwind-protect (progn (should (vecdb-create current-provider collection)) @@ -180,109 +155,62 @@ The collection is created before BODY and deleted afterwards." (when (vecdb-exists current-provider collection) (vecdb-delete current-provider collection))))) -(vecdb-test--deftest-for-providers - vecdb-test-create-exists-delete-collection - #'vecdb-test-create-exists-delete-collection-body - "Test `vecdb-create', `vecdb-exists', and `vecdb-delete'.") +(vecdb-test--deftest-for-providers vecdb-test-create-exists-delete-collection + #'vecdb-test-create-exists-delete-collection-body + "Test `vecdb-create', `vecdb-exists', and `vecdb-delete'.") -(defun vecdb-test-upsert-get-items-body (current-provider) +(defun vecdb-test-upsert-get-delete-items-body (current-provider) "Core logic for testing upsert and get items." (let* ((collection-name "test-collection-ug") (vector-size 3) (items (list - (vecdb-test--make-item "item1" (vecdb-test--random-vector vector-size) '(:val 1)) - (vecdb-test--make-item "item2" (vecdb-test--random-vector vector-size) '(:val 2)) - (vecdb-test--make-item "item3" (vecdb-test--random-vector vector-size) '(:val 3))))) + (make-vecdb-item :id 1 :vector [0 1 2] :payload '(:val 1)) + (make-vecdb-item :id 2 :vector [0 1 2] :payload '(:val 2)) + (make-vecdb-item :id 3 :vector [0 1 2] :payload '(:val 3))))) (with-test-collection current-provider current-collection collection-name `(:vector-size ,vector-size) - (should (vecdb-upsert-items current-provider current-collection items)) - (dolist (item items) - (let ((retrieved-item (vecdb-get-item current-provider current-collection (vecdb-item-id item)))) - (should retrieved-item) - (should (equal (vecdb-item-id item) (vecdb-item-id retrieved-item))) - (should (equal (vecdb-item-vector item) (vecdb-item-vector retrieved-item))) - (should (equal (vecdb-item-payload item) (vecdb-item-payload retrieved-item)))))))) - -(vecdb-test--deftest-for-providers - vecdb-test-upsert-get-items - #'vecdb-test-upsert-get-items-body - "Test `vecdb-upsert-items' and `vecdb-get-item'.") - -(defun vecdb-test-delete-items-body (current-provider) - "Core logic for testing delete items." - (let* ((collection-name "test-collection-di") - (vector-size 3) - (item1 (vecdb-test--make-item "item1" (vecdb-test--random-vector vector-size) '(:val 1))) - (item2 (vecdb-test--make-item "item2" (vecdb-test--random-vector vector-size) '(:val 2))) - (item3 (vecdb-test--make-item "item3" (vecdb-test--random-vector vector-size) '(:val 3))) - (items (list item1 item2 item3))) - (with-test-collection current-provider current-collection collection-name `(:vector-size ,vector-size) - (vecdb-upsert-items current-provider current-collection items) - (should (vecdb-delete-items current-provider current-collection (list (vecdb-item-id item1) (vecdb-item-id item3)))) - (should-not (vecdb-get-item current-provider current-collection (vecdb-item-id item1))) - (should (vecdb-get-item current-provider current-collection (vecdb-item-id item2))) - (should-not (vecdb-get-item current-provider current-collection (vecdb-item-id item3)))))) - -(vecdb-test--deftest-for-providers - vecdb-test-delete-items - #'vecdb-test-delete-items-body - "Test `vecdb-delete-items'.") + (should (vecdb-upsert-items current-provider current-collection items t)) + (dolist (item items) + (let ((retrieved-item (vecdb-get-item current-provider current-collection (vecdb-item-id item)))) + (should retrieved-item) + (should (equal (vecdb-item-id item) (vecdb-item-id retrieved-item))) + ;; We don't test to see if the vector is equal, + ;; because it could be normalized. + (should (equal (vecdb-item-payload item) (vecdb-item-payload retrieved-item))))) + (vecdb-delete-items current-provider current-collection (mapcar #'vecdb-item-id items) t) + (dolist (item items) + (should-not (vecdb-get-item current-provider current-collection (vecdb-item-id item))))))) + +(vecdb-test--deftest-for-providers vecdb-test-upsert-get-delete-items + #'vecdb-test-upsert-get-delete-items-body + "Test `vecdb-upsert-items', `vecdb-get-item' and `vecdb-delete-items'.") (defun vecdb-test-search-by-vector-body (current-provider) "Core logic for testing search by vector." (let* ((collection-name "test-collection-sv") (vector-size 3) - (item1 (vecdb-test--make-item "item1" (vector 0.1 0.2 0.3) '(:val 1))) - (item2 (vecdb-test--make-item "item2" (vector 0.4 0.5 0.6) '(:val 2))) - (item3 (vecdb-test--make-item "item3" (vector 0.7 0.8 0.9) '(:val 3))) + (item1 (make-vecdb-item :id 1 :vector [0.1 0.2 0.3] :payload '(:val 1))) + (item2 (make-vecdb-item :id 2 :vector [0.4 0.5 0.6] :payload '(:val 2))) + (item3 (make-vecdb-item :id 3 :vector [0.7 0.8 0.9] :payload '(:val 3))) (items (list item1 item2 item3))) (with-test-collection current-provider current-collection collection-name `(:vector-size ,vector-size) - (vecdb-upsert-items current-provider current-collection items) - ;; Search for a vector similar to item2 - (let* ((search-vector (vector 0.41 0.51 0.61)) - (results (vecdb-search-by-vector current-provider current-collection search-vector 3))) - (should (= (length results) 3)) - ;; Item2 should be the closest - (let ((first-result (car results))) - (should (equal (vecdb-item-id first-result) (vecdb-item-id item2))) - (should (equal (vecdb-item-vector first-result) (vecdb-item-vector item2))) - (should (equal (vecdb-item-payload first-result) (vecdb-item-payload item2)))) - ;; Check if all original items are present in results (order might vary beyond the first) - (should (cl-every (lambda (item) - (cl-find-if (lambda (res-item) (equal (vecdb-item-id item) (vecdb-item-id res-item))) - results)) - items)))))) + (vecdb-upsert-items current-provider current-collection items) + ;; Search for a vector similar to item2 + (let ((results (vecdb-search-by-vector current-provider current-collection [0.41 0.51 0.61] 3))) + (should (= (length results) 3)) + ;; Item2 should be the closest + (let ((first-result (car results))) + (should (equal (vecdb-item-id first-result) (vecdb-item-id item2))) + (should (equal (vecdb-item-payload first-result) (vecdb-item-payload item2)))) + ;; Check if all original items are present in results (order might vary beyond the first) + (should (cl-every (lambda (item) + (cl-find-if (lambda (res-item) (equal (vecdb-item-id item) (vecdb-item-id res-item))) + results)) + items)))))) (vecdb-test--deftest-for-providers - vecdb-test-search-by-vector - #'vecdb-test-search-by-vector-body - "Test `vecdb-search-by-vector'.") - -(defun vecdb-test-search-by-vector-with-filter-body (current-provider) - "Core logic for testing search by vector with filter." - (let* ((collection-name "test-collection-svf") - (vector-size 2) - (item1 (vecdb-test--make-item "itemA1" (vector 0.1 0.2) '(:type "A" :val 1))) - (item2 (vecdb-test--make-item "itemB1" (vector 0.3 0.4) '(:type "B" :val 2))) - (item3 (vecdb-test--make-item "itemA2" (vector 0.5 0.6) '(:type "A" :val 3))) - (items (list item1 item2 item3))) - (with-test-collection current-provider current-collection collection-name `(:vector-size ,vector-size) - (vecdb-upsert-items current-provider current-collection items) - (let* ((search-vector (vector 0.11 0.21)) ; Closer to itemA1 - ;; Filter for items of type "A" - (results (vecdb-search-by-vector current-provider current-collection search-vector 3 '(:type "A")))) - (should (= (length results) 2)) - (should (cl-every (lambda (item) (equal (getf (vecdb-item-payload item) :type) "A")) results)) - ;; itemA1 should be the first result - (let ((first-result (car results))) - (should (equal (vecdb-item-id first-result) (vecdb-item-id item1)))) - ;; Ensure itemB1 is not in the results - (should-not (cl-find-if (lambda (res-item) (equal (vecdb-item-id res-item) "itemB1")) results)))))) - -(vecdb-test--deftest-for-providers - vecdb-test-search-by-vector-with-filter - #'vecdb-test-search-by-vector-with-filter-body - "Test `vecdb-search-by-vector' with a metadata filter.") - + vecdb-test-search-by-vector + #'vecdb-test-search-by-vector-body + "Test `vecdb-search-by-vector'.") (provide 'vecdb-integration-test) diff --git a/vecdb-qdrant.el b/vecdb-qdrant.el index 65cdf38656..d6e35b0a14 100644 --- a/vecdb-qdrant.el +++ b/vecdb-qdrant.el @@ -106,14 +106,19 @@ SYNC indicates whether the request should be synchronous." (collection vecdb-collection) id) "Get items with ID from the COLLECTION with PROVIDER." - (let ((result (vecdb-qdrant-call provider 'get - (concat "/collections/" (vecdb-collection-name collection) "/points/" id)))) - (when result - (let ((point (plist-get result :result))) - (when point - (make-vecdb-item :id (plist-get point :id) - :vector (plist-get point :vector) - :payload (plist-get point :payload))))))) + (condition-case err + (let ((result (vecdb-qdrant-call provider 'get + (format "/collections/%s/points/%s" (vecdb-collection-name collection) id) nil t))) + (when result + (let ((point (plist-get result :result))) + (when point + (make-vecdb-item :id (plist-get point :id) + :vector (plist-get point :vector) + :payload (plist-get point :payload)))))) + (plz-error + (if (eq 404 (plz-response-status (plz-error-response (nth 2 err)))) + nil + (error "Error retrieving item: %s" (plz-error-message err)))))) (cl-defmethod vecdb-delete-items ((provider vecdb-qdrant-provider) (collection vecdb-collection) diff --git a/vecdb.el b/vecdb.el index 6c94bb7d63..78179c804b 100644 --- a/vecdb.el +++ b/vecdb.el @@ -75,7 +75,8 @@ An ID may be an integer or a string, and is used to uniquely identify the item." (list "vecdb-upsert not implemented for" (vecdb-provider-name provider)))) (cl-defgeneric vecdb-get-item (provider collection id) - "Get items with ID from the COLLECTION with PROVIDER." + "Get items with ID from the COLLECTION with PROVIDER. +If the item does not exist, return `nil'." (ignore collection id) (signal 'not-implemented (list "vecdb-get not implemented for" (vecdb-provider-name provider))))