branch: elpa/gptel commit 4abb9581ff8aa728bfe29b8fbc987a21166f3813 Author: Karthik Chikmagalur <karthikchikmaga...@gmail.com> Commit: Karthik Chikmagalur <karthikchikmaga...@gmail.com>
gptel: Add JSON response schema parsing and preprocessing Add experimental support for structured outputs to `gptel-request' via the :schema argument. It is now possible to force an LLM to respond in JSON conforming to the provided schema. Note: This commit only adds the infrastructure required for the feature! No backend currently respects :schema. Support for all backends will be added in the next commit. There are several caveats with this feature in its current form: 1. Not all providers support it, but the major backends do: OpenAI, Anthropic, Gemini, llama-cpp and Ollama. Support for structured outputs among other "OpenAI-compatible" backends is flaky. 2. `gptel-send' does not yet support structured outputs, as it is intended to be a general chat command. Only the `gptel-request' API does. (Schema support for `gptel-send' can be added if there is sufficient demand.) 3. Schemas whose root element is of type array are not supported by most APIs. In this case the schema is wrapped in an object with a single field, "items", and it is the caller's responsibility to extract the array elements from it. 4. The JSON schema has to be supplied in one of two ways: - As an elisp object consisting of nested plists, similar to how arguments in gptel-tool definitions are provided. - As a JSON schema serialized to a string. While expressive, both formats are cumbersome for quick use, so support for other shorthand specifications is planned. * gptel.el (gptel--with-buffer-copy-internal): Copy `gptel--schema' as well. (gptel--schema, gptel-request): Add :schema argument, use the internal variable `gptel--schema' to communicate this to the payload builders (primarily `gptel--request-data'). The docstring for :schema is inadequate, but a complete one would require too many lines in an already long description. This will be updated after adding other ways to specify the schema. (gptel--parse-schema): Generic function to parse a provided schema into a backend-appropriate format. 
(gptel--preprocess-schema, gptel--dispatch-schema-type): Utility functions to sanitize provided schemas. The former is required to convert all symbols in the spec to strings (see `gptel--preprocess-tool-args' for why). The latter handles schemas provided as serialized JSON, and wraps a root-level array specification in an object. This wrapping is needed since most APIs require an object type at the schema root. (gptel--tool-use-p, gptel--tool-result-p): Don't check for `:tools' in INFO, as the Anthropic API uses an ersatz tool to provide JSON output as tool call arguments. This ersatz tool is not defined by the user and not included in `:tools'. (gptel--handle-tool-use): Handle JSON output masquerading as a tool call. This is for the Anthropic API only. --- gptel.el | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 7 deletions(-) diff --git a/gptel.el b/gptel.el index a05d4c004a..38eaf754f7 100644 --- a/gptel.el +++ b/gptel.el @@ -914,6 +914,11 @@ These parameters are combined with model-specific and backend-specific incompatible with the active backend can break gptel. Do not use this variable unless you know what you're doing!") +(defconst gptel--ersatz-json-tool "response_json" + "Name of ersatz tool used to force JSON output. + +Some APIs, like Anthropic, use a tool to produce structured JSON output.") + ;;; Utility functions @@ -1092,7 +1097,7 @@ For BUF, START, END and BODY-THUNK see `gptel--with-buffer-copy'." (with-current-buffer temp-buffer (dolist (sym '( gptel-backend gptel--system-message gptel-model gptel-mode gptel-track-response gptel-track-media - gptel-use-tools gptel-tools gptel-use-curl + gptel-use-tools gptel-tools gptel-use-curl gptel--schema gptel-use-context gptel--num-messages-to-send gptel-stream gptel-include-reasoning gptel--request-params gptel-temperature gptel-max-tokens gptel-cache)) @@ -1577,6 +1582,60 @@ file." 
(declare-function gptel-context--wrap "gptel-context") +;;; Structured output +(defvar gptel--schema nil + "Response output schema for backends that support it.") + +(cl-defgeneric gptel--parse-schema (_backend _schema) + "Parse JSON schema in a backend-appropriate way.") + +(defun gptel--dispatch-schema-type (schema) + "Convert SCHEMA to a valid elisp representation." + (when (stringp schema) + (setq schema (gptel--json-read-string schema))) + ;; The OpenAI and Anthropic APIs don't allow arrays at the root of the schema. + ;; Work around this by wrapping it in an object with the field "items". + ;; TODO(schema): Find some way to strip this extra layer from the response. + (if (member (plist-get schema :type) '("array" array)) + (list :type "object" + :properties (list :items schema) + :required ["items"] + :additionalProperties :json-false) + schema)) + +(defun gptel--preprocess-schema (spec) + "Set additionalProperties for objects in SPEC destructively. + +Convert symbol :types to strings." + ;; NOTE: Do not use `sequencep' here, as that covers strings too and breaks + ;; things. 
+ (when (or (listp spec) (vectorp spec)) + (cond + ((vectorp spec) + (cl-loop for element across spec + for idx upfrom 0 + do (aset spec idx (gptel--preprocess-schema element)))) + ((keywordp (car spec)) + (let ((tail spec)) + (while tail + (when (eq (car tail) :type) + (when (symbolp (cadr tail)) ;Convert symbol :type to string + (setcar (cdr tail) (symbol-name (cadr tail)))) + (when (equal (cadr tail) "object") ;Add additional object fields + (plist-put tail :additionalProperties :json-false) + (let ((props + (cl-loop for prop in (plist-get tail :properties) by #'cddr + collect (substring (symbol-name prop) 1)))) + (plist-put tail :required (vconcat props))))) + (when (or (listp (cadr tail)) (vectorp (cadr tail))) + (gptel--preprocess-schema (cadr tail))) + (setq tail (cddr tail))))) + ((listp spec) (dolist (element spec) + (when (listp element) + (gptel--preprocess-schema element)))))) + spec) + + ;;; Tool use (defcustom gptel-use-tools t @@ -2242,7 +2301,12 @@ Run post-response hooks." (cons 'tool-result result-alist) info) (gptel--fsm-transition fsm))))) (if (null tool-spec) - (message "Unknown tool called by model: %s" name) + (if (equal name gptel--ersatz-json-tool) ;Could be a JSON response + ;; Handle structured JSON output supplied as tool call + (funcall (plist-get info :callback) + (gptel--json-encode (plist-get tool-call :args)) + info) + (message "Unknown tool called by model: %s" name)) (setq arg-values (mapcar (lambda (arg) @@ -2277,11 +2341,9 @@ Run post-response hooks." (defun gptel--error-p (info) (plist-get info :error)) -(defun gptel--tool-use-p (info) - (and (plist-get info :tools) (plist-get info :tool-use))) +(defun gptel--tool-use-p (info) (plist-get info :tool-use)) -(defun gptel--tool-result-p (info) - (and (plist-get info :tools) (plist-get info :tool-success))) +(defun gptel--tool-result-p (info) (plist-get info :tool-success)) ;; TODO(prompt-list): Document new prompt input format to `gptel-request'. 
@@ -2292,7 +2354,7 @@ Run post-response hooks." position context dry-run (stream nil) (in-place nil) (system gptel--system-message) - transforms (fsm (gptel-make-fsm))) + schema transforms (fsm (gptel-make-fsm))) "Request a response from the `gptel-backend' for PROMPT. The request is asynchronous, this function returns immediately. @@ -2442,6 +2504,13 @@ additional information (such as from a RAG engine). and the state machine. It should run the callback after finishing its transformation. +If provided, SCHEMA forces the LLM to generate JSON output. Its value +is a JSON schema, which can be provided as an elisp object, a nested +plist structure. See the manual or the wiki for examples. + +Note: SCHEMA is presently experimental and subject to change, and not +all providers support structured output. + See `gptel-prompt-transform-functions' for more. FSM is the state machine driving the request. This can be used @@ -2470,6 +2539,7 @@ be used to rerun or continue the request at a later time." ((markerp position) position) ((integerp position) (set-marker (make-marker) position buffer)))) + (gptel--schema schema) (prompt-buffer (cond ;prompt from buffer or explicitly supplied ((null prompt)