branch: elpa/gptel
commit 34ecadea17870b07fe7f146376b5a25f45fb27af
Author: Karthik Chikmagalur <karthikchikmaga...@gmail.com>
Commit: Karthik Chikmagalur <karthikchikmaga...@gmail.com>
gptel-anthropic: Update cache control to include ttl "1h" * README.org ((Optional) Interim support for Claude 3.7 Sonnet): Simplify the setup for Claude-thinking. None of the beta headers are required any longer. * gptel-anthropic.el (gptel--request-data, gptel--parse-list, gptel--parse-buffer): Increase default cache ttl for system and messages to 1 hour. Tools cache remains at a ttl of 5 minutes. It's not clear from the documentation if 1 hour caches are any more expensive than 5 minute caches, but on the whole this should reduce the costs of conversations when using `gptel-cache' with Anthropic (only). (gptel-make-anthropic): Remove obsolete headers, add extended-cache-ttl support. --- README.org | 8 +------- gptel-anthropic.el | 12 ++++++------ 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/README.org b/README.org index 349cdaae27..bec5645414 100644 --- a/README.org +++ b/README.org @@ -615,13 +615,7 @@ To use Claude 3.7 Sonnet model in its "thinking" mode, you can define a second C (gptel-make-anthropic "Claude-thinking" ;Any name you want :key "your-API-key" :stream t - :models '(claude-3-7-sonnet-20250219) - :header (lambda () (when-let* ((key (gptel--get-api-key))) - `(("x-api-key" . ,key) - ("anthropic-version" . "2023-06-01") - ("anthropic-beta" . "pdfs-2024-09-25") - ("anthropic-beta" . "output-128k-2025-02-19") - ("anthropic-beta" . "prompt-caching-2024-07-31")))) + :models '(claude-sonnet-4-20250514 claude-3-7-sonnet-20250219) :request-params '(:thinking (:type "enabled" :budget_tokens 2048) :max_tokens 4096)) #+end_src diff --git a/gptel-anthropic.el b/gptel-anthropic.el index 181e79f309..ed47d32370 100644 --- a/gptel-anthropic.el +++ b/gptel-anthropic.el @@ -216,7 +216,7 @@ Mutate state INFO with response metadata." 
;; gptel--system-message is guaranteed to be a string (plist-put prompts-plist :system `[(:type "text" :text ,gptel--system-message - :cache_control (:type "ephemeral"))]) + :cache_control (:type "ephemeral" :ttl "1h"))]) (plist-put prompts-plist :system gptel--system-message))) (when gptel-temperature (plist-put prompts-plist :temperature gptel-temperature)) @@ -362,7 +362,7 @@ TOOL-USE is a list of plists containing tool names, arguments and call results." (when (and (or (eq gptel-cache t) (memq 'message gptel-cache)) (gptel--model-capable-p 'cache)) (nconc (aref (plist-get (car (last full-prompt)) :content) 0) - '(:cache_control (:type "ephemeral")))) + '(:cache_control (:type "ephemeral" :ttl "1h")))) full-prompt)) (cl-defmethod gptel--parse-buffer ((backend gptel-anthropic) &optional max-entries) @@ -429,9 +429,10 @@ TOOL-USE is a list of plists containing tool names, arguments and call results." (if (stringp last-message) (plist-put (car (last prompts)) :content - `[(:type "text" :text ,last-message :cache_control (:type "ephemeral"))]) + `[(:type "text" :text ,last-message + :cache_control (:type "ephemeral" :ttl "1h"))]) (nconc (aref (plist-get (car (last prompts)) :content) 0) - '(:cache_control (:type "ephemeral")))))) + '(:cache_control (:type "ephemeral" :ttl "1h")))))) prompts)) (defun gptel--anthropic-parse-multipart (parts) @@ -621,8 +622,7 @@ URL `https://docs.anthropic.com/en/docs/about-claude/models#model-comparison-tab (lambda () (when-let* ((key (gptel--get-api-key))) `(("x-api-key" . ,key) ("anthropic-version" . "2023-06-01") - ("anthropic-beta" . "pdfs-2024-09-25") - ("anthropic-beta" . "prompt-caching-2024-07-31"))))) + ("anthropic-beta" . "extended-cache-ttl-2025-04-11"))))) (models gptel--anthropic-models) (host "api.anthropic.com") (protocol "https")