This is an automated email from the ASF dual-hosted git repository.

akaashrp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 8039963c23 [Web][COS] Persist URL→hash mapping across page loads (#19569)
8039963c23 is described below

commit 8039963c23b79885aacfcf7e37b02bb0dd0180f9
Author: Thomas Steiner <[email protected]>
AuthorDate: Mon Jun 1 21:33:20 2026 +0200

    [Web][COS] Persist URL→hash mapping across page loads (#19569)
    
    The CrossOriginStorage class was storing the URL→hash map only in the
    module-level GLOBAL_HASH_CACHE. After a page reload that cache is empty,
    and getFileHash() can only recover hashes for HuggingFace LFS files
    (URLs containing /resolve/). This left several resource categories
    uncacheable across sessions:
    
    <img width="1197" height="279" alt="Screenshot 2026-05-15 at 17 43 15"
    src="https://github.com/user-attachments/assets/c9943910-9002-4b06-afdd-6288b7e22ba6"
    />
    
    - JSON files not stored in LFS (mlc-chat-config.json, tokenizer.json,
    tensor-cache.json) — getFileHash returns null for their /resolve/ URLs
    because the raw pointer is the actual file content, not an LFS pointer.
    - .wasm files from GitHub raw URLs — no /resolve/ pattern at all.
    - Any file whose hash was computed from blob content via getBlobHash.
    
    Additionally, even for genuine LFS model shards, each page load was
    re-fetching every shard's LFS pointer file over the network just to
    re-derive the SHA-256 hash.
    
    Fix: persist the URL→hash mapping to a dedicated Cache API store
    (tvmjs-cos-hash-meta). Two write sites:
    
    1. put() — after a file is stored in COS, persist its blob-derived hash.
    This covers all non-LFS files and non-HuggingFace URLs.
    
    2. resolveHashDescriptor() — after getFileHash() resolves a hash from
    the LFS pointer, persist it immediately. This eliminates repeated
    pointer-file network requests for model shards on subsequent visits.
    
    Both write sites use a best-effort try/catch so storage quota errors are
    silently ignored. loadPersistedHashEntry() similarly swallows errors.
    The typeof caches === "undefined" guard keeps the code safe in Node.js
    test environments.
---
 web/src/artifact_cache.ts | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/web/src/artifact_cache.ts b/web/src/artifact_cache.ts
index d36573cccc..0a1bcad589 100644
--- a/web/src/artifact_cache.ts
+++ b/web/src/artifact_cache.ts
@@ -133,6 +133,7 @@ declare global {
 
 const HASH_ALGORITHM = "SHA-256";
 const DEFAULT_FETCH_OPTIONS: RequestInit = { method: "GET" };
+const COS_HASH_META_CACHE = "tvmjs-cos-hash-meta";
 let crossOriginFallbackWarningLogged = false;
 
 const GLOBAL_HASH_CACHE = new Map<
@@ -194,6 +195,7 @@ class CrossOriginStorage {
     await writableStream.write(blob);
     await writableStream.close();
     this.hashCache.set(url, hash);
+    await this.persistHashEntry(url, hash);
   }
 
   async delete(_request: RequestLike): Promise<void> {
@@ -224,6 +226,39 @@ class CrossOriginStorage {
     throw new Error("CrossOriginStorage: Unsupported request type.");
   }
 
+  private async persistHashEntry(
+    url: string,
+    hash: CrossOriginHashDescriptor,
+  ): Promise<void> {
+    try {
+      if (typeof caches === "undefined") {
+        return;
+      }
+      const store = await caches.open(COS_HASH_META_CACHE);
+      await store.put(url, new Response(JSON.stringify(hash)));
+    } catch {
+      // best-effort: ignore storage errors
+    }
+  }
+
+  private async loadPersistedHashEntry(
+    url: string,
+  ): Promise<CrossOriginHashDescriptor | null> {
+    try {
+      if (typeof caches === "undefined") {
+        return null;
+      }
+      const store = await caches.open(COS_HASH_META_CACHE);
+      const response = await store.match(url);
+      if (!response) {
+        return null;
+      }
+      return JSON.parse(await response.text()) as CrossOriginHashDescriptor;
+    } catch {
+      return null;
+    }
+  }
+
   private async resolveHashDescriptor(
     url: string,
   ): Promise<CrossOriginHashDescriptor | null> {
@@ -231,6 +266,15 @@ class CrossOriginStorage {
     if (cached) {
       return cached;
     }
+    // Check persistent store before falling back to network-based hash extraction.
+    // This covers non-LFS files (JSON configs, tokenizers) and non-HuggingFace URLs
+    // (e.g. GitHub raw .wasm files) whose hashes were computed from blob content on a
+    // previous visit and persisted to the Cache API.
+    const persisted = await this.loadPersistedHashEntry(url);
+    if (persisted) {
+      this.hashCache.set(url, persisted);
+      return persisted;
+    }
     const hashValue = await this.getFileHash(url);
     if (!hashValue) {
       return null;
@@ -240,6 +284,9 @@ class CrossOriginStorage {
       value: hashValue,
     };
     this.hashCache.set(url, descriptor);
+    // Persist pointer-derived hashes so subsequent visits skip the LFS pointer
+    // network request (especially important for models with many shards).
+    await this.persistHashEntry(url, descriptor);
     return descriptor;
   }
 

Reply via email to