xloya commented on code in PR #4320:
URL: https://github.com/apache/gravitino/pull/4320#discussion_r1770705835


##########
clients/filesystem-hadoop3/src/main/java/org/apache/gravitino/filesystem/hadoop/GravitinoVirtualFileSystem.java:
##########
@@ -381,51 +339,66 @@ NameIdentifier extractIdentifier(URI virtualUri) {
     return NameIdentifier.of(metalakeName, matcher.group(1), matcher.group(2), 
matcher.group(3));
   }
 
-  private FilesetContext getFilesetContext(Path virtualPath) {
+  private FilesetContextPair getFilesetContext(Path virtualPath, 
FilesetDataOperation operation) {
     NameIdentifier identifier = extractIdentifier(virtualPath.toUri());
-    Pair<Fileset, FileSystem> pair = filesetCache.get(identifier, 
this::constructNewFilesetPair);
-    Preconditions.checkState(
-        pair != null,
-        "Cannot get the pair of fileset instance and actual file system for 
%s",
-        identifier);
-    Path actualPath = getActualPathByIdentifier(identifier, pair, virtualPath);
-    return FilesetContext.builder()
-        .withIdentifier(identifier)
-        .withFileset(pair.getLeft())
-        .withFileSystem(pair.getRight())
-        .withActualPath(actualPath)
-        .build();
-  }
+    String virtualPathString = virtualPath.toString();
+    String subPath = getSubPathFromVirtualPath(identifier, virtualPathString);
 
-  private Pair<Fileset, FileSystem> constructNewFilesetPair(NameIdentifier 
identifier) {
-    // Always create a new file system instance for the fileset.
-    // Therefore, users cannot bypass gvfs and use `FileSystem.get()` to 
directly obtain the
-    // FileSystem
-    try {
-      Fileset fileset = loadFileset(identifier);
-      URI storageUri = URI.create(fileset.storageLocation());
-      FileSystem actualFileSystem = FileSystem.newInstance(storageUri, 
getConf());
-      Preconditions.checkState(actualFileSystem != null, "Cannot get the 
actual file system");
-      return Pair.of(fileset, actualFileSystem);
-    } catch (IOException e) {
-      throw new RuntimeException(
-          String.format(
-              "Cannot create file system for fileset: %s, exception: %s",
-              identifier, e.getMessage()),
-          e);
-    } catch (RuntimeException e) {
-      throw new RuntimeException(
-          String.format(
-              "Cannot load fileset: %s from the server. exception: %s",
-              identifier, e.getMessage()));
-    }
+    NameIdentifier catalogIdent = NameIdentifier.of(metalakeName, 
identifier.namespace().level(1));
+    FilesetCatalog filesetCatalog =
+        catalogCache.get(
+            catalogIdent, ident -> 
client.loadCatalog(catalogIdent.name()).asFilesetCatalog());
+    Preconditions.checkArgument(
+        filesetCatalog != null, String.format("Loaded fileset catalog: %s is 
null.", catalogIdent));
+
+    // set the thread local audit info
+    Map<String, String> contextMap = Maps.newHashMap();
+    contextMap.put(
+        FilesetAuditConstants.HTTP_HEADER_INTERNAL_CLIENT_TYPE,
+        InternalClientType.HADOOP_GVFS.name());
+    contextMap.put(FilesetAuditConstants.HTTP_HEADER_FILESET_DATA_OPERATION, 
operation.name());
+    CallerContext callerContext = 
CallerContext.builder().withContext(contextMap).build();
+    CallerContext.CallerContextHolder.set(callerContext);
+
+    String actualFileLocation =
+        filesetCatalog.getFileLocation(
+            NameIdentifier.of(identifier.namespace().level(2), 
identifier.name()), subPath);
+
+    URI uri = new Path(actualFileLocation).toUri();
+    // we cache the fs for the same scheme, so we can reuse it
+    FileSystem fs =
+        internalFileSystemCache.get(
+            uri.getScheme(),

Review Comment:
   Yes, logically the scheme will not be null here, because the URI is 
composed of the storage location of the fileset (when a fileset is created, the 
storage location is formalized, so the storage location always has a scheme; 
see 
https://github.com/apache/gravitino/blob/main/catalogs/catalog-hadoop/src/main/java/org/apache/gravitino/catalog/hadoop/HadoopCatalogOperations.java#L233)
 and the sub path on the server side. But I think we can still add a null check 
here to tell users that they are using a wrong actual path.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@gravitino.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to