manojpec commented on a change in pull request #3977: URL: https://github.com/apache/hudi/pull/3977#discussion_r773675837
########## File path: hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFSUtils.java ########## @@ -29,46 +32,58 @@ public class InLineFSUtils { private static final String START_OFFSET_STR = "start_offset"; private static final String LENGTH_STR = "length"; + private static final String PATH_SEPARATOR = "/"; + private static final String SCHEME_SEPARATOR = ":"; private static final String EQUALS_STR = "="; + private static final String LOCAL_FILESYSTEM_SCHEME = "file"; /** - * Fetch inline file path from outer path. - * Eg - * Input: - * Path = s3a://file1, origScheme: file, startOffset = 20, length = 40 - * Output: "inlinefs:/file1/s3a/?start_offset=20&length=40" + * Get the InlineFS Path for a given schema and its Path. + * <p> + * Examples: + * Input Path: s3a://file1, origScheme: file, startOffset = 20, length = 40 + * Output: "inlinefs://file1/s3a/?start_offset=20&length=40" * - * @param outerPath - * @param origScheme - * @param inLineStartOffset - * @param inLineLength - * @return + * @param outerPath The outer file Path + * @param origScheme The file schema + * @param inLineStartOffset Start offset for the inline file + * @param inLineLength Length for the inline file + * @return InlineFS Path for the requested outer path and schema */ public static Path getInlineFilePath(Path outerPath, String origScheme, long inLineStartOffset, long inLineLength) { - String subPath = outerPath.toString().substring(outerPath.toString().indexOf(":") + 1); + final String subPath = new File(outerPath.toString().substring(outerPath.toString().indexOf(":") + 1)).getPath(); return new Path( - InLineFileSystem.SCHEME + "://" + subPath + "/" + origScheme - + "/" + "?" + START_OFFSET_STR + EQUALS_STR + inLineStartOffset + InLineFileSystem.SCHEME + SCHEME_SEPARATOR + PATH_SEPARATOR + subPath + PATH_SEPARATOR + origScheme + + PATH_SEPARATOR + "?" + START_OFFSET_STR + EQUALS_STR + inLineStartOffset + "&" + LENGTH_STR + EQUALS_STR + inLineLength ); } /** - * Inline file format - * "inlinefs://<path_to_outer_file>/<outer_file_scheme>/?start_offset=start_offset>&length=<length>" - * Outer File format - * "<outer_file_scheme>://<path_to_outer_file>" + * InlineFS Path format: + * "inlinefs://path/to/outer/file/outer_file_schema/?start_offset=start_offset>&length=<length>" * <p> - * Eg input : "inlinefs://file1/sa3/?start_offset=20&length=40". - * Output : "sa3://file1" + * Outer File Path format: + * "outer_file_schema://path/to/outer/file" + * <p> + * Example + * Input: "inlinefs://file1/s3a/?start_offset=20&length=40". + * Output: "s3a://file1" * - * @param inlinePath inline file system path - * @return + * @param inlineFSPath InLineFS Path to get the outer file Path + * @return Outer file Path from the InLineFS Path */ - public static Path getOuterfilePathFromInlinePath(Path inlinePath) { - String scheme = inlinePath.getParent().getName(); - Path basePath = inlinePath.getParent().getParent(); - return new Path(basePath.toString().replaceFirst(InLineFileSystem.SCHEME, scheme)); + public static Path getOuterFilePathFromInlinePath(Path inlineFSPath) { + final String scheme = inlineFSPath.getParent().getName(); + final Path basePath = inlineFSPath.getParent().getParent(); + ValidationUtils.checkArgument(basePath.toString().contains(SCHEME_SEPARATOR), + "Invalid InLineFSPath: " + inlineFSPath); + + final String pathExceptScheme = basePath.toString().substring(basePath.toString().indexOf(SCHEME_SEPARATOR) + 1); + final String fullPath = scheme + SCHEME_SEPARATOR + + (scheme.equals(LOCAL_FILESYSTEM_SCHEME) ? PATH_SEPARATOR : "") Review comment: @vinothchandar Right, hdfs/s3a/s3 and all other non-local fs needs `://` as scheme separator. Where as the local fs needs `:/` or `:///`. Unit test was added to verify the expected inlinefs forward conversion and backward conversions - https://github.com/apache/hudi/pull/3977/files#diff-5b10f493d7ba18b2f58b6c2fe4e544413d47ceb0d24a980ec07b9b1b14dca167R312 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org