Hisoka-X commented on code in PR #8603: URL: https://github.com/apache/seatunnel/pull/8603#discussion_r1944064856
########## seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java: ########## @@ -403,156 +409,104 @@ private Optional<URL> findPluginJarPath(PluginIdentifier pluginIdentifier) { final String engineType = pluginIdentifier.getEngineType().toLowerCase(); final String pluginType = pluginIdentifier.getPluginType().toLowerCase(); final String pluginName = pluginIdentifier.getPluginName().toLowerCase(); - if (!pluginMappingConfig.hasPath(engineType)) { - return Optional.empty(); - } - Config engineConfig = pluginMappingConfig.getConfig(engineType); - if (!engineConfig.hasPath(pluginType)) { - return Optional.empty(); - } - Config typeConfig = engineConfig.getConfig(pluginType); - Optional<Map.Entry<String, ConfigValue>> optional = - typeConfig.entrySet().stream() - .filter(entry -> StringUtils.equalsIgnoreCase(entry.getKey(), pluginName)) - .findFirst(); - if (!optional.isPresent()) { - return Optional.empty(); - } - String pluginJarPrefix = optional.get().getValue().unwrapped().toString(); - File[] targetPluginFiles = - pluginDir - .toFile() - .listFiles( - new FileFilter() { - @Override - public boolean accept(File pathname) { - return pathname.getName().endsWith(".jar") - && StringUtils.startsWithIgnoreCase( - pathname.getName(), pluginJarPrefix); - } - }); - if (ArrayUtils.isEmpty(targetPluginFiles)) { - return Optional.empty(); - } - try { - URL pluginJarPath; - if (targetPluginFiles.length == 1) { - pluginJarPath = targetPluginFiles[0].toURI().toURL(); - } else { - pluginJarPath = - findMostSimlarPluginJarFile(targetPluginFiles, pluginJarPrefix) - .toURI() - .toURL(); - } - log.info("Discovery plugin jar for: {} at: {}", pluginIdentifier, pluginJarPath); - return Optional.of(pluginJarPath); - } catch (MalformedURLException e) { - log.warn( - "Cannot get plugin URL: {} for pluginIdentifier: {}" + targetPluginFiles[0], - pluginIdentifier, - e); - return Optional.empty(); - } - } - private static File 
findMostSimlarPluginJarFile( - File[] targetPluginFiles, String pluginJarPrefix) { - String splitRegex = "\\-|\\_|\\."; - double maxSimlarity = -Integer.MAX_VALUE; - int mostSimlarPluginJarFileIndex = -1; - for (int i = 0; i < targetPluginFiles.length; i++) { - File file = targetPluginFiles[i]; - String fileName = file.getName(); - double similarity = - CosineSimilarityUtil.cosineSimilarity(pluginJarPrefix, fileName, splitRegex); - if (similarity > maxSimlarity) { - maxSimlarity = similarity; - mostSimlarPluginJarFileIndex = i; - } - } - return targetPluginFiles[mostSimlarPluginJarFileIndex]; + return Optional.ofNullable(pluginMappingConfig.getConfig(engineType)) + .flatMap(engineConfig -> Optional.ofNullable(engineConfig.getConfig(pluginType))) + .flatMap( + typeConfig -> + typeConfig.entrySet().stream() + .filter( + entry -> + StringUtils.equalsIgnoreCase( + entry.getKey(), pluginName)) + .findFirst()) + .map(entry -> entry.getValue().unwrapped().toString()) + .map( + pluginJarPrefix -> + pluginDir + .toFile() + .listFiles( + pathname -> + pathname.getName().endsWith(".jar") + && StringUtils.startsWithIgnoreCase( + pathname.getName(), + pluginJarPrefix))) + .filter(files -> !ArrayUtils.isEmpty(files)) + .flatMap( + files -> { + try { + if (files.length == 1) { + return Optional.of(files[0].toURI().toURL()); + } else { + PluginType type = PluginType.valueOf(pluginType.toUpperCase()); + String targetPluginName = + getTargetPluginName(pluginIdentifier, type); + return selectPluginJar(files, targetPluginName); + } + } catch (MalformedURLException e) { + log.warn( + "Cannot get plugin URL for pluginIdentifier: {}", + pluginIdentifier, + e); + return Optional.empty(); + } + }) + .map( + pluginJarPath -> { + log.info( + "Discovery plugin jar for: {} at: {}", + pluginIdentifier, + pluginJarPath); + return pluginJarPath; + }); } - static class CosineSimilarityUtil { - public static double cosineSimilarity(String textA, String textB, String splitRegrex) { - Set<String> 
words1 = - new HashSet<>(Arrays.asList(textA.toLowerCase().split(splitRegrex))); - Set<String> words2 = - new HashSet<>(Arrays.asList(textB.toLowerCase().split(splitRegrex))); - int[] termFrequency1 = calculateTermFrequencyVector(textA, words1, splitRegrex); - int[] termFrequency2 = calculateTermFrequencyVector(textB, words2, splitRegrex); - return calculateCosineSimilarity(termFrequency1, termFrequency2); + private String getTargetPluginName(PluginIdentifier pluginIdentifier, PluginType type) { + switch (type) { + case SINK: Review Comment: ditto ########## seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java: ########## @@ -114,6 +118,8 @@ public AbstractPluginDiscovery( this.pluginDir = pluginDir; this.pluginMappingConfig = pluginMappingConfig; this.addURLToClassLoaderConsumer = addURLToClassLoaderConsumer; + this.sourcePluginInstance = getAllSupportedPlugins(PluginType.SOURCE); + this.sinkPluginInstance = getAllSupportedPlugins(PluginType.SINK); Review Comment: where is transform? 
########## seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java: ########## @@ -403,156 +409,104 @@ private Optional<URL> findPluginJarPath(PluginIdentifier pluginIdentifier) { final String engineType = pluginIdentifier.getEngineType().toLowerCase(); final String pluginType = pluginIdentifier.getPluginType().toLowerCase(); final String pluginName = pluginIdentifier.getPluginName().toLowerCase(); - if (!pluginMappingConfig.hasPath(engineType)) { - return Optional.empty(); - } - Config engineConfig = pluginMappingConfig.getConfig(engineType); - if (!engineConfig.hasPath(pluginType)) { - return Optional.empty(); - } - Config typeConfig = engineConfig.getConfig(pluginType); - Optional<Map.Entry<String, ConfigValue>> optional = - typeConfig.entrySet().stream() - .filter(entry -> StringUtils.equalsIgnoreCase(entry.getKey(), pluginName)) - .findFirst(); - if (!optional.isPresent()) { - return Optional.empty(); - } - String pluginJarPrefix = optional.get().getValue().unwrapped().toString(); - File[] targetPluginFiles = - pluginDir - .toFile() - .listFiles( - new FileFilter() { - @Override - public boolean accept(File pathname) { - return pathname.getName().endsWith(".jar") - && StringUtils.startsWithIgnoreCase( - pathname.getName(), pluginJarPrefix); - } - }); - if (ArrayUtils.isEmpty(targetPluginFiles)) { - return Optional.empty(); - } - try { - URL pluginJarPath; - if (targetPluginFiles.length == 1) { - pluginJarPath = targetPluginFiles[0].toURI().toURL(); - } else { - pluginJarPath = - findMostSimlarPluginJarFile(targetPluginFiles, pluginJarPrefix) - .toURI() - .toURL(); - } - log.info("Discovery plugin jar for: {} at: {}", pluginIdentifier, pluginJarPath); - return Optional.of(pluginJarPath); - } catch (MalformedURLException e) { - log.warn( - "Cannot get plugin URL: {} for pluginIdentifier: {}" + targetPluginFiles[0], - pluginIdentifier, - e); - return Optional.empty(); - } - } - private static File 
findMostSimlarPluginJarFile( - File[] targetPluginFiles, String pluginJarPrefix) { - String splitRegex = "\\-|\\_|\\."; - double maxSimlarity = -Integer.MAX_VALUE; - int mostSimlarPluginJarFileIndex = -1; - for (int i = 0; i < targetPluginFiles.length; i++) { - File file = targetPluginFiles[i]; - String fileName = file.getName(); - double similarity = - CosineSimilarityUtil.cosineSimilarity(pluginJarPrefix, fileName, splitRegex); - if (similarity > maxSimlarity) { - maxSimlarity = similarity; - mostSimlarPluginJarFileIndex = i; - } - } - return targetPluginFiles[mostSimlarPluginJarFileIndex]; + return Optional.ofNullable(pluginMappingConfig.getConfig(engineType)) + .flatMap(engineConfig -> Optional.ofNullable(engineConfig.getConfig(pluginType))) + .flatMap( + typeConfig -> + typeConfig.entrySet().stream() + .filter( + entry -> + StringUtils.equalsIgnoreCase( + entry.getKey(), pluginName)) + .findFirst()) + .map(entry -> entry.getValue().unwrapped().toString()) + .map( + pluginJarPrefix -> + pluginDir + .toFile() + .listFiles( + pathname -> + pathname.getName().endsWith(".jar") + && StringUtils.startsWithIgnoreCase( + pathname.getName(), + pluginJarPrefix))) + .filter(files -> !ArrayUtils.isEmpty(files)) + .flatMap( + files -> { + try { + if (files.length == 1) { + return Optional.of(files[0].toURI().toURL()); + } else { + PluginType type = PluginType.valueOf(pluginType.toUpperCase()); + String targetPluginName = + getTargetPluginName(pluginIdentifier, type); + return selectPluginJar(files, targetPluginName); + } + } catch (MalformedURLException e) { + log.warn( + "Cannot get plugin URL for pluginIdentifier: {}", + pluginIdentifier, + e); + return Optional.empty(); + } + }) + .map( + pluginJarPath -> { + log.info( + "Discovery plugin jar for: {} at: {}", + pluginIdentifier, + pluginJarPath); + return pluginJarPath; + }); } - static class CosineSimilarityUtil { - public static double cosineSimilarity(String textA, String textB, String splitRegrex) { - Set<String> 
words1 = - new HashSet<>(Arrays.asList(textA.toLowerCase().split(splitRegrex))); - Set<String> words2 = - new HashSet<>(Arrays.asList(textB.toLowerCase().split(splitRegrex))); - int[] termFrequency1 = calculateTermFrequencyVector(textA, words1, splitRegrex); - int[] termFrequency2 = calculateTermFrequencyVector(textB, words2, splitRegrex); - return calculateCosineSimilarity(termFrequency1, termFrequency2); + private String getTargetPluginName(PluginIdentifier pluginIdentifier, PluginType type) { + switch (type) { + case SINK: + return sinkPluginInstance.get(pluginIdentifier); + case SOURCE: + return sourcePluginInstance.get(pluginIdentifier); + default: + throw new SeaTunnelException("Unsupported plugin type: " + type); } + } - private static int[] calculateTermFrequencyVector( - String text, Set<String> words, String splitRegrex) { - int[] termFrequencyVector = new int[words.size()]; - String[] textArray = text.toLowerCase().split(splitRegrex); - List<String> orderedWords = new ArrayList<String>(); - words.clear(); - for (String word : textArray) { - if (!words.contains(word)) { - orderedWords.add(word); - words.add(word); + private Optional<URL> selectPluginJar(File[] targetPluginFiles, String targetPluginName) { + for (File file : targetPluginFiles) { + if (readPomPropertiesJudgeJar(file.getPath(), targetPluginName)) { + try { + return Optional.of(file.toURI().toURL()); + } catch (MalformedURLException e) { + log.error("Invalid URL for file: {}", file.getAbsolutePath(), e); } } - for (String word : textArray) { - if (words.contains(word)) { - int index = 0; - for (String w : orderedWords) { - if (w.equals(word)) { - termFrequencyVector[index]++; - break; - } - index++; - } - } - } - return termFrequencyVector; } + return Optional.empty(); + } + + @SneakyThrows + private static boolean readPomPropertiesJudgeJar(String jarPath, String targetPluginPath) { + try (JarFile jarFile = new JarFile(jarPath)) { + Enumeration<JarEntry> entries = jarFile.entries(); + while 
(entries.hasMoreElements()) { + JarEntry entry = entries.nextElement(); + String entryName = entry.getName(); - private static double calculateCosineSimilarity(int[] vectorA, int[] vectorB) { - double dotProduct = 0.0; - double magnitudeA = 0.0; - double magnitudeB = 0.0; - int vectorALength = vectorA.length; - int vectorBLength = vectorB.length; - if (vectorALength < vectorBLength) { - int[] vectorTemp = new int[vectorBLength]; - for (int i = 0; i < vectorB.length; i++) { - if (i <= vectorALength - 1) { - vectorTemp[i] = vectorA[i]; - } else { - vectorTemp[i] = 0; + if (entryName.endsWith("pom.properties") && entryName.contains("META-INF/maven/")) { + Review Comment: Can we make sure every jar contains this file? Why not continue to use the jar file name to check it? cc @hailin0 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@seatunnel.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org