Hisoka-X commented on code in PR #8603:
URL: https://github.com/apache/seatunnel/pull/8603#discussion_r1944064856


##########
seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java:
##########
@@ -403,156 +409,104 @@ private Optional<URL> 
findPluginJarPath(PluginIdentifier pluginIdentifier) {
         final String engineType = 
pluginIdentifier.getEngineType().toLowerCase();
         final String pluginType = 
pluginIdentifier.getPluginType().toLowerCase();
         final String pluginName = 
pluginIdentifier.getPluginName().toLowerCase();
-        if (!pluginMappingConfig.hasPath(engineType)) {
-            return Optional.empty();
-        }
-        Config engineConfig = pluginMappingConfig.getConfig(engineType);
-        if (!engineConfig.hasPath(pluginType)) {
-            return Optional.empty();
-        }
-        Config typeConfig = engineConfig.getConfig(pluginType);
-        Optional<Map.Entry<String, ConfigValue>> optional =
-                typeConfig.entrySet().stream()
-                        .filter(entry -> 
StringUtils.equalsIgnoreCase(entry.getKey(), pluginName))
-                        .findFirst();
-        if (!optional.isPresent()) {
-            return Optional.empty();
-        }
-        String pluginJarPrefix = 
optional.get().getValue().unwrapped().toString();
-        File[] targetPluginFiles =
-                pluginDir
-                        .toFile()
-                        .listFiles(
-                                new FileFilter() {
-                                    @Override
-                                    public boolean accept(File pathname) {
-                                        return 
pathname.getName().endsWith(".jar")
-                                                && 
StringUtils.startsWithIgnoreCase(
-                                                        pathname.getName(), 
pluginJarPrefix);
-                                    }
-                                });
-        if (ArrayUtils.isEmpty(targetPluginFiles)) {
-            return Optional.empty();
-        }
-        try {
-            URL pluginJarPath;
-            if (targetPluginFiles.length == 1) {
-                pluginJarPath = targetPluginFiles[0].toURI().toURL();
-            } else {
-                pluginJarPath =
-                        findMostSimlarPluginJarFile(targetPluginFiles, 
pluginJarPrefix)
-                                .toURI()
-                                .toURL();
-            }
-            log.info("Discovery plugin jar for: {} at: {}", pluginIdentifier, 
pluginJarPath);
-            return Optional.of(pluginJarPath);
-        } catch (MalformedURLException e) {
-            log.warn(
-                    "Cannot get plugin URL: {} for pluginIdentifier: {}" + 
targetPluginFiles[0],
-                    pluginIdentifier,
-                    e);
-            return Optional.empty();
-        }
-    }
 
-    private static File findMostSimlarPluginJarFile(
-            File[] targetPluginFiles, String pluginJarPrefix) {
-        String splitRegex = "\\-|\\_|\\.";
-        double maxSimlarity = -Integer.MAX_VALUE;
-        int mostSimlarPluginJarFileIndex = -1;
-        for (int i = 0; i < targetPluginFiles.length; i++) {
-            File file = targetPluginFiles[i];
-            String fileName = file.getName();
-            double similarity =
-                    CosineSimilarityUtil.cosineSimilarity(pluginJarPrefix, 
fileName, splitRegex);
-            if (similarity > maxSimlarity) {
-                maxSimlarity = similarity;
-                mostSimlarPluginJarFileIndex = i;
-            }
-        }
-        return targetPluginFiles[mostSimlarPluginJarFileIndex];
+        return Optional.ofNullable(pluginMappingConfig.getConfig(engineType))
+                .flatMap(engineConfig -> 
Optional.ofNullable(engineConfig.getConfig(pluginType)))
+                .flatMap(
+                        typeConfig ->
+                                typeConfig.entrySet().stream()
+                                        .filter(
+                                                entry ->
+                                                        
StringUtils.equalsIgnoreCase(
+                                                                
entry.getKey(), pluginName))
+                                        .findFirst())
+                .map(entry -> entry.getValue().unwrapped().toString())
+                .map(
+                        pluginJarPrefix ->
+                                pluginDir
+                                        .toFile()
+                                        .listFiles(
+                                                pathname ->
+                                                        
pathname.getName().endsWith(".jar")
+                                                                && 
StringUtils.startsWithIgnoreCase(
+                                                                        
pathname.getName(),
+                                                                        
pluginJarPrefix)))
+                .filter(files -> !ArrayUtils.isEmpty(files))
+                .flatMap(
+                        files -> {
+                            try {
+                                if (files.length == 1) {
+                                    return 
Optional.of(files[0].toURI().toURL());
+                                } else {
+                                    PluginType type = 
PluginType.valueOf(pluginType.toUpperCase());
+                                    String targetPluginName =
+                                            
getTargetPluginName(pluginIdentifier, type);
+                                    return selectPluginJar(files, 
targetPluginName);
+                                }
+                            } catch (MalformedURLException e) {
+                                log.warn(
+                                        "Cannot get plugin URL for 
pluginIdentifier: {}",
+                                        pluginIdentifier,
+                                        e);
+                                return Optional.empty();
+                            }
+                        })
+                .map(
+                        pluginJarPath -> {
+                            log.info(
+                                    "Discovery plugin jar for: {} at: {}",
+                                    pluginIdentifier,
+                                    pluginJarPath);
+                            return pluginJarPath;
+                        });
     }
 
-    static class CosineSimilarityUtil {
-        public static double cosineSimilarity(String textA, String textB, 
String splitRegrex) {
-            Set<String> words1 =
-                    new 
HashSet<>(Arrays.asList(textA.toLowerCase().split(splitRegrex)));
-            Set<String> words2 =
-                    new 
HashSet<>(Arrays.asList(textB.toLowerCase().split(splitRegrex)));
-            int[] termFrequency1 = calculateTermFrequencyVector(textA, words1, 
splitRegrex);
-            int[] termFrequency2 = calculateTermFrequencyVector(textB, words2, 
splitRegrex);
-            return calculateCosineSimilarity(termFrequency1, termFrequency2);
+    private String getTargetPluginName(PluginIdentifier pluginIdentifier, 
PluginType type) {
+        switch (type) {
+            case SINK:

Review Comment:
   ditto



##########
seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java:
##########
@@ -114,6 +118,8 @@ public AbstractPluginDiscovery(
         this.pluginDir = pluginDir;
         this.pluginMappingConfig = pluginMappingConfig;
         this.addURLToClassLoaderConsumer = addURLToClassLoaderConsumer;
+        this.sourcePluginInstance = getAllSupportedPlugins(PluginType.SOURCE);
+        this.sinkPluginInstance = getAllSupportedPlugins(PluginType.SINK);

Review Comment:
   Where is transform? Only SOURCE and SINK are initialized here.



##########
seatunnel-plugin-discovery/src/main/java/org/apache/seatunnel/plugin/discovery/AbstractPluginDiscovery.java:
##########
@@ -403,156 +409,104 @@ private Optional<URL> 
findPluginJarPath(PluginIdentifier pluginIdentifier) {
         final String engineType = 
pluginIdentifier.getEngineType().toLowerCase();
         final String pluginType = 
pluginIdentifier.getPluginType().toLowerCase();
         final String pluginName = 
pluginIdentifier.getPluginName().toLowerCase();
-        if (!pluginMappingConfig.hasPath(engineType)) {
-            return Optional.empty();
-        }
-        Config engineConfig = pluginMappingConfig.getConfig(engineType);
-        if (!engineConfig.hasPath(pluginType)) {
-            return Optional.empty();
-        }
-        Config typeConfig = engineConfig.getConfig(pluginType);
-        Optional<Map.Entry<String, ConfigValue>> optional =
-                typeConfig.entrySet().stream()
-                        .filter(entry -> 
StringUtils.equalsIgnoreCase(entry.getKey(), pluginName))
-                        .findFirst();
-        if (!optional.isPresent()) {
-            return Optional.empty();
-        }
-        String pluginJarPrefix = 
optional.get().getValue().unwrapped().toString();
-        File[] targetPluginFiles =
-                pluginDir
-                        .toFile()
-                        .listFiles(
-                                new FileFilter() {
-                                    @Override
-                                    public boolean accept(File pathname) {
-                                        return 
pathname.getName().endsWith(".jar")
-                                                && 
StringUtils.startsWithIgnoreCase(
-                                                        pathname.getName(), 
pluginJarPrefix);
-                                    }
-                                });
-        if (ArrayUtils.isEmpty(targetPluginFiles)) {
-            return Optional.empty();
-        }
-        try {
-            URL pluginJarPath;
-            if (targetPluginFiles.length == 1) {
-                pluginJarPath = targetPluginFiles[0].toURI().toURL();
-            } else {
-                pluginJarPath =
-                        findMostSimlarPluginJarFile(targetPluginFiles, 
pluginJarPrefix)
-                                .toURI()
-                                .toURL();
-            }
-            log.info("Discovery plugin jar for: {} at: {}", pluginIdentifier, 
pluginJarPath);
-            return Optional.of(pluginJarPath);
-        } catch (MalformedURLException e) {
-            log.warn(
-                    "Cannot get plugin URL: {} for pluginIdentifier: {}" + 
targetPluginFiles[0],
-                    pluginIdentifier,
-                    e);
-            return Optional.empty();
-        }
-    }
 
-    private static File findMostSimlarPluginJarFile(
-            File[] targetPluginFiles, String pluginJarPrefix) {
-        String splitRegex = "\\-|\\_|\\.";
-        double maxSimlarity = -Integer.MAX_VALUE;
-        int mostSimlarPluginJarFileIndex = -1;
-        for (int i = 0; i < targetPluginFiles.length; i++) {
-            File file = targetPluginFiles[i];
-            String fileName = file.getName();
-            double similarity =
-                    CosineSimilarityUtil.cosineSimilarity(pluginJarPrefix, 
fileName, splitRegex);
-            if (similarity > maxSimlarity) {
-                maxSimlarity = similarity;
-                mostSimlarPluginJarFileIndex = i;
-            }
-        }
-        return targetPluginFiles[mostSimlarPluginJarFileIndex];
+        return Optional.ofNullable(pluginMappingConfig.getConfig(engineType))
+                .flatMap(engineConfig -> 
Optional.ofNullable(engineConfig.getConfig(pluginType)))
+                .flatMap(
+                        typeConfig ->
+                                typeConfig.entrySet().stream()
+                                        .filter(
+                                                entry ->
+                                                        
StringUtils.equalsIgnoreCase(
+                                                                
entry.getKey(), pluginName))
+                                        .findFirst())
+                .map(entry -> entry.getValue().unwrapped().toString())
+                .map(
+                        pluginJarPrefix ->
+                                pluginDir
+                                        .toFile()
+                                        .listFiles(
+                                                pathname ->
+                                                        
pathname.getName().endsWith(".jar")
+                                                                && 
StringUtils.startsWithIgnoreCase(
+                                                                        
pathname.getName(),
+                                                                        
pluginJarPrefix)))
+                .filter(files -> !ArrayUtils.isEmpty(files))
+                .flatMap(
+                        files -> {
+                            try {
+                                if (files.length == 1) {
+                                    return 
Optional.of(files[0].toURI().toURL());
+                                } else {
+                                    PluginType type = 
PluginType.valueOf(pluginType.toUpperCase());
+                                    String targetPluginName =
+                                            
getTargetPluginName(pluginIdentifier, type);
+                                    return selectPluginJar(files, 
targetPluginName);
+                                }
+                            } catch (MalformedURLException e) {
+                                log.warn(
+                                        "Cannot get plugin URL for 
pluginIdentifier: {}",
+                                        pluginIdentifier,
+                                        e);
+                                return Optional.empty();
+                            }
+                        })
+                .map(
+                        pluginJarPath -> {
+                            log.info(
+                                    "Discovery plugin jar for: {} at: {}",
+                                    pluginIdentifier,
+                                    pluginJarPath);
+                            return pluginJarPath;
+                        });
     }
 
-    static class CosineSimilarityUtil {
-        public static double cosineSimilarity(String textA, String textB, 
String splitRegrex) {
-            Set<String> words1 =
-                    new 
HashSet<>(Arrays.asList(textA.toLowerCase().split(splitRegrex)));
-            Set<String> words2 =
-                    new 
HashSet<>(Arrays.asList(textB.toLowerCase().split(splitRegrex)));
-            int[] termFrequency1 = calculateTermFrequencyVector(textA, words1, 
splitRegrex);
-            int[] termFrequency2 = calculateTermFrequencyVector(textB, words2, 
splitRegrex);
-            return calculateCosineSimilarity(termFrequency1, termFrequency2);
+    private String getTargetPluginName(PluginIdentifier pluginIdentifier, 
PluginType type) {
+        switch (type) {
+            case SINK:
+                return sinkPluginInstance.get(pluginIdentifier);
+            case SOURCE:
+                return sourcePluginInstance.get(pluginIdentifier);
+            default:
+                throw new SeaTunnelException("Unsupported plugin type: " + 
type);
         }
+    }
 
-        private static int[] calculateTermFrequencyVector(
-                String text, Set<String> words, String splitRegrex) {
-            int[] termFrequencyVector = new int[words.size()];
-            String[] textArray = text.toLowerCase().split(splitRegrex);
-            List<String> orderedWords = new ArrayList<String>();
-            words.clear();
-            for (String word : textArray) {
-                if (!words.contains(word)) {
-                    orderedWords.add(word);
-                    words.add(word);
+    private Optional<URL> selectPluginJar(File[] targetPluginFiles, String 
targetPluginName) {
+        for (File file : targetPluginFiles) {
+            if (readPomPropertiesJudgeJar(file.getPath(), targetPluginName)) {
+                try {
+                    return Optional.of(file.toURI().toURL());
+                } catch (MalformedURLException e) {
+                    log.error("Invalid URL for file: {}", 
file.getAbsolutePath(), e);
                 }
             }
-            for (String word : textArray) {
-                if (words.contains(word)) {
-                    int index = 0;
-                    for (String w : orderedWords) {
-                        if (w.equals(word)) {
-                            termFrequencyVector[index]++;
-                            break;
-                        }
-                        index++;
-                    }
-                }
-            }
-            return termFrequencyVector;
         }
+        return Optional.empty();
+    }
+
+    @SneakyThrows
+    private static boolean readPomPropertiesJudgeJar(String jarPath, String 
targetPluginPath) {
+        try (JarFile jarFile = new JarFile(jarPath)) {
+            Enumeration<JarEntry> entries = jarFile.entries();
+            while (entries.hasMoreElements()) {
+                JarEntry entry = entries.nextElement();
+                String entryName = entry.getName();
 
-        private static double calculateCosineSimilarity(int[] vectorA, int[] 
vectorB) {
-            double dotProduct = 0.0;
-            double magnitudeA = 0.0;
-            double magnitudeB = 0.0;
-            int vectorALength = vectorA.length;
-            int vectorBLength = vectorB.length;
-            if (vectorALength < vectorBLength) {
-                int[] vectorTemp = new int[vectorBLength];
-                for (int i = 0; i < vectorB.length; i++) {
-                    if (i <= vectorALength - 1) {
-                        vectorTemp[i] = vectorA[i];
-                    } else {
-                        vectorTemp[i] = 0;
+                if (entryName.endsWith("pom.properties") && 
entryName.contains("META-INF/maven/")) {
+

Review Comment:
   Can we make sure all jars contain this file? Why not continue to use the jar
file name to check it? cc @hailin0



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@seatunnel.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to