Pankraz76 commented on code in PR #2407: URL: https://github.com/apache/maven/pull/2407#discussion_r2120301094
########## impl/maven-cli/src/main/java/org/apache/maven/cling/invoker/mvnup/jdom/JDomUtils.java: ########## @@ -220,19 +226,131 @@ public static String detectIndentation(Element element) { return indent; } else { // This should be the indentation of the elements end tag. - return indent + " "; + String baseIndent = detectBaseIndentationUnit(element); + return indent + baseIndent; Review Comment: ```suggestion return indent + detectBaseIndentationUnit(element); ``` - https://github.com/pmd/pmd/issues/5770 ########## impl/maven-cli/src/main/java/org/apache/maven/cling/invoker/mvnup/jdom/JDomUtils.java: ########## @@ -80,8 +83,10 @@ public static void addElement(JDomCfg jDomCfg, Element element, Element root, in root.addContent(index, new Text("\n" + detectIndentation(root))); } - resetIndentations(root, detectIndentation(root)); - resetIndentations(element, detectIndentation(root) + " "); + String rootIndent = detectIndentation(root); + String baseIndent = detectBaseIndentationUnit(root); + resetIndentations(root, rootIndent); + resetIndentations(element, rootIndent + baseIndent); Review Comment: root comes from the context if inlined. maven propagates functional style avoiding overhead when first interacted with the codebase so might keeping this idea strong. what can be removed is considered obsolete. code should speak for itself. root context + detectIndentation(root) will give the rootIndent as context. This seems coupled. ```suggestion resetIndentations(root, detectIndentation(root)); resetIndentations(element, detectIndentation(root) + detectBaseIndentationUnit(root)); ``` ########## impl/maven-cli/src/main/java/org/apache/maven/cling/invoker/mvnup/jdom/JDomUtils.java: ########## @@ -220,19 +226,131 @@ public static String detectIndentation(Element element) { return indent; } else { // This should be the indentation of the elements end tag. - return indent + " "; + String baseIndent = detectBaseIndentationUnit(element); + return indent + baseIndent; } } } Parent parent = element.getParent(); if (parent instanceof Element) { - return detectIndentation((Element) parent) + " "; + String baseIndent = detectBaseIndentationUnit(element); + return detectIndentation((Element) parent) + baseIndent; } return ""; } + /** + * Detects the base indentation unit used in the document by analyzing indentation patterns. + * This method traverses the document tree to find the most common indentation style. + * + * @param element any element in the document to analyze + * @return the detected base indentation unit (e.g., " ", " ", "\t") + */ + public static String detectBaseIndentationUnit(Element element) { + // Find the root element to analyze the entire document + Element root = element; + while (root.getParent() instanceof Element) { + root = (Element) root.getParent(); + } + + // Collect indentation samples from the document + Map<String, Integer> indentationCounts = new HashMap<>(); + collectIndentationSamples(root, indentationCounts, ""); + + // Analyze the collected samples to determine the base unit + return analyzeIndentationPattern(indentationCounts); + } + + /** + * Recursively collects indentation samples from the document tree. + */ + private static void collectIndentationSamples( + Element element, Map<String, Integer> indentationCounts, String parentIndent) { + for (Iterator<Text> iterator = element.getContent(textOnly()).iterator(); iterator.hasNext(); ) { + String text = iterator.next().getText(); + int lastLsIndex = StringUtils.lastIndexOfAny(text, new String[] {"\n", "\r"}); + if (lastLsIndex > -1) { + String indent = text.substring(lastLsIndex + 1); + if (iterator.hasNext() && !indent.isEmpty()) { + // This is indentation before a child element + if (indent.length() > parentIndent.length()) { + String indentDiff = indent.substring(parentIndent.length()); + indentationCounts.merge(indentDiff, 1, Integer::sum); + } + } + } + } + + // Recursively analyze child elements + for (Element child : element.getChildren()) { + String childIndent = detectIndentationForElement(element, child); + if (childIndent != null && childIndent.length() > parentIndent.length()) { + String indentDiff = childIndent.substring(parentIndent.length()); + if (!indentDiff.isEmpty()) { + indentationCounts.merge(indentDiff, 1, Integer::sum); + } + collectIndentationSamples(child, indentationCounts, childIndent); + } + } + } + + /** + * Detects the indentation used for a specific child element. + */ + private static String detectIndentationForElement(Element parent, Element child) { + int childIndex = parent.indexOf(child); + if (childIndex > 0) { + org.jdom2.Content prevContent = parent.getContent(childIndex - 1); + if (prevContent instanceof Text) { + String text = ((Text) prevContent).getText(); + int lastLsIndex = StringUtils.lastIndexOfAny(text, new String[] {"\n", "\r"}); + if (lastLsIndex > -1) { + return text.substring(lastLsIndex + 1); + } + } + } + return null; + } + + /** + * Analyzes the collected indentation patterns to determine the most likely base unit. + */ + private static String analyzeIndentationPattern(Map<String, Integer> indentationCounts) { + if (indentationCounts.isEmpty()) { + return " "; // Default to 2 spaces + } + + // Find the most common indentation pattern + String mostCommon = indentationCounts.entrySet().stream() + .max(Map.Entry.comparingByValue()) + .map(Map.Entry::getKey) + .orElse(" "); + + // Validate and normalize the detected pattern + if (mostCommon.matches("^\\s+$")) { // Only whitespace characters + return mostCommon; + } + + // If we have mixed patterns, try to find a common base unit + Set<String> patterns = indentationCounts.keySet(); + + // Check for common patterns + if (patterns.stream().anyMatch(p -> p.equals(" "))) { + return " "; // 4 spaces + } + if (patterns.stream().anyMatch(p -> p.equals("\t"))) { + return "\t"; // Tab Review Comment: ```suggestion return TAB; ``` ########## impl/maven-cli/src/main/java/org/apache/maven/cling/invoker/mvnup/jdom/JDomUtils.java: ########## @@ -220,19 +226,131 @@ public static String detectIndentation(Element element) { return indent; } else { // This should be the indentation of the elements end tag. - return indent + " "; + String baseIndent = detectBaseIndentationUnit(element); + return indent + baseIndent; } } } Parent parent = element.getParent(); if (parent instanceof Element) { - return detectIndentation((Element) parent) + " "; + String baseIndent = detectBaseIndentationUnit(element); + return detectIndentation((Element) parent) + baseIndent; } return ""; } + /** + * Detects the base indentation unit used in the document by analyzing indentation patterns. + * This method traverses the document tree to find the most common indentation style. + * + * @param element any element in the document to analyze + * @return the detected base indentation unit (e.g., " ", " ", "\t") + */ + public static String detectBaseIndentationUnit(Element element) { + // Find the root element to analyze the entire document + Element root = element; + while (root.getParent() instanceof Element) { + root = (Element) root.getParent(); + } + + // Collect indentation samples from the document + Map<String, Integer> indentationCounts = new HashMap<>(); + collectIndentationSamples(root, indentationCounts, ""); + + // Analyze the collected samples to determine the base unit + return analyzeIndentationPattern(indentationCounts); + } + + /** + * Recursively collects indentation samples from the document tree. + */ + private static void collectIndentationSamples( + Element element, Map<String, Integer> indentationCounts, String parentIndent) { + for (Iterator<Text> iterator = element.getContent(textOnly()).iterator(); iterator.hasNext(); ) { + String text = iterator.next().getText(); + int lastLsIndex = StringUtils.lastIndexOfAny(text, new String[] {"\n", "\r"}); + if (lastLsIndex > -1) { + String indent = text.substring(lastLsIndex + 1); + if (iterator.hasNext() && !indent.isEmpty()) { + // This is indentation before a child element + if (indent.length() > parentIndent.length()) { + String indentDiff = indent.substring(parentIndent.length()); + indentationCounts.merge(indentDiff, 1, Integer::sum); + } + } + } + } + + // Recursively analyze child elements + for (Element child : element.getChildren()) { + String childIndent = detectIndentationForElement(element, child); + if (childIndent != null && childIndent.length() > parentIndent.length()) { + String indentDiff = childIndent.substring(parentIndent.length()); + if (!indentDiff.isEmpty()) { + indentationCounts.merge(indentDiff, 1, Integer::sum); + } + collectIndentationSamples(child, indentationCounts, childIndent); + } + } + } + + /** + * Detects the indentation used for a specific child element. + */ + private static String detectIndentationForElement(Element parent, Element child) { + int childIndex = parent.indexOf(child); + if (childIndex > 0) { + org.jdom2.Content prevContent = parent.getContent(childIndex - 1); + if (prevContent instanceof Text) { + String text = ((Text) prevContent).getText(); + int lastLsIndex = StringUtils.lastIndexOfAny(text, new String[] {"\n", "\r"}); + if (lastLsIndex > -1) { + return text.substring(lastLsIndex + 1); + } + } + } + return null; + } + + /** + * Analyzes the collected indentation patterns to determine the most likely base unit. + */ + private static String analyzeIndentationPattern(Map<String, Integer> indentationCounts) { + if (indentationCounts.isEmpty()) { + return " "; // Default to 2 spaces + } + + // Find the most common indentation pattern + String mostCommon = indentationCounts.entrySet().stream() + .max(Map.Entry.comparingByValue()) + .map(Map.Entry::getKey) + .orElse(" "); + + // Validate and normalize the detected pattern + if (mostCommon.matches("^\\s+$")) { // Only whitespace characters + return mostCommon; + } + + // If we have mixed patterns, try to find a common base unit + Set<String> patterns = indentationCounts.keySet(); + + // Check for common patterns + if (patterns.stream().anyMatch(p -> p.equals(" "))) { + return " "; // 4 spaces + } + if (patterns.stream().anyMatch(p -> p.equals("\t"))) { + return "\t"; // Tab + } + if (patterns.stream().anyMatch(p -> p.equals(" "))) { + return " "; // 2 spaces Review Comment: ```suggestion return TWO_SPACES; ``` wondering how much we have this coding all around. ########## impl/maven-cli/src/main/java/org/apache/maven/cling/invoker/mvnup/jdom/JDomUtils.java: ########## @@ -220,19 +226,131 @@ public static String detectIndentation(Element element) { return indent; } else { // This should be the indentation of the elements end tag. - return indent + " "; + String baseIndent = detectBaseIndentationUnit(element); + return indent + baseIndent; } } } Parent parent = element.getParent(); if (parent instanceof Element) { - return detectIndentation((Element) parent) + " "; + String baseIndent = detectBaseIndentationUnit(element); + return detectIndentation((Element) parent) + baseIndent; } return ""; } + /** + * Detects the base indentation unit used in the document by analyzing indentation patterns. + * This method traverses the document tree to find the most common indentation style. + * + * @param element any element in the document to analyze + * @return the detected base indentation unit (e.g., " ", " ", "\t") + */ + public static String detectBaseIndentationUnit(Element element) { + // Find the root element to analyze the entire document + Element root = element; + while (root.getParent() instanceof Element) { + root = (Element) root.getParent(); + } + + // Collect indentation samples from the document + Map<String, Integer> indentationCounts = new HashMap<>(); + collectIndentationSamples(root, indentationCounts, ""); + + // Analyze the collected samples to determine the base unit + return analyzeIndentationPattern(indentationCounts); + } + + /** + * Recursively collects indentation samples from the document tree. + */ + private static void collectIndentationSamples( + Element element, Map<String, Integer> indentationCounts, String parentIndent) { + for (Iterator<Text> iterator = element.getContent(textOnly()).iterator(); iterator.hasNext(); ) { + String text = iterator.next().getText(); + int lastLsIndex = StringUtils.lastIndexOfAny(text, new String[] {"\n", "\r"}); + if (lastLsIndex > -1) { + String indent = text.substring(lastLsIndex + 1); + if (iterator.hasNext() && !indent.isEmpty()) { + // This is indentation before a child element + if (indent.length() > parentIndent.length()) { + String indentDiff = indent.substring(parentIndent.length()); + indentationCounts.merge(indentDiff, 1, Integer::sum); + } + } + } + } + + // Recursively analyze child elements + for (Element child : element.getChildren()) { + String childIndent = detectIndentationForElement(element, child); + if (childIndent != null && childIndent.length() > parentIndent.length()) { + String indentDiff = childIndent.substring(parentIndent.length()); + if (!indentDiff.isEmpty()) { + indentationCounts.merge(indentDiff, 1, Integer::sum); + } + collectIndentationSamples(child, indentationCounts, childIndent); + } + } + } + + /** + * Detects the indentation used for a specific child element. + */ + private static String detectIndentationForElement(Element parent, Element child) { + int childIndex = parent.indexOf(child); + if (childIndex > 0) { + org.jdom2.Content prevContent = parent.getContent(childIndex - 1); + if (prevContent instanceof Text) { + String text = ((Text) prevContent).getText(); + int lastLsIndex = StringUtils.lastIndexOfAny(text, new String[] {"\n", "\r"}); + if (lastLsIndex > -1) { + return text.substring(lastLsIndex + 1); + } + } + } + return null; + } + + /** + * Analyzes the collected indentation patterns to determine the most likely base unit. + */ + private static String analyzeIndentationPattern(Map<String, Integer> indentationCounts) { + if (indentationCounts.isEmpty()) { + return " "; // Default to 2 spaces + } + + // Find the most common indentation pattern + String mostCommon = indentationCounts.entrySet().stream() + .max(Map.Entry.comparingByValue()) + .map(Map.Entry::getKey) + .orElse(" "); + + // Validate and normalize the detected pattern + if (mostCommon.matches("^\\s+$")) { // Only whitespace characters + return mostCommon; + } + + // If we have mixed patterns, try to find a common base unit + Set<String> patterns = indentationCounts.keySet(); + + // Check for common patterns + if (patterns.stream().anyMatch(p -> p.equals(" "))) { + return " "; // 4 spaces Review Comment: ```suggestion return FOUR_SPACES; ``` ########## impl/maven-cli/src/main/java/org/apache/maven/cling/invoker/mvnup/jdom/JDomUtils.java: ########## @@ -220,19 +226,131 @@ public static String detectIndentation(Element element) { return indent; } else { // This should be the indentation of the elements end tag. - return indent + " "; + String baseIndent = detectBaseIndentationUnit(element); + return indent + baseIndent; } } } Parent parent = element.getParent(); if (parent instanceof Element) { - return detectIndentation((Element) parent) + " "; + String baseIndent = detectBaseIndentationUnit(element); + return detectIndentation((Element) parent) + baseIndent; } return ""; } + /** + * Detects the base indentation unit used in the document by analyzing indentation patterns. + * This method traverses the document tree to find the most common indentation style. + * + * @param element any element in the document to analyze + * @return the detected base indentation unit (e.g., " ", " ", "\t") + */ + public static String detectBaseIndentationUnit(Element element) { + // Find the root element to analyze the entire document + Element root = element; + while (root.getParent() instanceof Element) { + root = (Element) root.getParent(); + } + + // Collect indentation samples from the document + Map<String, Integer> indentationCounts = new HashMap<>(); + collectIndentationSamples(root, indentationCounts, ""); + + // Analyze the collected samples to determine the base unit + return analyzeIndentationPattern(indentationCounts); + } + + /** + * Recursively collects indentation samples from the document tree. + */ + private static void collectIndentationSamples( + Element element, Map<String, Integer> indentationCounts, String parentIndent) { + for (Iterator<Text> iterator = element.getContent(textOnly()).iterator(); iterator.hasNext(); ) { + String text = iterator.next().getText(); + int lastLsIndex = StringUtils.lastIndexOfAny(text, new String[] {"\n", "\r"}); + if (lastLsIndex > -1) { + String indent = text.substring(lastLsIndex + 1); + if (iterator.hasNext() && !indent.isEmpty()) { + // This is indentation before a child element + if (indent.length() > parentIndent.length()) { + String indentDiff = indent.substring(parentIndent.length()); + indentationCounts.merge(indentDiff, 1, Integer::sum); + } + } + } + } + + // Recursively analyze child elements + for (Element child : element.getChildren()) { + String childIndent = detectIndentationForElement(element, child); + if (childIndent != null && childIndent.length() > parentIndent.length()) { + String indentDiff = childIndent.substring(parentIndent.length()); + if (!indentDiff.isEmpty()) { + indentationCounts.merge(indentDiff, 1, Integer::sum); + } + collectIndentationSamples(child, indentationCounts, childIndent); + } + } + } + + /** + * Detects the indentation used for a specific child element. + */ + private static String detectIndentationForElement(Element parent, Element child) { + int childIndex = parent.indexOf(child); + if (childIndex > 0) { + org.jdom2.Content prevContent = parent.getContent(childIndex - 1); + if (prevContent instanceof Text) { + String text = ((Text) prevContent).getText(); + int lastLsIndex = StringUtils.lastIndexOfAny(text, new String[] {"\n", "\r"}); + if (lastLsIndex > -1) { + return text.substring(lastLsIndex + 1); + } + } + } + return null; + } + + /** + * Analyzes the collected indentation patterns to determine the most likely base unit. + */ + private static String analyzeIndentationPattern(Map<String, Integer> indentationCounts) { + if (indentationCounts.isEmpty()) { + return " "; // Default to 2 spaces Review Comment: ```suggestion return TWO_SPACES; ``` DRY ########## impl/maven-cli/src/main/java/org/apache/maven/cling/invoker/mvnup/jdom/JDomUtils.java: ########## @@ -220,19 +226,131 @@ public static String detectIndentation(Element element) { return indent; } else { // This should be the indentation of the elements end tag. - return indent + " "; + String baseIndent = detectBaseIndentationUnit(element); + return indent + baseIndent; } } } Parent parent = element.getParent(); if (parent instanceof Element) { - return detectIndentation((Element) parent) + " "; + String baseIndent = detectBaseIndentationUnit(element); + return detectIndentation((Element) parent) + baseIndent; Review Comment: - https://github.com/pmd/pmd/issues/5770 ########## impl/maven-cli/src/main/java/org/apache/maven/cling/invoker/mvnup/jdom/JDomUtils.java: ########## @@ -80,8 +83,10 @@ public static void addElement(JDomCfg jDomCfg, Element element, Element root, in root.addContent(index, new Text("\n" + detectIndentation(root))); } - resetIndentations(root, detectIndentation(root)); - resetIndentations(element, detectIndentation(root) + " "); + String rootIndent = detectIndentation(root); + String baseIndent = detectBaseIndentationUnit(root); + resetIndentations(root, rootIndent); + resetIndentations(element, rootIndent + baseIndent); Review Comment: Okay, not saw that `rootIndent` is actually rarely (re)used, as variable get mistreated to explain coding like seen with `baseIndent` as actually **not** used. Its not clear if the code is actually manipulating some state which fields are used in first place and **not** to document or structure code, which is mostly used for. Due to missing refactoring. considered boilerplate as reducible. yes calling detectIndentation() twice not ideal ether. Therefore simply give this as method param with dedicated concern which always hides behind a variable used. Dedicated concern: ```java resetIndentations(detectIndentation(root), root); private static void resetIndentations(rootIndent, root) { resetIndentations(root, rootIndent); resetIndentations(element, rootIndent + detectBaseIndentationUnit(root)); } ``` ########## impl/maven-cli/src/main/java/org/apache/maven/cling/invoker/mvnup/jdom/JDomUtils.java: ########## @@ -220,19 +226,131 @@ public static String detectIndentation(Element element) { return indent; } else { // This should be the indentation of the elements end tag. - return indent + " "; + String baseIndent = detectBaseIndentationUnit(element); + return indent + baseIndent; } } } Parent parent = element.getParent(); if (parent instanceof Element) { - return detectIndentation((Element) parent) + " "; + String baseIndent = detectBaseIndentationUnit(element); + return detectIndentation((Element) parent) + baseIndent; } return ""; } + /** + * Detects the base indentation unit used in the document by analyzing indentation patterns. + * This method traverses the document tree to find the most common indentation style. + * + * @param element any element in the document to analyze + * @return the detected base indentation unit (e.g., " ", " ", "\t") + */ + public static String detectBaseIndentationUnit(Element element) { + // Find the root element to analyze the entire document + Element root = element; + while (root.getParent() instanceof Element) { + root = (Element) root.getParent(); + } + + // Collect indentation samples from the document + Map<String, Integer> indentationCounts = new HashMap<>(); + collectIndentationSamples(root, indentationCounts, ""); + + // Analyze the collected samples to determine the base unit + return analyzeIndentationPattern(indentationCounts); + } + + /** + * Recursively collects indentation samples from the document tree. + */ + private static void collectIndentationSamples( + Element element, Map<String, Integer> indentationCounts, String parentIndent) { + for (Iterator<Text> iterator = element.getContent(textOnly()).iterator(); iterator.hasNext(); ) { + String text = iterator.next().getText(); + int lastLsIndex = StringUtils.lastIndexOfAny(text, new String[] {"\n", "\r"}); + if (lastLsIndex > -1) { + String indent = text.substring(lastLsIndex + 1); + if (iterator.hasNext() && !indent.isEmpty()) { + // This is indentation before a child element + if (indent.length() > parentIndent.length()) { + String indentDiff = indent.substring(parentIndent.length()); + indentationCounts.merge(indentDiff, 1, Integer::sum); + } + } + } + } + + // Recursively analyze child elements + for (Element child : element.getChildren()) { + String childIndent = detectIndentationForElement(element, child); + if (childIndent != null && childIndent.length() > parentIndent.length()) { + String indentDiff = childIndent.substring(parentIndent.length()); + if (!indentDiff.isEmpty()) { + indentationCounts.merge(indentDiff, 1, Integer::sum); + } + collectIndentationSamples(child, indentationCounts, childIndent); + } + } + } + + /** + * Detects the indentation used for a specific child element. + */ + private static String detectIndentationForElement(Element parent, Element child) { + int childIndex = parent.indexOf(child); + if (childIndex > 0) { + org.jdom2.Content prevContent = parent.getContent(childIndex - 1); + if (prevContent instanceof Text) { + String text = ((Text) prevContent).getText(); + int lastLsIndex = StringUtils.lastIndexOfAny(text, new String[] {"\n", "\r"}); + if (lastLsIndex > -1) { + return text.substring(lastLsIndex + 1); + } + } + } + return null; + } + + /** + * Analyzes the collected indentation patterns to determine the most likely base unit. + */ + private static String analyzeIndentationPattern(Map<String, Integer> indentationCounts) { + if (indentationCounts.isEmpty()) { + return " "; // Default to 2 spaces + } + + // Find the most common indentation pattern + String mostCommon = indentationCounts.entrySet().stream() + .max(Map.Entry.comparingByValue()) + .map(Map.Entry::getKey) + .orElse(" "); Review Comment: ```suggestion .orElse(TWO_SPACES); ``` magic number, DRY -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@maven.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org