Hi, I'm trying to put together a script that can be run via the console or possibly the groovy plugin, to remove all the additional saved configs added due to how the envInject and jobConfigHistory plugins interact.
Related Jira is https://issues.jenkins-ci.org/browse/JENKINS-14144. The core problem is the addition/removal of the xml element "<org.jenkinsci.plugins.envinject.EnvInjectListener_-JobSetupEnvironmentWrapper/>" from the buildWrappers node that occurred at the start/end of each build once the envInject plugin was being used by the build. What I was hoping to do, was compare the xml contents of each saved config, ignore the xml node created by envInject and then remove any saved configs that were identical. So I've cobbled together something that mostly works (pasted at the end) from various webpages and posts online. It uses an xsl transform to discard nodes matching "org.jenkinsci.plugins.envinject.EnvInjectListener_-JobSetupEnvironmentWrapper", and then normalizes the xml. For some reason though, when I ask the "Document" object to do the comparison using the 'isEqualNode' method, there are a number of cases where it says the xml does not match, but if I compare the xml by formatting it and outputting to strings, it does match. I'm a little stumped as to what would be getting picked up as being different. Is there something that the transform would be missing? Hoping that someone here might be able to point me in the right direction, and that the code below helps others hitting that JIRA issue. Here's some output from a run of the script: Ignoring as files differ: config-history/2012-08-13_11-17-31/config.xml config-history/2012-08-13_12-29-07/config.xml Ignoring as files differ: config-history/2012-08-13_12-29-07/config.xml config-history/2012-08-13_18-51-04/config.xml Ignoring as files differ: config-history/2012-08-13_18-51-04/config.xml config-history/2012-08-13_18-51-05/config.xml XML output is equal Following file can be deleted: config-history/2012-08-13_18-51-05/config.xml Ignoring as files differ: config-history/2012-08-13_18-51-04/config.xml config-history/2012-08-16_19-34-46/config.xml Ignoring as files differ: config-history/2012-08-16_19-34-46/config.xml config-history/2012-08-16_19-34-51/config.xml Ignoring as files differ: config-history/2012-08-16_19-34-51/config.xml config-history/2012-08-16_19-35-38/config.xml Ignoring as files differ: config-history/2012-08-16_19-35-38/config.xml config-history/2012-08-16_19-35-49/config.xml XML output is equal <--- previous line says that the check using the Document object says they are not the same, why? Following file can be deleted: config-history/2012-08-16_19-35-49/config.xml Files are the same: config-history/2012-08-16_19-35-38/config.xml config-history/2012-08-16_19-35-50/config.xml Following file can be deleted: config-history/2012-08-16_19-35-50/config.xml Ignoring as files differ: config-history/2012-08-16_19-35-38/config.xml config-history/2012-08-16_19-41-12/config.xml XML output is equal Following file can be deleted: config-history/2012-08-16_19-41-12/config.xml A direct textual comparison of the two files (config-history/2012-08-13_18-51-04/config.xml, on disk: diff -u config-history/2012-08-13_18-51-04/config.xml config-history/2012-08-13_18-51-05/config.xml --- config-history/2012-08-13_18-51-04/config.xml 2012-08-13 18:51:04.592635936 +0000 +++ config-history/2012-08-13_18-51-05/config.xml 2012-08-13 18:51:05.592648870 +0000 @@ -207,6 +207,5 @@ <postSuccessfulBuildSteps/> <postFailedBuildSteps/> </hudson.plugins.release.ReleaseWrapper> - <org.jenkinsci.plugins.envinject.EnvInjectListener_-JobSetupEnvironmentWrapper/> </buildWrappers> </project> \ No newline at end of file Changing the script below to output the strings to the script console, also shows them to be identical. ----------------------------------- import hudson.plugins.jobConfigHistory.*; import javax.xml.parsers.*; import javax.xml.transform.*; import javax.xml.transform.dom.*; import javax.xml.transform.stream.*; import org.apache.commons.lang.StringUtils; import org.apache.xml.serialize.OutputFormat; import org.apache.xml.serialize.XMLSerializer; import org.w3c.dom.*; import org.xml.sax.*; // see http://stackoverflow.com/questions/141993/best-way-to-compare-2-xml-documents-in-java public class BasicXsl { // applies the xsl string to inFilename and returns the result as a string public static String xsl(String inFilename, String xsl) { try { Source source = new StreamSource(new FileInputStream(inFilename)); StringWriter writer = new StringWriter(); // Create transformer factory TransformerFactory factory = TransformerFactory.newInstance(); Templates template = factory.newTemplates(new StreamSource(new StringReader(xsl))); Transformer xformer = template.newTransformer(); xformer.transform(source, new StreamResult(writer)); return(writer.toString()); } catch (FileNotFoundException e) { } catch (TransformerConfigurationException e) { // An error occurred in the XSL file } catch (TransformerException e) { // An error occurred while applying the XSL file // Get location of error in input file SourceLocator locator = e.getLocator(); int col = locator.getColumnNumber(); int line = locator.getLineNumber(); String publicId = locator.getPublicId(); String systemId = locator.getSystemId(); } return null; } } public class trimpath { public static String shortpath(String path, int elements) { String [] filePathArray = path.split(File.separator); return StringUtils.join(Arrays.copyOfRange( filePathArray, filePathArray.length - elements, filePathArray.length), File.separator) } } // see http://stackoverflow.com/questions/1137563/xsl-how-to-copy-a-tree-but-removing-some-nodes/1137628#1137628 // // simple xsl template to discard any node matching // org.jenkinsci.plugins.envinject.EnvInjectListener_-JobSetupEnvironmentWrapper xsl = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + " <xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">\n" + " <xsl:template match=\"node() | @*\">\n" + " <xsl:copy>\n" + " <xsl:apply-templates select=\"node() | @*\"/>\n" + " </xsl:copy>\n" + " </xsl:template>\n" + " <xsl:template match=\"org.jenkinsci.plugins.envinject.EnvInjectListener_-JobSetupEnvironmentWrapper\"/> <!-- this empty template will this element -->\n" + "</xsl:stylesheet>"; // see http://stackoverflow.com/questions/141993/best-way-to-compare-2-xml-documents-in-java // // Set up docbuilder stuff for xml parsing/comparison dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); dbf.setCoalescing(true); dbf.setIgnoringElementContentWhitespace(true); dbf.setIgnoringComments(true); db = dbf.newDocumentBuilder(); found = 0; for(item in Hudson.instance.getItems(hudson.model.Project)) { AbstractProject project = item; println(item.getName()); jch = new JobConfigHistoryProjectAction(project); allJch = jch.getConfigs() if(allJch.size() < 1) { continue; } // avoid deleting while the build is running, just in case if(item.isBuilding()) { continue; } // note: configs are ordered latest to earliest itr = allJch.listIterator(allJch.size()); // always keep the very first one prevCfg = itr.previous(); prevCfgFile = URLDecoder.decode(prevCfg.getFile(), "utf-8") + "/config.xml" prevdoc = db.parse(new InputSource(new StringReader(BasicXsl.xsl(prevCfgFile, xsl)))); prevdoc.normalizeDocument(); while(itr.hasPrevious()) { cfg = itr.previous(); cfgFile = URLDecoder.decode(cfg.getFile(), "utf-8") + "/config.xml" cfgdoc = db.parse(new InputSource(new StringReader(BasicXsl.xsl(cfgFile, xsl)))); cfgdoc.normalizeDocument(); // compare using document object if(!prevdoc.isEqualNode(cfgdoc)) { println("Ignoring as files differ: " + trimpath.shortpath(prevCfgFile, 3) + " " + trimpath.shortpath(cfgFile, 3) ); // see http://stackoverflow.com/questions/139076/how-to-pretty-print-xml-from-java // // since the above check doesn't appear to catch all contents that should be considered equal // compare the actual xml outputted format = new OutputFormat(cfgdoc); format.setLineWidth(65); format.setIndenting(true); format.setIndent(2); out1 = new StringWriter(); serializer = new XMLSerializer(out1, format); serializer.serialize(cfgdoc); format = new OutputFormat(prevdoc); format.setLineWidth(65); format.setIndenting(true); format.setIndent(2); out2 = new StringWriter(); serializer = new XMLSerializer(out2, format); serializer.serialize(prevdoc); if (out1.toString() == out2.toString()) { // why is this? how can the formatted output be the same, but a comparison of the xml docs // which is told to ignore all whitespace differences, says they are not the same? println("XML output is equal"); } else { prevCfg = cfg; prevCfgFile = cfgFile; prevdoc = cfgdoc; continue; } } else { println("Files are the same: " + trimpath.shortpath(prevCfgFile, 3) + " " + trimpath.shortpath(cfgFile, 3) ); } // ignore those created by other users besides SYSTEM since they were deliberately saved if( cfg.getUserID() != "SYSTEM" ) { continue; } println("Following file can be deleted: " + trimpath.shortpath(cfgFile, 3)); found += 1; // limit for now if (found > 10) { break; } } // limit for now if (found > 10) { break; } } -- Regards, Darragh Bailey