Erik Hatcher <[EMAIL PROTECTED]> wrote: > There already is a <containsregexp> in the latest version of Ant. Oops, I didn't see this because I have only worked with Ant 1.5.x.
> I'm > not sure how the implementations compare. Could you compare yours > with it and offer any benefits yours has as a patch against the HEAD > version of Ant's codebase? Like the <replaceregexp> task (org.apache.tools.ant.taskdefs.optional.ReplaceRegExp), the selector supports two additional parameters: "byline" and "flags". You will find short documentation in the JavaDoc comments. I have attached a file "patch.txt" generated by CVS rdiff (patch). Thorsten
Index: ContainsRegexpSelector.java =================================================================== RCS file: ContainsRegexpSelector.java diff -N ContainsRegexpSelector.java --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ ContainsRegexpSelector.java 9 Sep 2003 14:59:10 -0000 @@ -0,0 +1,376 @@ +/* + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2002-2003 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, if + * any, must include the following acknowlegement: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowlegement may appear in the software itself, + * if and wherever such third-party acknowlegements normally appear. + * + * 4. The names "Ant" and "Apache Software + * Foundation" must not be used to endorse or promote products derived + * from this software without prior written permission. For written + * permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache" + * nor may "Apache" appear in their names without prior written + * permission of the Apache Group. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. + */ + +package org.apache.tools.ant.types.selectors; + +import java.io.File; +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; + +import org.apache.tools.ant.types.Parameter; +import org.apache.tools.ant.types.selectors.BaseExtendSelector; +import org.apache.tools.ant.util.regexp.Regexp; +import org.apache.tools.ant.util.regexp.RegexpFactory; +import org.apache.tools.ant.BuildException; +import org.apache.tools.ant.Project; + +/** + * Selector that filters files based on whether they contain a + * particular pattern expressed with a regular expression. + * The input file(s) must be able to be properly processed by + * a Reader instance. That is, it must be text only, no binary. + * + * The syntax of the regular expression depends on the implemtation that + * you choose to use. The system property <code>ant.regexp.regexpimpl</code> + * will be the classname of the implementation that will be used (the default + * is <code>org.apache.tools.ant.util.regexp.JakartaOroRegexp</code> and + * requires the Jakarta Oro Package). 
+ * + * <pre> + * For jdk <= 1.3, there are two available implementations: + * org.apache.tools.ant.util.regexp.JakartaOroRegexp (the default) + * Requires the jakarta-oro package + * + * org.apache.tools.ant.util.regexp.JakartaRegexpRegexp + * Requires the jakarta-regexp package + * + * For jdk >= 1.4 an additional implementation is available: + * org.apache.tools.ant.util.regexp.Jdk14RegexpRegexp + * Requires the jdk 1.4 built in regular expression package. + * + * Attributes: + * + * pattern --> The Regular expression to search for + * flags --> The options to give to the search. For more information, consult the Perl5 syntax. + * i = Case insensitive match + * m = Multiline. Treat the string as multiple lines of input, using + * "^" and "$" as the start or end of any line, respectively, rather + * than start or end of string. + * m = Singleline. Treat the string as a single line of input, using + * "." to match any character, including a newline, which normally, + * it would not match. + * + * byline --> Should files be processed a single line at a time (default is false) + * "true" indicates to perform search on a line by line basis + * "false" indicates to perform search on the whole file at once. 
+ * + * + * @author Thorsten Möller - [EMAIL PROTECTED] + * + * $Revision: $; $Author: $; $Date: $ + */ +public class ContainsRegexpSelector extends BaseExtendSelector +{ + + private boolean byline; + private String flags; + private Regexp regexp = null; + private static final RegexpFactory factory = new RegexpFactory(); + + public static final String PATTERN_KEY = "pattern"; + public static final String FLAGS_KEY = "flags"; + public static final String BYLINE_KEY = "byline"; + + public ContainsRegexpSelector() + { + this.regexp = factory.newRegexp(); + } + + public String toString() + { + StringBuffer buf = new StringBuffer("{containsRegexpSelector pattern: "); + buf.append(regexp.getPattern()); + buf.append(" byline: "); + buf.append(Boolean.toString(byline)); + buf.append(" flags: "); + buf.append(flags); + buf.append("}"); + return buf.toString(); + } + + /** + * Process the file(s) one line at a time. + * This is useful if you want to only search for the first occurence of a regular expression on + * each line, which is not easy to do when processing the file as a whole. + * Defaults to <i>false</i>.</td> + */ + public void setByLine(String byline) + { + Boolean res = Boolean.valueOf(byline); + if (res == null) + { + res = Boolean.FALSE; + } + this.byline = res.booleanValue(); + } + + /** + * The flags to use when matching the regular expression. For more + * information, consult the Perl5 syntax. + * <ul> + * <li>i : Case Insensitive. Do not consider case in the match + * <li>m : Multiline. Treat the string as multiple lines of input, + * using "^" and "$" as the start or end of any line, respectively, rather than start or end of string. + * <li>s : Singleline. Treat the string as a single line of input, using + * "." to match any character, including a newline, which normally, it would not match. + *</ul> + */ + public void setFlags(String flags) + { + this.flags = flags; + } + + /** + * The pattern to search for within a file. 
+ * + * @param regexp the string that a file must contain to be selected. + */ + public void setRegexp(String pattern) + { + this.regexp.setPattern(pattern); + } + + /** + * When using this as a custom selector, this method will be called. + * It translates each parameter into the appropriate setXXX() call. + * + * @param parameters the complete set of parameters for this selector + */ + public void setParameters(Parameter[] parameters) + { + super.setParameters(parameters); + if (parameters != null) + { + for (int i = 0; i < parameters.length; i++) + { + String paramname = parameters[i].getName(); + if (PATTERN_KEY.equalsIgnoreCase(paramname)) + { + setRegexp(parameters[i].getValue()); + } + else if (BYLINE_KEY.equalsIgnoreCase(paramname)) + { + setByLine(parameters[i].getValue()); + } + else if (FLAGS_KEY.equalsIgnoreCase(paramname)) + { + setFlags(parameters[i].getValue()); + } + else + { + setError("Invalid parameter " + paramname); + } + } + } + } + + /** + * Checks to make sure all settings are kosher. In this case, it + * means that the pattern attribute has been set. + * + */ + public void verifySettings() + { + if (regexp == null) + { + setError("The pattern attribute is required"); + } + } + + /** + * The heart of the matter. This is where the selector gets to decide + * on the inclusion of a file in a particular fileset. 
+ * + * @param basedir the base directory the scan is being done from + * @param filename is the name of the file to check + * @param file is a java.io.File object the selector can use + * @return whether the file should be selected or not + */ + public boolean isSelected(File basedir, String filename, File file) + { + + // throw BuildException on error + validate(); + + if (file.isDirectory()) + { + return true; + } + + int options = 0; + // if (flags.indexOf('g') != -1) options |= Regexp.REPLACE_ALL; + if (flags.indexOf('i') != -1) options |= Regexp.MATCH_CASE_INSENSITIVE; + if (flags.indexOf('m') != -1) options |= Regexp.MATCH_MULTILINE; + if (flags.indexOf('s') != -1) options |= Regexp.MATCH_SINGLELINE; + + FileReader r = null; + try + { + r = new FileReader(file); + BufferedReader br = new BufferedReader(r); + log("Searching pattern '" + regexp.getPattern() + + "' in '" + file.getPath() + "'" + + (byline ? " by line" : "") + + (flags.length() > 0 ? " with flags: '" + flags + "'" : "") + + ".", + Project.MSG_VERBOSE); + + if (byline) + { + StringBuffer linebuf = new StringBuffer(); + String line = null; + int c; + boolean hasCR = false; + + do + { + c = br.read(); + if (c == '\r') + { + if (hasCR) + { + // second CR -> EOL + possibly empty line + line = linebuf.toString(); + if (regexp.matches(line, options)) + { + return true; + } + linebuf.setLength(0); + // hasCR is still true (for the second one) + } + else + { + // first CR in this line + hasCR = true; + } + } + else if (c == '\n') + { + // LF -> EOL + line = linebuf.toString(); + if (regexp.matches(line, options)) + { + return true; + } + if (hasCR) + { + hasCR = false; + } + linebuf.setLength(0); + } + else + { // any other char + if ((hasCR) || (c < 0)) + { + // Mac-style linebreak or EOF (or both) + line = linebuf.toString(); + if (regexp.matches(line, options)) + { + return true; + } + if (hasCR) + { + hasCR = false; + } + linebuf.setLength(0); + } + if (c >= 0) + { + linebuf.append((char) c); + } + } 
+ } + while (c >= 0); + } + else + { + int flen = (int) file.length(); + char tmpBuf[] = new char[flen]; + int numread = 0; + int totread = 0; + while (numread != -1 && totread < flen) + { + numread = br.read(tmpBuf, totread, flen); + totread += numread; + } + if (regexp.matches(new String(tmpBuf), options)) + { + return true; + } + } + r.close(); + r = null; + } + catch (IOException ioe) + { + throw new BuildException("Could not read file " + filename); + } + finally + { + try + { + if (r != null) + { + r.close(); + } + } + catch (Exception e) + { + throw new BuildException("Could not close file " + filename); + } + } + return false; + } +} \ No newline at end of file
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]