madrob commented on code in PR #865: URL: https://github.com/apache/solr/pull/865#discussion_r876342658
##########
solr/modules/sql/src/java/org/apache/solr/handler/sql/SolrFilter.java:
##########
@@ -346,21 +346,52 @@ protected String translateAnd(RexNode node0) {
   }
   protected String translateLike(RexNode like) {
-    Pair<String, RexLiteral> pair = getFieldValuePair(like);
+    Pair<Pair<String, RexLiteral>, Character> pairWithEscapeCharacter =
+        getFieldValuePairWithEscapeCharacter(like);
+    Pair<String, RexLiteral> pair = pairWithEscapeCharacter.getKey();
+    Character escapeChar = pairWithEscapeCharacter.getValue();
+
     String terms = pair.getValue().toString().trim();
-    terms = terms.replace("'", "").replace('%', '*').replace('_', '?');
-    boolean wrappedQuotes = false;
+    terms = translateLikeTermToSolrSyntax(terms, escapeChar);
+
     if (!terms.startsWith("(") && !terms.startsWith("[") && !terms.startsWith("{")) {
-      // restore the * and ? after escaping
-      terms =
-          "\""
-              + ClientUtils.escapeQueryChars(terms).replace("\\*", "*").replace("\\?", "?")
-              + "\"";
-      wrappedQuotes = true;
-    }
+      terms = escapeWithWildcard(terms);
+
+      // if terms contains multiple words and one or more wildcard chars, then we need to employ
+      // the complexphrase parser
+      // but that expects the terms wrapped in double-quotes, not parens
+      boolean hasMultipleTerms = terms.split("\\s+").length > 1;
+      if (hasMultipleTerms && (terms.contains("*") || terms.contains("?"))) {
+        String quotedTerms = "\"" + terms.substring(1, terms.length() - 1) + "\"";
+        return "{!complexphrase}" + pair.getKey() + ":" + quotedTerms;
+      }
+    } // else treat as an embedded Solr query and pass-through
+
+    return pair.getKey() + ":" + terms;
+  }
-    String query = pair.getKey() + ":" + terms;
-    return wrappedQuotes ? "{!complexphrase}" + query : query;
+  private String translateLikeTermToSolrSyntax(String term, Character escapeChar) {
+    boolean isEscaped = false;
+    StringBuilder sb = new StringBuilder();
+    for (int i = 0; i < term.length() - 1; i++) {
+      char c = term.charAt(i);
+      // Only replace special characters if they are not escaped
+      if (escapeChar != null && c == escapeChar) {
+        isEscaped = true;
+      }
+      if (c == '%' && !isEscaped) {
+        sb.append('*');
+      } else if (c == '_' && !isEscaped) {
+        sb.append('?');
+      } else if (c == '\'' && isEscaped) {
+        sb.append('\'');
+        isEscaped = false;
+      } else if ((escapeChar == null || escapeChar != c) && c != '\'') {
+        sb.append(c);
+        isEscaped = false;
+      }

Review Comment:
   Right. So I think most of this condition had already been checked at line 379 above? Maybe 382 needs to be an `else if`? Something here feels like it is more complicated than it needs to be, and I'm struggling a little to understand it. Maybe we need a test case where `%` is the escape char and make sure that we don't break that way? It's kind of a strange use case but it seems like it should work?

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org