On Fri, Feb 26, 2021 at 10:24 PM Bart via lazarus <
lazarus@lists.lazarus-ide.org> wrote:

> On Fri, Feb 26, 2021 at 7:15 PM Bart <bartjun...@gmail.com> wrote:
> My backup and some related programs still compile, but instatly raise
> an exception when they start to perform their main task.
> Thank you very much.
>
> The normal way of doing this is:
> Deprecate the function in question, but d NOT kill it's functionality.
> Add a useful deprecated message.
> Remove the function in the next major release (deprecate in 2.1, and
> so 2.2, only remove in 2.3, si't be gone in 2.4).
> Simply removing functionality like you have done now will alienate
> users from Lazarus, since apparently "we" cannot be trusted.
>
> Juha: you seem to be obsessed with speeding up string handling code.
> This is not really a problem as long as you are not deaf to arguments
> against your changes.
> You introduce new bugs, remove old features, all for the sake of speed.
> All that when, in my perception, this code is mostly used in
> conjunction with file IO, which is orders of magnitude slower than
> even slopy string handling.
>

True, it created more conflicts than I anticipated.
I reverted the new TMask in r64675. It must be worked later in trunk. Now a
2.2 fork will happen in few weeks.
Sorry for the hassle.

José and others. you can see my adaptation of your code in Lazarus trunk
just before the revert, eg. r64674.
I also attach the unit here.
I fixed the case-insensitive Unicode match by simply replacing LowerCase()
with UTF8LowerCase(). It is a well optimized function.
First I planned to use UTF8CompareLatinTextFast() but it did not fit here.
There is a unit test project in components/lazutils/test/.
The code passes all tests there!
Unicode is fully supported also in mask ranges.

Let's continue the integration later.

Regards,
Juha
{
 *****************************************************************************
  This file is part of LazUtils.

  See the file COPYING.modifiedLGPL.txt, included in this distribution,
  for details about the license.
 *****************************************************************************

 Match text using wildcards and sets.

 Current version is from José Mejuto. When porting to LazUtils,
 functions from LazUTF8 were used for full Unicode support.
}
unit Masks;

{$mode objfpc}{$H+}

// RANGES_AUTOREVERSE
// If reverse ranges if needed, so range "[z-a]" is interpreted as "[a-z]"
{$DEFINE RANGES_AUTOREVERSE}
{$DEFINE USE_INLINE}

interface

uses
  Classes, SysUtils, Contnrs, LazUtilsStrConsts, LazUTF8;

type
  { EMaskError }

  EMaskError=class(EConvertError)
  public
    type
      TMaskExceptionCode=(eMaskException_InternalError,
                          eMaskException_InvalidCharMask,
                          eMaskException_MissingClose,
                          eMaskException_IncompleteMask,
                          eMaskException_InvalidEscapeChar,
                          eMaskException_InvalidUTF8Sequence
                          );
  protected
    cCode: TMaskExceptionCode;
  public
    constructor Create(const msg: string; aCode: TMaskExceptionCode);
    constructor CreateFmt(const msg: string; args: array of const; aCode: TMaskExceptionCode);
    property Code: TMaskExceptionCode read cCode;
  end;

  { TMaskBase }

  TMaskBase = class
  private
    procedure SetMaskEscapeChar(AValue: Char);
  protected

    type
      // Literal = It must match
      // Range = Match any char in the range
      // Negate = Negate match in a group
      // AnyChar = It matches any char, but one must match
      // AnyCharOrNone = Matches one or none char (only in a group)
      // AnyCharToNext = Matches any chars amount, if fail, restart in the
      //                 next position up to finish the mask or the matched string
      // OptionalChar = Optional char
      // CharsGroupBegin = Begin optional chars or ranges "["
      // CharsGroupEnd = End optional chars or ranges "]"
      TMaskOpCode = (
        Literal=0,
        Range=1,
        Negate=2,
        AnyChar=3,
        AnyCharOrNone=4,
        AnyCharToNext=5,
        OptionalChar=6,
        CharsGroupBegin=10,
        CharsGroupEnd=11
      );
      TMaskOpcodesEnum=(eMaskOpcodeAnyChar,
                        eMaskOpcodeAnyCharOrNone,
                        eMaskOpcodeAnyText,
                        eMaskOpcodeRange,
                        eMaskOpcodeOptionalChar,
                        eMaskOpcodeNegateGroup,
                        eMaskOpcodeEscapeChar);
      TMaskOpcodesSet=set of TMaskOpcodesEnum;

      TMaskFailCause = (
        Success = 0,
        MatchStringExhausted = 1,
        MaskExhausted = 2,
        MaskNotMatch = 3,
        UnexpectedEnd = 4
      );
      (*
        Windows mask works in a different mode than regular mask, it has too many
        quirks and corner cases inherited from CP/M, then adapted to DOS (8.3) file
        names and adapted again for long file names.

        Anyth?ng.abc    = "?" matches exactly 1 char
        Anyth*ng.abc    = "*" matches 0 or more of chars

        ------- Quirks -------

        --eWindowsQuirk_AnyExtension
          Anything*.*     = ".*" is removed.

        --eWindowsQuirk_FilenameEnd
          Anything??.abc  = "?" matches 1 or 0 chars (except '.')
                            (Not the same as "Anything*.abc", but the same
                            as regex "Anything.{0,2}\.abc")
                            Internally converted to "Anything[??].abc"

        --eWindowsQuirk_Extension3More
          Anything.abc    = Matches "Anything.abc" but also "Anything.abc*" (3 char extension)
          Anything.ab     = Matches "Anything.ab" and never "anything.abcd"

        --eWindowsQuirk_EmptyIsAny
          ""              = Empty string matches anything "*"

        --eWindowsQuirk_AllByExtension (Not in use anymore)
          .abc            = Runs as "*.abc"

        --eWindowsQuirk_NoExtension
          Anything*.      = Matches "Anything*" without extension
      *)
      TWindowsQuirks=(eWindowsQuirk_AnyExtension, eWindowsQuirk_FilenameEnd,
                      eWindowsQuirk_Extension3More, eWindowsQuirk_EmptyIsAny,
                      eWindowsQuirk_AllByExtension, eWindowsQuirk_NoExtension);
      TWindowsQuirkSet=set of TWindowsQuirks;
    const GROW_BY=100;
          TWindowsQuirksAllAllowed=[eWindowsQuirk_AnyExtension,
                                    eWindowsQuirk_FilenameEnd,
                                    eWindowsQuirk_Extension3More,
                                    eWindowsQuirk_EmptyIsAny,
                                    eWindowsQuirk_AllByExtension,
                                    eWindowsQuirk_NoExtension];
          TWindowsQuirksDefaultAllowed=[eWindowsQuirk_AnyExtension,
                                        eWindowsQuirk_FilenameEnd,
                                        eWindowsQuirk_Extension3More,
                                        eWindowsQuirk_EmptyIsAny,
                                        {eWindowsQuirk_AllByExtension,} // Not in use anymore
                                        eWindowsQuirk_NoExtension];
          TMaskOpCodesAllAllowed=[eMaskOpcodeAnyChar,
                                  eMaskOpcodeAnyCharOrNone,
                                  eMaskOpcodeAnyText,
                                  eMaskOpcodeRange,
                                  eMaskOpcodeOptionalChar,
                                  eMaskOpcodeNegateGroup,
                                  eMaskOpcodeEscapeChar];
          TMaskOpCodesDefaultAllowed=TMaskOpCodesAllAllowed;
  protected
    procedure Add(const aLength: integer; const aData: PBYTE);
    procedure Add(const aValue: integer);{$IFDEF USE_INLINE}inline;{$ENDIF}
    procedure Add(const aValue: TMaskOpCode);{$IFDEF USE_INLINE}inline;{$ENDIF}
    procedure IncrementLastCounterBy(const aOpcode: TMaskOpCode; const aValue: integer);
  protected
    cCaseSensitive: Boolean;
    cMaskIsCompiled: Boolean;
    cMaskCompiled: TBytes;
    cMaskCompiledIndex: integer;
    cMaskCompiledAllocated: integer;
    cMaskCompiledLimit: integer;
    cMaskLimit: integer;
    cMatchStringLimit: integer;
    cMatchMinimumLiteralBytes: SizeInt;
    cMatchMaximumLiteralBytes: SizeInt;
    cMaskOpcodesAllowed: TMaskOpcodesSet;
    // EscapeChar forces next char to be a literal one, not a wildcard.
    cMaskEscapeChar: Char;
    procedure Compile; virtual;
    class procedure Exception_InvalidCharMask(const aMaskChar: string; const aOffset: integer=-1); static;
    class procedure Exception_MissingCloseChar(const aMaskChar: string; const aOffset: integer=-1); static;
    class procedure Exception_IncompleteMask(); static;
    class procedure Exception_InvalidEscapeChar(); static;
    procedure Exception_InternalError();
    function intfMatches(aMatchOffset: integer; aMaskIndex: integer): TMaskFailCause; virtual; abstract;
    property OPCodesAllowed: TMaskOpcodesSet read cMaskOpcodesAllowed write cMaskOpcodesAllowed;
  public
    constructor Create(aCaseSensitive: Boolean=false);
    constructor CreateAdvanced(aCaseSensitive: Boolean=false; aOpcodesAllowed: TMaskOpcodesSet=TMaskOpCodesAllAllowed);
    property CaseSensitive: Boolean read cCaseSensitive;
    property EscapeChar: Char read cMaskEscapeChar write SetMaskEscapeChar;
  end;

  { TMaskUTF8 }

  TMaskUTF8 = class (TMaskBase)
  private
    cMatchString: RawByteString;
  protected
    cOriginalMask: RawByteString;
    class function CompareUTF8Sequences(const P1,P2: PChar): integer; static;{$IFDEF USE_INLINE}inline;{$ENDIF}
    function intfMatches(aMatchOffset: integer; aMaskIndex: integer): TMaskFailCause; override;
  public
    constructor Create(const aMask: RawByteString; aCaseSensitive: Boolean = False);
    constructor CreateAdvanced(const aMask: RawByteString; aCaseSensitive: Boolean=false;
      aOpcodesAllowed: TMaskOpcodesSet=TMaskOpCodesAllAllowed);
    procedure Compile; override;
    function Matches(const aStringToMatch: RawByteString): Boolean; virtual;
    property Mask: RawByteString read cOriginalMask write cOriginalMask;
    property OPCodesAllowed;
  end;

  TMask = class(TMaskUTF8);

  { TMaskUTF8Windows }

  TMaskUTF8Windows=class(TMask)
  protected
    cMaskWindowsQuirkAllowed: TWindowsQuirkSet;
    cMaskWindowsQuirkInUse: TWindowsQuirkSet;
    cWindowsMask: RawByteString;
    class procedure SplitFileNameExtension(const aSourceFileName: RawByteString;
      out aFileName: RawByteString; out aExtension: RawByteString; aIsMask: Boolean=false); static;
  public
    constructor Create(const aMask: RawByteString; aCaseSensitive: Boolean = False);
    constructor CreateAdvanced(const aMask: RawByteString; aCaseSensitive: Boolean=false;
      aWindowsQuirksAllowed: TWindowsQuirkSet=TWindowsQuirksAllAllowed);
    procedure Compile; override;
    function Matches(const aFileName: RawByteString): Boolean; override;
    property Mask: RawByteString read cWindowsMask write cWindowsMask;
    property Quirks: TWindowsQuirkSet read cMaskWindowsQuirkAllowed write cMaskWindowsQuirkAllowed;
  end;

  TMaskWindows = class(TMaskUTF8Windows);

  { TParseStringList }

  TParseStringList = class(TStringList)
  public
    constructor Create(const AText, ASeparators: String);
  end;

  { TMaskList }

  TMaskList = class
  private
    FMasks: TObjectList;
    function GetCount: Integer;
    function GetItem(Index: Integer): TMask;
  public
    constructor Create(const AValue: String);
    constructor Create(const AValue: String; ASeparator: Char; CaseSensitive: Boolean = False);
    constructor CreateWindows(const AValue: String; ASeparator: Char; CaseSensitive: Boolean);
    constructor CreateSysNative(const AValue: String; ASeparator: Char; CaseSensitive: Boolean);
    destructor Destroy; override;

    function Matches(const AFileName: String): Boolean;
    // Don't call this. Create with TMaskList.CreateWindows, then call Matches.
    function MatchesWindowsMask(const AFileName: String): Boolean;

    property Count: Integer read GetCount;
    property Items[Index: Integer]: TMask read GetItem;
  end;

function MatchesMask(const FileName, Mask: String; CaseSensitive: Boolean = False): Boolean;
function MatchesWindowsMask(const FileName, Mask: String; CaseSensitive: Boolean = False): Boolean;

function MatchesMaskList(const FileName, Mask: String): Boolean;
function MatchesMaskList(const FileName, Mask: String; Separator: Char): Boolean;
function MatchesMaskList(const FileName, Mask: String; Separator: Char; CaseSensitive: Boolean): Boolean;
function MatchesWindowsMaskList(const FileName, Mask: String): Boolean;
function MatchesWindowsMaskList(const FileName, Mask: String; Separator: Char): Boolean;
function MatchesWindowsMaskList(const FileName, Mask: String; Separator: Char; CaseSensitive: Boolean): Boolean;


implementation

function MatchesMask(const FileName, Mask: String; CaseSensitive: Boolean): Boolean;
var
  AMask: TMask;
begin
  AMask := TMask.Create(Mask, CaseSensitive);
  try
    Result := AMask.Matches(FileName);
  finally
    AMask.Free;
  end;
end;

function MatchesWindowsMask(const FileName, Mask: String; CaseSensitive: Boolean): Boolean;
var
  AMask: TMaskWindows;
begin
  AMask := TMaskWindows.Create(Mask, CaseSensitive);
  try
    Result := AMask.Matches(FileName);
  finally
    AMask.Free;
  end;
end;

function MatchesMaskList(const FileName, Mask: String): Boolean;
begin
  Result := MatchesMaskList(FileName, Mask, ';', False);
end;

function MatchesMaskList(const FileName, Mask: String; Separator: Char): Boolean;
begin
  Result := MatchesMaskList(FileName, Mask, Separator, False);
end;

function MatchesMaskList(const FileName, Mask: String; Separator: Char; CaseSensitive: Boolean): Boolean;
var
  AMaskList: TMaskList;
begin
  AMaskList := TMaskList.Create(Mask, Separator, CaseSensitive);
  try
    Result := AMaskList.Matches(FileName);
  finally
    AMaskList.Free;
  end;
end;

function MatchesWindowsMaskList(const FileName, Mask: String): Boolean;
begin
  Result := MatchesWindowsMaskList(FileName, Mask, ';', False);
end;

function MatchesWindowsMaskList(const FileName, Mask: String; Separator: Char): Boolean;
begin
  Result := MatchesWindowsMaskList(FileName, Mask, Separator, False);
end;

function MatchesWindowsMaskList(const FileName, Mask: String; Separator: Char; CaseSensitive: Boolean): Boolean;
var
  AMaskList: TMaskList;
begin
  AMaskList := TMaskList.CreateWindows(Mask, Separator, CaseSensitive);
  try
    Result := AMaskList.Matches(FileName);
  finally
    AMaskList.Free;
  end;
end;

{ EMaskError }

constructor EMaskError.Create(const msg: string; aCode: TMaskExceptionCode);
begin
  CreateFmt(msg,[],aCode);
end;

constructor EMaskError.CreateFmt(const msg: string; args: array of const;
  aCode: TMaskExceptionCode);
begin
  cCode:=aCode;
  Inherited CreateFmt(msg,args);
end;

{ TMaskBase }

procedure TMaskBase.SetMaskEscapeChar(AValue: Char);
begin
  if cMaskEscapeChar=AValue then Exit;
  if cMaskEscapeChar>#127 then begin
    Exception_InvalidEscapeChar();
  end;
  cMaskEscapeChar:=AValue;
end;

procedure TMaskBase.Add(const aLength: integer; const aData: PBYTE);
var
  lCounter: integer;
begin
  if cMaskCompiledIndex+aLength>=cMaskCompiledAllocated then begin
    cMaskCompiledAllocated:=cMaskCompiledAllocated+aLength+GROW_BY;
    SetLength(cMaskCompiled,cMaskCompiledAllocated);
  end;
  for lCounter := 0 to Pred(aLength) do begin
    cMaskCompiled[cMaskCompiledIndex]:=(aData+lCounter)^;
    inc(cMaskCompiledIndex);
  end;
end;

procedure TMaskBase.Add(const aValue: integer);
begin
  Add(sizeof(aValue),@aValue);
end;

procedure TMaskBase.Add(const aValue: TMaskOpCode);
var
  v: BYTE;
begin
  v:=BYTE(aValue);
  Add(1,@v);
end;

procedure TMaskBase.IncrementLastCounterBy(const aOpcode: TMaskOpCode;
  const aValue: integer);
var
  p: PINTEGER;
begin
  cMaskCompiledIndex:=cMaskCompiledIndex-sizeof(aValue);
  if TMaskOpCode(cMaskCompiled[cMaskCompiledIndex-1])<>aOpcode then begin
    Exception_InternalError();
  end;
  P:=@cMaskCompiled[cMaskCompiledIndex];
  Add(P^+aValue);
end;

procedure TMaskBase.Compile;
begin
  cMaskIsCompiled:=true;
end;

class procedure TMaskBase.Exception_InvalidCharMask(const aMaskChar: string;
  const aOffset: integer);
begin
  if aOffset>=0 then begin
    raise EMaskError.CreateFmt(rsInvalidCharMaskAt, [aMaskChar, aOffset], eMaskException_InvalidCharMask);
  end else begin
    raise EMaskError.CreateFmt(rsInvalidCharMask, [aMaskChar], eMaskException_InvalidCharMask);
  end;
end;

class procedure TMaskBase.Exception_MissingCloseChar(const aMaskChar: string;
  const aOffset: integer);
begin
  if aOffset>=0 then begin
    raise EMaskError.CreateFmt(rsMissingCloseCharMaskAt, [aMaskChar, aOffset], eMaskException_MissingClose);
  end else begin
    raise EMaskError.CreateFmt(rsMissingCloseCharMask, [aMaskChar], eMaskException_MissingClose);
  end;
end;

class procedure TMaskBase.Exception_IncompleteMask();
begin
  raise EMaskError.CreateFmt(rsIncompleteMask, [], eMaskException_IncompleteMask);
end;

class procedure TMaskBase.Exception_InvalidEscapeChar();
begin
  raise EMaskError.Create(rsInvalidEscapeChar, eMaskException_InvalidEscapeChar);
end;

procedure TMaskBase.Exception_InternalError();
begin
  raise EMaskError.CreateFmt(rsInternalError, [self.ClassName], eMaskException_InternalError);
end;

constructor TMaskBase.CreateAdvanced(aCaseSensitive: Boolean;
  aOpcodesAllowed: TMaskOpcodesSet);
begin
  cMaskOpcodesAllowed:=aOpcodesAllowed;
  cCaseSensitive:=aCaseSensitive;
  cMaskEscapeChar:='\';
end;

constructor TMaskBase.Create(aCaseSensitive: Boolean);
begin
  CreateAdvanced(aCaseSensitive,TMaskOpCodesDefaultAllowed);
end;

{ TMask }

procedure TMaskUTF8.Compile;
var
  j: Integer;
  lCharsGroupInsertSize: integer;
  lCPLength: integer;
  lLast: TMaskOpCode;
  lMask: RawByteString;

begin
  inherited Compile;
  if cCaseSensitive then
    lMask:=cOriginalMask
  else
    lMask:=UTF8LowerCase(cOriginalMask);
  cMaskLimit:=Length(lMask);
  lLast:=TMaskOpCode.Literal;
  SetLength(cMaskCompiled,0);
  j:=1;
  while j<=cMaskLimit do begin
    lCPLength:=UTF8CodepointSizeFast(@lMask[j]);
    if (eMaskOpcodeEscapeChar in cMaskOpcodesAllowed) and (lMask[j]=cMaskEscapeChar) then begin
      // next is Literal
      inc(j,lCPLength);
      if j<=cMaskLimit then begin
        lCPLength:=UTF8CodepointSizeFast(@lMask[j]);
        Add(TMaskOpCode.Literal);
        Add(lCPLength,@lMask[j]);
        inc(cMatchMinimumLiteralBytes,lCPLength);
        if cMatchMaximumLiteralBytes<High(cMatchMaximumLiteralBytes) then inc(cMatchMaximumLiteralBytes,lCPLength);
        lLast:=TMaskOpCode.Literal;
        inc(j,lCPLength);
      end else begin
        Exception_IncompleteMask();
      end;
    end else begin
      if lMask[j] in ['*','?','['] then begin
        case lMask[j] of
          '*':
            begin
              if eMaskOpcodeAnyText in cMaskOpcodesAllowed then begin
                if lLast<>TMaskOpCode.AnyCharToNext then begin
                  Add(TMaskOpCode.AnyCharToNext);
                  lLast:=TMaskOpCode.AnyCharToNext;
                  // * = No limit
                  cMatchMaximumLiteralBytes:=High(cMatchMaximumLiteralBytes);
                end;
              end else begin
                Add(TMaskOpCode.Literal);
                Add(lCPLength,@lMask[j]);
                inc(cMatchMinimumLiteralBytes,lCPLength);
                if cMatchMaximumLiteralBytes<High(cMatchMaximumLiteralBytes) then inc(cMatchMaximumLiteralBytes,lCPLength);
                lLast:=TMaskOpCode.Literal;
              end;
            end;
          '?':
            begin
              if eMaskOpcodeAnyChar in cMaskOpcodesAllowed then begin
                Add(TMaskOpCode.AnyChar);
                inc(cMatchMinimumLiteralBytes,1);
                if cMatchMaximumLiteralBytes<High(cMatchMaximumLiteralBytes) then inc(cMatchMaximumLiteralBytes,4);
                lLast:=TMaskOpCode.AnyChar;
              end else begin
                Add(TMaskOpCode.Literal);
                Add(lCPLength,@lMask[j]);
                inc(cMatchMinimumLiteralBytes,lCPLength);
                if cMatchMaximumLiteralBytes<High(cMatchMaximumLiteralBytes) then inc(cMatchMaximumLiteralBytes,lCPLength);
                lLast:=TMaskOpCode.Literal;
              end;
            end;
          '[':
            begin
              if (eMaskOpcodeOptionalChar in cMaskOpcodesAllowed) or
                 (eMaskOpcodeRange in cMaskOpcodesAllowed) or
                 (eMaskOpcodeAnyCharOrNone in cMaskOpcodesAllowed)
                 then begin
                lLast:=TMaskOpCode.CharsGroupBegin;
                Add(TMaskOpCode.CharsGroupBegin);
                inc(cMatchMinimumLiteralBytes,1);
                if cMatchMaximumLiteralBytes<High(cMatchMaximumLiteralBytes) then inc(cMatchMaximumLiteralBytes,4);
                lCharsGroupInsertSize:=cMaskCompiledIndex;
                Add(0);
                inc(j); // CP length is 1 because it is "["
                if j<cMaskLimit then begin
                  if (lMask[j]='!') and (eMaskOpcodeNegateGroup in cMaskOpcodesAllowed) then begin
                    Add(TMaskOpCode.Negate);
                    inc(j); // CP length is 1 because it is "!"
                    lLast:=TMaskOpCode.Negate;
                  end;
                end;

                while j<=cMaskLimit do begin
                  lCPLength:=UTF8CodepointSizeFast(@lMask[j]);

                  if (lMask[j]='?') and (eMaskOpcodeAnyCharOrNone in cMaskOpcodesAllowed) then begin
                    // This syntax is permitted [??] but not this one [?a] or [a?]
                    if (lLast=TMaskOpCode.CharsGroupBegin) or (lLast=TMaskOpCode.AnyCharOrNone) then begin
                      if lLast=TMaskOpCode.AnyCharOrNone then begin
                        // Increment counter
                        IncrementLastCounterBy(TMaskOpCode.AnyCharOrNone,1);
                      end else begin
                        Add(TMaskOpCode.AnyCharOrNone);
                        Add(1); // Counter
                        // Discount minimun bytes added at the "CharGroupBegin"
                        // because [?] could be 1 or 0 chars, so minimum is zero
                        // but the CharsGroupBegin assumes 1 char as all other
                        // masks replace the group by 1 char position.
                        // This code will run 1 time per group at maximun.
                        dec(cMatchMinimumLiteralBytes,1);
                        if cMatchMaximumLiteralBytes<High(cMatchMaximumLiteralBytes) then dec(cMatchMaximumLiteralBytes,4);
                      end;
                      if cMatchMaximumLiteralBytes<High(cMatchMaximumLiteralBytes) then inc(cMatchMaximumLiteralBytes,4);
                      lLast:=TMaskOpCode.AnyCharOrNone;
                    end else begin
                      Exception_InvalidCharMask(lMask[j],j);
                    end;

                  end else if (lLast=TMaskOpCode.AnyCharOrNone) and (lMask[j]<>']') then begin
                    //lMask[j] is not '?', but previous mask was '?' and it is an invalid sequence.
                    // "[??] = Valid" // "[a?] or [?a] = Invalid"
                    Exception_InvalidCharMask(lMask[j],j);

                  end else if ((j+lCPLength+1)<=cMaskLimit) and (lMask[j+lCPLength]='-') and (eMaskOpcodeRange in cMaskOpcodesAllowed) then begin
                    // j+lCPLength+1 --explained--
                    //------------------------------
                    // j+lCPLength is next UTF8 after current UTF8 CP
                    // +1 is at least one byte in UTF8 sequence after "-"
                    // Check if it is a range
                    Add(TMaskOpCode.Range);
                    // Check if reverse range is needed
                    {$IFDEF RANGES_AUTOREVERSE}
                    if CompareUTF8Sequences(@lMask[j],@lMask[j+lCPLength+1])<0 then begin
                      Add(lCPLength,@lMask[j]);
                      inc(j,lCPLength);
                      inc(j,1); // The "-"
                      lCPLength:=UTF8CodepointSizeFast(@lMask[j]);
                      Add(lCPLength,@lMask[j]);
                    end else begin
                      Add(UTF8CodepointSizeFast(@lMask[j+lCPLength+1]),@lMask[j+lCPLength+1]);
                      Add(lCPLength,@lMask[j]);
                      inc(j,lCPLength+1);
                      lCPLength:=UTF8CodepointSizeFast(@lMask[j]);
                    end;
                    {$ELSE}
                      Add(lCPLength,@lMask[j]);
                      inc(j,lCPLength);
                      inc(j,1); // The "-"
                      lCPLength:=UTF8CodepointSizeFast(@lMask[j]);
                      Add(lCPLength,@lMask[j]);
                    {$ENDIF}
                    lLast:=TMaskOpCode.Range;

                  end else if lMask[j]=']' then begin
                    if lLast=TMaskOpCode.CharsGroupBegin then begin
                      //Error empty match
                      Exception_InvalidCharMask(lMask[j],j);
                    end;
                    // Insert the new offset in case of a positive match in CharsGroup
                    PInteger(@cMaskCompiled[lCharsGroupInsertSize])^:=cMaskCompiledIndex;
                    Add(TMaskOpCode.CharsGroupEnd);
                    lLast:=TMaskOpCode.CharsGroupEnd;
                    break;
                  end else begin
                    Add(TMaskOpCode.OptionalChar);
                    Add(lCPLength,@lMask[j]);
                    lLast:=TMaskOpCode.OptionalChar;
                  end;
                  inc(j,lCPLength);
                end;
                if j>cMaskLimit then begin
                  Exception_MissingCloseChar(']',cMaskLimit);
                end;
              end else begin
                Add(TMaskOpCode.Literal);
                Add(lCPLength,@lMask[j]);
                inc(cMatchMinimumLiteralBytes,lCPLength);
                if cMatchMaximumLiteralBytes<High(cMatchMaximumLiteralBytes) then inc(cMatchMaximumLiteralBytes,lCPLength);
                lLast:=TMaskOpCode.Literal;
              end;
            end;
        end;
      end else begin
        // Literal
        Add(TMaskOpCode.Literal);
        Add(lCPLength,@lMask[j]);
        inc(cMatchMinimumLiteralBytes,lCPLength);
        if cMatchMaximumLiteralBytes<High(cMatchMaximumLiteralBytes) then inc(cMatchMaximumLiteralBytes,lCPLength);
        lLast:=TMaskOpCode.Literal;
      end;
      inc(j,lCPLength);
    end;
  end;
  SetLength(cMaskCompiled,cMaskCompiledIndex);
  cMaskCompiledLimit:=cMaskCompiledIndex-1;
end;

class function TMaskUTF8.CompareUTF8Sequences(const P1, P2: PChar): integer;
var
  l1,l2: integer;
  l: integer;
begin
  l1:=UTF8CodepointSizeFast(p1);
  l2:=UTF8CodepointSizeFast(p2);
  Result:=0;
  l:=0;
  while (l<l1) and (l<l2) do begin
    Result:=Integer((P1+l)^)-integer((P2+l)^);
    if Result<>0 then exit;
    inc(l);
  end;
  Result:=l1-l2;
end;

function TMaskUTF8.intfMatches(aMatchOffset: integer; aMaskIndex: integer): TMaskFailCause;
var
  c1,c2: PChar;
  lFailCause: TMaskFailCause;
  lNegateCharGroup: Boolean;
  lSkipOnSuccessGroup: integer;
  t1: Boolean;
  j: integer;
  lTryCounter: integer;
begin
  lSkipOnSuccessGroup:=0;
  Result:=UnexpectedEnd;
  lNegateCharGroup:=false;
  while aMaskIndex<=cMaskCompiledLimit do begin
    case TMaskOpCode(cMaskCompiled[aMaskIndex]) of
      TMaskOpCode.Literal:
        begin
          if aMatchOffset>cMatchStringLimit then begin
            // Error, no char to match.
            Result:=TMaskFailCause.MatchStringExhausted;
            exit;
          end;
          inc(aMaskIndex);
          if CompareUTF8Sequences(@cMaskCompiled[aMaskIndex],@cMatchString[aMatchOffset])<>0 then begin
            Result:=TMaskFailCause.MaskNotMatch;
            Exit;
          end;
          inc(aMaskIndex,UTF8CodepointSizeFast(@cMaskCompiled[aMaskIndex]));
          inc(aMatchOffset,UTF8CodepointSizeFast(@cMatchString[aMatchOffset]));
        end;
      TMaskOpCode.AnyChar:
        begin
          inc(aMaskIndex);
          if aMatchOffset>cMatchStringLimit then begin
            // Error, no char to match.
            Result:=TMaskFailCause.MatchStringExhausted;
            exit;
          end;
          inc(aMatchOffset,UTF8CodepointSizeFast(@cMatchString[aMatchOffset]));
        end;
      TMaskOpCode.Negate:
        begin
          lNegateCharGroup:=true;
          inc(aMaskIndex);
        end;
      TMaskOpCode.CharsGroupBegin:
        begin
          lNegateCharGroup:=false;
          inc(aMaskIndex);
          lSkipOnSuccessGroup:=PInteger(@cMaskCompiled[aMaskIndex])^;
          inc(aMaskIndex,sizeof(integer));
        end;
      TMaskOpCode.CharsGroupEnd:
        begin
          if lNegateCharGroup then begin
            aMaskIndex:=lSkipOnSuccessGroup+1;
            inc(aMatchOffset,UTF8CodepointSizeFast(@cMatchString[aMatchOffset]));
          end else begin
            Result:=TMaskFailCause.MaskNotMatch;
            exit;
          end;
        end;
      TMaskOpCode.OptionalChar:
        begin
          inc(aMaskIndex);
          if aMatchOffset>cMatchStringLimit then begin
            // Error, no char to match.
            Result:=TMaskFailCause.MatchStringExhausted;
            exit;
          end;
          if CompareUTF8Sequences(@cMaskCompiled[aMaskIndex],@cMatchString[aMatchOffset])=0 then begin
            if lNegateCharGroup then begin
              Result:=TMaskFailCause.MaskNotMatch;
              exit;
            end;
            aMaskIndex:=lSkipOnSuccessGroup+1;
            inc(aMatchOffset,UTF8CodepointSizeFast(@cMatchString[aMatchOffset]));
          end else begin
            inc(aMaskIndex,UTF8CodepointSizeFast(@cMaskCompiled[aMaskIndex]));
          end;
        end;
      TMaskOpCode.Range:
        begin
          if aMatchOffset>cMatchStringLimit then begin
            // Error, no char to match.
            Result:=TMaskFailCause.MatchStringExhausted;
            exit;
          end;
          inc(aMaskIndex);
          c1:=@cMaskCompiled[aMaskIndex];
          inc(aMaskIndex,UTF8CodepointSizeFast(C1));
          c2:=@cMaskCompiled[aMaskIndex];
          inc(aMaskIndex,UTF8CodepointSizeFast(C2));
          t1:=(CompareUTF8Sequences(@cMatchString[aMatchOffset],c1)>=0) and
              (CompareUTF8Sequences(@cMatchString[aMatchOffset],c2)<=0);
          if t1 then begin
            if not lNegateCharGroup then begin
              //Jump to CharsGroupEnd+1 because if CharsGroupEnd is reached
              //it means that all optional chars and ranges have not matched the string.
              aMaskIndex:=lSkipOnSuccessGroup+1;
              inc(aMatchOffset,UTF8CodepointSizeFast(@cMatchString[aMatchOffset]));
            end else begin
              Result:=TMaskFailCause.MaskNotMatch;
              exit;
            end;
          end
        end;
      TMaskOpCode.AnyCharToNext:
        begin
          // if last is "*", everything in remain data matches
          if aMaskIndex=cMaskCompiledLimit then begin
            Result:=TMaskFailCause.Success;
            exit;
          end;
          if aMatchOffset>cMatchStringLimit then begin
            if aMaskIndex=cMaskCompiledLimit then begin
              Result:=TMaskFailCause.Success;
              exit;
            end;
            Result:=TMaskFailCause.MatchStringExhausted;
            exit;
          end;
          inc(aMaskIndex);
          while aMatchOffset<=cMatchStringLimit do begin
            lFailCause:=intfMatches(aMatchOffset,aMaskIndex);
            if lFailCause=TMaskFailCause.Success then begin
              Result:=TMaskFailCause.Success;
              exit;
            end else if lFailCause=TMaskFailCause.MatchStringExhausted then begin
              Result:=TMaskFailCause.MatchStringExhausted;
              exit;
            end;
            inc(aMatchOffset,UTF8CodepointSizeFast(@cMatchString[aMatchOffset]));
          end;
          Result:=TMaskFailCause.MatchStringExhausted;
          exit;
        end;
      TMaskOpCode.AnyCharOrNone:
        begin
          inc(aMaskIndex);
          lTryCounter:=PInteger(@cMaskCompiled[aMaskIndex])^;
          inc(aMaskIndex,sizeof(integer));
          if TMaskOpCode(cMaskCompiled[aMaskIndex])<>TMaskOpCode.CharsGroupEnd then begin
            Exception_InternalError();
          end else begin
            aMaskIndex:=lSkipOnSuccessGroup+1;
          end;

          // Try to match remain mask eating, 0,1,2,...,lTryCounter chars.
          for j := 0 to lTryCounter do begin
            if aMatchOffset>cMatchStringLimit then begin
              if aMaskIndex=cMaskCompiledLimit+1 then begin
                Result:=TMaskFailCause.Success;
                exit;
              end;
              Result:=TMaskFailCause.MatchStringExhausted;
              exit;
            end;
            lFailCause:=intfMatches(aMatchOffset,aMaskIndex);
            if lFailCause=TMaskFailCause.Success then begin
              Result:=TMaskFailCause.Success;
              exit;
            end else if lFailCause=TMaskFailCause.MatchStringExhausted then begin
              Result:=TMaskFailCause.MatchStringExhausted;
              exit;
            end;
            inc(aMatchOffset,UTF8CodepointSizeFast(@cMatchString[aMatchOffset]));
          end;
          Result:=TMaskFailCause.MatchStringExhausted;
          exit;
        end;
      else
        begin
          Exception_InternalError();
        end;
    end;
  end;
  if (aMaskIndex>cMaskCompiledLimit) and (aMatchOffset>cMatchStringLimit) then begin
    Result:=TMaskFailCause.Success;
  end else begin
    if aMaskIndex>cMaskCompiledLimit then begin
      Result:=TMaskFailCause.MaskExhausted;
    end else begin
      Result:=TMaskFailCause.MatchStringExhausted;
    end;
  end;
end;

constructor TMaskUTF8.Create(const aMask: RawByteString; aCaseSensitive: Boolean);
begin
  inherited Create(aCaseSensitive);
  cOriginalMask:=aMask;
end;

constructor TMaskUTF8.CreateAdvanced(const aMask: RawByteString;
  aCaseSensitive: Boolean; aOpcodesAllowed: TMaskOpcodesSet);
begin
  inherited CreateAdvanced(aCaseSensitive,aOpcodesAllowed);
  cOriginalMask:=aMask;
end;

function TMaskUTF8.Matches(const aStringToMatch: RawByteString): Boolean;
begin
  if not cMaskIsCompiled then Compile;
  if cCaseSensitive then
    cMatchString:=aStringToMatch
  else
    cMatchString:=UTF8LowerCase(aStringToMatch);
  cMatchStringLimit:=length(cMatchString);
  if (cMatchStringLimit>=cMatchMinimumLiteralBytes)
  and (cMatchStringLimit<=cMatchMaximumLiteralBytes) then
    Result:=intfMatches(1,0)=TMaskFailCause.Success
  else
    Result:=false; // There are too many or not enough bytes to match the string
end;

{ TMaskWindows }

class procedure TMaskUTF8Windows.SplitFileNameExtension(
  const aSourceFileName: RawByteString; out aFileName: RawByteString;
  out aExtension: RawByteString; aIsMask: Boolean);
var
  j: Integer;
  lLowLimit: integer;
begin
  // Default values
  aFileName:=aSourceFileName;
  aExtension:='';

  // This is because .foo is considered a file name ".foo" as one.
  if aIsMask then begin
    lLowLimit:=0;
  end else begin
    lLowLimit:=1;
  end;

  j:=Length(aSourceFileName);
  while j>lLowLimit do begin
    if aSourceFileName[j]='.' then begin
      aFileName:=copy(aSourceFileName,1,j-1);
      aExtension:=copy(aSourceFileName,j);
      break;
    end;
    dec(j);
  end;
end;

constructor TMaskUTF8Windows.Create(const aMask: RawByteString; aCaseSensitive: Boolean);
begin
  CreateAdvanced(aMask,aCaseSensitive,TWindowsQuirksDefaultAllowed);
  Compile;
end;

constructor TMaskUTF8Windows.CreateAdvanced(const aMask: RawByteString;
  aCaseSensitive: Boolean; aWindowsQuirksAllowed: TWindowsQuirkSet);
begin
  cMaskWindowsQuirkAllowed:=aWindowsQuirksAllowed;
  cWindowsMask:=aMask;
  inherited CreateAdvanced(aMask,aCaseSensitive,TMaskOpCodesAllAllowed);
end;

procedure TMaskUTF8Windows.Compile;

  function OptionalQMarksAtEnd(aMask: RawByteString): RawByteString;
  var
    lCounter: integer;
    k: integer;
  begin
    lCounter:=0;
    for k := Length(aMask) downto 1 do begin
      if aMask[k]='?' then begin
        inc(lCounter);
      end else begin
        break;
      end;
    end;
    if lCounter>0 then begin
      aMask:=copy(aMask,1,Length(aMask)-lCounter)+'['+StringOfChar('?',lCounter)+']';
    end;
    Result:=aMask;
  end;

  function EscapeSpecialChars(const aString: RawByteString): RawByteString;
  var
    j: integer;
  begin
    Result:=aString;
    for j := Length(Result) downto 1 do begin
      if Result[j] in ['[',']',cMaskEscapeChar] then begin
        // Escape the []\ chars as in Windows mask mode they are plain chars.
        insert(cMaskEscapeChar,Result,j);
      end;
    end;
  end;

var
  lFileNameMask: RawByteString;
  lExtensionMask: RawByteString;
  lModifiedMask: RawByteString;

begin
  lModifiedMask:=cWindowsMask;

  // Quirk "blah.*" = "blah*"
  if eWindowsQuirk_AnyExtension in cMaskWindowsQuirkAllowed then begin
    if RightStr(lModifiedMask,3)='*.*' then begin
      lModifiedMask:=copy(lModifiedMask,1,Length(lModifiedMask)-2);
      cMaskWindowsQuirkInUse:=cMaskWindowsQuirkInUse+[eWindowsQuirk_AnyExtension];
    end;
  end;

  SplitFileNameExtension(lModifiedMask,lFileNameMask,lExtensionMask,true);

  // Quirk "blah.abc" = "blah.abc*"
  if eWindowsQuirk_Extension3More in cMaskWindowsQuirkAllowed then begin
    if (Length(lExtensionMask)=4) and (Length(lFileNameMask)>0) then begin
      lExtensionMask:=lExtensionMask+'*';
      cMaskWindowsQuirkInUse:=cMaskWindowsQuirkInUse+[eWindowsQuirk_Extension3More];
    end;
  end;

  // Quirk "" = "*"
  if (Length(lFileNameMask)=0) and (Length(lExtensionMask)=0) then begin
    if eWindowsQuirk_EmptyIsAny in cMaskWindowsQuirkAllowed then begin
      lFileNameMask:='*';
      cMaskWindowsQuirkInUse:=cMaskWindowsQuirkInUse+[eWindowsQuirk_EmptyIsAny];
    end;
  end else begin
  // Quirk ".abc"
    if eWindowsQuirk_AllByExtension in cMaskWindowsQuirkAllowed then begin
      if (Length(lFileNameMask)=0) and (length(lExtensionMask)>0) then begin
        if lExtensionMask[1]='.' then begin
          lFileNameMask:='*';
          cMaskWindowsQuirkInUse:=cMaskWindowsQuirkInUse+[eWindowsQuirk_AllByExtension];
        end;
      end;
    end;
  end;

  lFileNameMask:=EscapeSpecialChars(lFileNameMask);
  lExtensionMask:=EscapeSpecialChars(lExtensionMask);

  // Quirk "file???.ab?" matches "file1.ab1" and "file123.ab"
  if eWindowsQuirk_FilenameEnd in cMaskWindowsQuirkAllowed then begin
    lFileNameMask:=OptionalQMarksAtEnd(lFileNameMask);
    lExtensionMask:=OptionalQMarksAtEnd(lExtensionMask);
  end;

  if eWindowsQuirk_NoExtension in cMaskWindowsQuirkAllowed then begin
    if Length(lExtensionMask)=1 then begin
      cMaskWindowsQuirkInUse:=[eWindowsQuirk_NoExtension];
      lExtensionMask:='';
    end;
  end;

  inherited Mask:=lFileNameMask+lExtensionMask;
  inherited Compile;
end;

function TMaskUTF8Windows.Matches(const aFileName: RawByteString): Boolean;
var
  lFileName, lExtension: RawByteString;
begin
  if eWindowsQuirk_NoExtension in cMaskWindowsQuirkInUse then begin
    SplitFileNameExtension(aFileName,lFileName,lExtension,false);
    // eWindowsQuirk_NoExtension = Empty extension
    if lExtension<>'' then exit(false);
  end;
  Result:=Inherited Matches(aFileName);
end;


{ TParseStringList }

constructor TParseStringList.Create(const AText, ASeparators: String);
var
  I, S: Integer;
begin
  inherited Create;
  S := 1;
  for I := 1 to Length(AText) do
  begin
    if Pos(AText[I], ASeparators) > 0 then
    begin
      if I > S then
        Add(Copy(AText, S, I - S));
      S := I + 1;
    end;
  end;
  if Length(AText) >= S then
    Add(Copy(AText, S, Length(AText) - S + 1));
end;

{ TMaskList }

constructor TMaskList.Create(const AValue: String);
begin
  Create(AValue, ';', False);
end;

constructor TMaskList.Create(const AValue: String; ASeparator: Char; CaseSensitive: Boolean);
var
  S: TParseStringList;
  I: Integer;
begin
  FMasks := TObjectList.Create(True);
  S := TParseStringList.Create(AValue, ASeparator);
  try
    for I := 0 to S.Count - 1 do
      FMasks.Add(TMask.Create(S[I], CaseSensitive));
  finally
    S.Free;
  end;
end;

constructor TMaskList.CreateWindows(const AValue: String; ASeparator: Char; CaseSensitive: Boolean);
var
  S: TParseStringList;
  I: Integer;
begin
  FMasks := TObjectList.Create(True);
  S := TParseStringList.Create(AValue, ASeparator);
  try
    for I := 0 to S.Count - 1 do
      FMasks.Add(TMaskWindows.Create(S[I], CaseSensitive));
  finally
    S.Free;
  end;
end;

constructor TMaskList.CreateSysNative(const AValue: String; ASeparator: Char; CaseSensitive: Boolean);
begin
  {$IFDEF Windows}
  CreateWindows(AValue, ASeparator, CaseSensitive);
  {$ELSE}
  Create(AValue, ASeparator, CaseSensitive);
  {$ENDIF}
end;

destructor TMaskList.Destroy;
begin
  FMasks.Free;
  inherited Destroy;
end;

function TMaskList.GetItem(Index: Integer): TMask;
begin
  Result := TMask(FMasks.Items[Index]);
end;

function TMaskList.GetCount: Integer;
begin
  Result := FMasks.Count;
end;

function TMaskList.Matches(const AFileName: String): Boolean;
var
  I: Integer;
begin
  Result := False;
  for I := 0 to FMasks.Count - 1 do
  begin
    if TMask(FMasks.Items[I]).Matches(AFileName) then
    begin
      Result := True;
      Exit;
    end;
  end;
end;

function TMaskList.MatchesWindowsMask(const AFileName: String): Boolean;
begin
  raise Exception.Create('Create with TMaskList.CreateWindows, then call Matches.');
end;

end.

-- 
_______________________________________________
lazarus mailing list
lazarus@lists.lazarus-ide.org
https://lists.lazarus-ide.org/listinfo/lazarus

Reply via email to