On Tue, Dec 28, 2021 at 12:08 PM Martin Frb via lazarus <lazarus@lists.lazarus-ide.org> wrote:
> I would like to see the generates assembler on M1, if that is possible? (for > code with optimization off, as well as code with whatever optimization was > used so far) @Noel: Here's example code (standalone) you can use to test both signed and unsigned versions. Save this code as ulen.pas In order to get assembler output compile with: fpc -al ulen.pas This will produce the file ulen.s You can attach or copy that here. ============= program ulen; {$mode objfpc}{$H+} {$optimization off} {$codepage utf8} uses SysUtils; function UTF8LengthFast_Signed(p: PChar; ByteCount: PtrInt): PtrInt; const {$ifdef CPU32} ONEMASK =$01010101; EIGHTYMASK=$80808080; {$endif} {$ifdef CPU64} ONEMASK =$0101010101010101; EIGHTYMASK=$8080808080808080; {$endif} var pnx: PPtrInt absolute p; // To get contents of text in PtrInt blocks. x refers to 32 or 64 bits pn8: pint8 absolute pnx; // To read text as Int8 in the initial and final loops ix: PtrInt absolute pnx; // To read text as PtrInt in the block loop nx: PtrInt; // values processed in block loop i,cnt,e: PtrInt; begin Result := 0; e := ix+ByteCount; // End marker // Handle any initial misaligned bytes. cnt := (not (ix-1)) and (sizeof(PtrInt)-1); if cnt>ByteCount then cnt := ByteCount; for i := 1 to cnt do begin // Is this byte NOT the first byte of a character? //writeln('pn8^ = ',byte(pn8^).ToBinString); //writeln('pn8^ shr 7 = ',Byte(Byte(pn8^) shr 7).ToBinString); //writeln('not pn8^ = ',Byte(not pn8^).ToBinString); //writeln('(not pn8^) shr 6 = ',Byte((not pn8^) shr 6).ToBinString); //writeln; Result += (pn8^ shr 7) and ((not pn8^) shr 6); inc(pn8); end; // Handle complete blocks for i := 1 to (ByteCount-cnt) div sizeof(PtrUInt) do begin // Count bytes which are NOT the first byte of a character. nx := ((pnx^ and EIGHTYMASK) shr 7) and ((not pnx^) shr 6); {$push}{$overflowchecks off} // "nx * ONEMASK" causes an arithmetic overflow. Result += (nx * ONEMASK) >> ((sizeof(PtrInt) - 1) * 8); {$pop} inc(pnx); end; // Take care of any left-over bytes. while ix<e do begin // Is this byte NOT the first byte of a character? //writeln('pn8^ = ',byte(pn8^).ToBinString); //writeln('pn8^ shr 7 = ',Byte(Byte(pn8^) shr 7).ToBinString); //writeln('not pn8^ = ',Byte(not pn8^).ToBinString); //writeln('(not pn8^) shr 6 = ',Byte((not pn8^) shr 6).ToBinString); //writeln; //writeln('',); Result += (pn8^ shr 7) and ((not pn8^) shr 6); inc(pn8); end; Result := ByteCount - Result; end; function UTF8LengthFast_Unsigned(p: PChar; ByteCount: PtrInt): PtrInt; const {$ifdef CPU32} ONEMASK =$01010101; EIGHTYMASK=$80808080; {$endif} {$ifdef CPU64} ONEMASK =$0101010101010101; EIGHTYMASK=$8080808080808080; {$endif} var pnx: PPtrUInt absolute p; // To get contents of text in PtrInt blocks. x refers to 32 or 64 bits pn8: puint8 absolute pnx; // To read text as Int8 in the initial and final loops ix: PtrUInt absolute pnx; // To read text as PtrInt in the block loop nx: PtrUInt; // values processed in block loop i,cnt,e: PtrInt; begin Result := 0; e := ix+PtrUInt(ByteCount); // End marker // Handle any initial misaligned bytes. cnt := (not (ix-1)) and (sizeof(PtrInt)-1); if cnt>ByteCount then cnt := ByteCount; for i := 1 to cnt do begin // Is this byte NOT the first byte of a character? //writeln('pn8^ = ',byte(pn8^).ToBinString); //writeln('pn8^ shr 7 = ',Byte(Byte(pn8^) shr 7).ToBinString); //writeln('not pn8^ = ',Byte(not pn8^).ToBinString); //writeln('(not pn8^) shr 6 = ',Byte((not pn8^) shr 6).ToBinString); //writeln; Result += (pn8^ shr 7) and ((not pn8^) shr 6); inc(pn8); end; // Handle complete blocks for i := 1 to (ByteCount-cnt) div sizeof(PtrUInt) do begin // Count bytes which are NOT the first byte of a character. nx := ((pnx^ and EIGHTYMASK) shr 7) and ((not pnx^) shr 6); {$push}{$overflowchecks off} // "nx * ONEMASK" causes an arithmetic overflow. Result += (nx * ONEMASK) >> ((sizeof(PtrInt) - 1) * 8); {$pop} inc(pnx); end; // Take care of any left-over bytes. while ix<e do begin // Is this byte NOT the first byte of a character? //writeln('pn8^ = ',byte(pn8^).ToBinString); //writeln('pn8^ shr 7 = ',Byte(Byte(pn8^) shr 7).ToBinString); //writeln('not pn8^ = ',Byte(not pn8^).ToBinString); //writeln('(not pn8^) shr 6 = ',Byte((not pn8^) shr 6).ToBinString); //writeln; //writeln('',); Result += (pn8^ shr 7) and ((not pn8^) shr 6); inc(pn8); end; Result := ByteCount - Result; end; var Len: PtrInt; Euro: String; begin DefaultSystemCodePage := CP_UTF8; Euro := '€'; writeln('Signed version'); Len := Utf8LengthFast_Signed(PChar(Euro), Length(Euro)); writeln('Len = ',Len); writeln('Unsigned version'); Len := Utf8LengthFast_Unsigned(PChar(Euro), Length(Euro)); writeln('Len = ',Len); end. ========================== -- Bart -- _______________________________________________ lazarus mailing list lazarus@lists.lazarus-ide.org https://lists.lazarus-ide.org/listinfo/lazarus