On Tue, 26 Jul 2016 11:01:28 +0200 Jonas Maebe <jonas.ma...@elis.ugent.be> wrote:
>[...] > Could you try the same program with u1 as plain ansistring instead of > utf8string? (with an additional > "setcodepage(rawbytestring(u1),65001,false);" after assigning u1) Sure: {$APPTYPE CONSOLE} type tcp866 = type ansistring(866); var s1, s2: tcp866; u1: UTF8String; r1: RawByteString; a1, a2: AnsiString; begin s1:='cp866'; setcodepage(rawbytestring(s1),65001,false); Writeln('s1 = "', s1, '" cp = ', StringCodePage(s1)); a1:='acp'; setcodepage(rawbytestring(a1),65001,false); Writeln('a1 = "', a1, '" cp = ', StringCodePage(a1)); u1:='utf8'; Writeln('u1 = "', u1, '" cp = ', StringCodePage(u1)); s2:=s1+u1; Writeln('s2:=s1+u1 = "', s2, '" cp = ', StringCodePage(s2)); s2:=u1+s1; Writeln('s2:=u1+s1 = "', s2, '" cp = ', StringCodePage(s2)); r1:=s1+u1; Writeln('r1:=s1+u1 = "', r1, '" cp = ', StringCodePage(r1)); r1:=u1+s1; Writeln('r1:=u1+s1 = "', r1, '" cp = ', StringCodePage(r1)); a2:=s1+u1; Writeln('a2:=s1+u1 = "', a2, '" cp = ', StringCodePage(a2)); a2:=u1+s1; Writeln('a2:=u1+s1 = "', a2, '" cp = ', StringCodePage(a2)); s2:=s1+a1; Writeln('s2:=s1+a1 = "', s2, '" cp = ', StringCodePage(s2)); s2:=a1+s1; Writeln('s2:=a1+s1 = "', s2, '" cp = ', StringCodePage(s2)); r1:=s1+a1; Writeln('r1:=s1+a1 = "', r1, '" cp = ', StringCodePage(r1)); r1:=a1+s1; Writeln('r1:=a1+s1 = "', r1, '" cp = ', StringCodePage(r1)); a2:=s1+a1; Writeln('a2:=s1+a1 = "', a2, '" cp = ', StringCodePage(a2)); a2:=a1+s1; Writeln('a2:=a1+s1 = "', a2, '" cp = ', StringCodePage(a2)); readln; end. 
s1 = "cp866" cp = 65001 a1 = "acp" cp = 65001 u1 = "utf8" cp = 65001 s2:=s1+u1 = "cp866utf8" cp = 866 s2:=u1+s1 = "utf8cp866" cp = 866 r1:=s1+u1 = "cp866utf8" cp = 1252 r1:=u1+s1 = "utf8cp866" cp = 1252 a2:=s1+u1 = "cp866utf8" cp = 1252 a2:=u1+s1 = "utf8cp866" cp = 1252 s2:=s1+a1 = "cp866acp" cp = 866 s2:=a1+s1 = "acpcp866" cp = 866 r1:=s1+a1 = "cp866acp" cp = 1252 r1:=a1+s1 = "acpcp866" cp = 1252 a2:=s1+a1 = "cp866acp" cp = 1252 a2:=a1+s1 = "acpcp866" cp = 1252 It seems the Delphi rules for strings other than RawByteString are: - Concatenating two strings with the same declared code page: append the bytes and copy the dynamic code page from the left operand. The declared code page of the result is that of the left operand. - Assigning between strings with the same declared code page: no conversion, only the reference count changes. - Concatenating two strings with different declared code pages: convert both to UnicodeString and append. Maybe there is an optimization for the case where both have the same dynamic code page. - Assigning between strings with different declared code pages: convert to the code page of the LHS. Mattias _______________________________________________ fpc-pascal maillist - fpc-pascal@lists.freepascal.org http://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal