0

How do I iterate over the individual characters in a UTF8String? I need to print out the individual characters, not the bytes.

// Console program from the question: stores a Cyrillic literal in a
// UTF8String and writes one element of the string per line.
program Project3;

uses
  System.SysUtils;

var
  Str: UTF8String;
  I: Integer;
begin
  Str := 'Декат';
  // Length(Str) and Str[I] operate on the string's elements; per the question
  // text, the observed output is individual bytes rather than characters.
  // NOTE(review): the loop starts at index 0, while Delphi long strings are
  // normally 1-based - index 0 looks out of range; confirm.
  for I := 0 to Length(Str) do
  WriteLn(Str[I]);
  // Wait for Enter so the console output can be read before the program exits.
  ReadLn;
end.
John Lewis
  • 337
  • 3
  • 12

1 Answer

5

Here is a working demo:

program Project11;

{$APPTYPE CONSOLE}

uses
  Windows, SysUtils;

{ Locates the end of the UTF-8 encoded character that starts at byte S[N].

  Parameters:
    S - UTF-8 encoded text.
    N - 1-based byte index of the first byte of the character to step over.

  Returns the 1-based byte index of the first byte AFTER that character
  (Length(S) + 1 when the character is the last one), or -1 when:
    - N is outside 1..Length(S) (including the normal end-of-string case),
    - S[N] is not a valid lead byte (e.g. a stray continuation byte),
    - the sequence is cut off by the end of the string, or
    - one of the trailing bytes is not a 10xxxxxx continuation byte.

  Only the structural byte pattern is checked; overlong encodings and
  surrogate code points are not rejected. }
function GetNextChar(const S: UTF8String; N: Integer): Integer;
var
  Lead: Byte;
  SeqLen, I: Integer;
begin
  Result := -1;

  // Guard both ends: N < 1 would read S[0] (or below), which is out of range.
  if (N < 1) or (N > Length(S)) then
    Exit;

  // The high bits of the lead byte encode the total sequence length.
  Lead := Byte(S[N]);
  if (Lead and $80) = 0 then
    SeqLen := 1                 // 0xxxxxxx - ASCII
  else if (Lead and $E0) = $C0 then
    SeqLen := 2                 // 110xxxxx
  else if (Lead and $F0) = $E0 then
    SeqLen := 3                 // 1110xxxx
  else if (Lead and $F8) = $F0 then
    SeqLen := 4                 // 11110xxx
  else
    Exit;                       // continuation byte or invalid lead byte

  // A sequence that would run past the end of the string is truncated input.
  if N + SeqLen - 1 > Length(S) then
    Exit;

  // Every trailing byte must have the form 10xxxxxx.
  for I := N + 1 to N + SeqLen - 1 do
    if (Byte(S[I]) and $C0) <> $80 then
      Exit;

  Result := N + SeqLen;
end;

{ Demo driver: switches the console output code page to UTF-8, prints the
  sample string as a whole, then prints each character on its own line
  prefixed with the byte index at which it starts. }
procedure Test;
var
  Sample: UTF8String;
  Piece: UTF8String;
  StartPos, NextPos: Integer;
begin
  Sample := 'Декат';
  SetConsoleOutputCP(CP_UTF8);
  Writeln(Sample);

  // Walk the string one character at a time; GetNextChar returns a negative
  // value once there is nothing more to step over.
  StartPos := 1;
  NextPos := GetNextChar(Sample, StartPos);
  while NextPos >= 0 do
  begin
    // The character occupies the bytes from StartPos up to NextPos - 1.
    Piece := Copy(Sample, StartPos, NextPos - StartPos);
    Writeln(StartPos, ':  ', Piece);
    StartPos := NextPos;
    NextPos := GetNextChar(Sample, StartPos);
  end;
end;

// Program entry point: runs the demo and reports any exception on the console.
begin
  try
    // Pause before producing output; the original note says to select the
    // Consolas font in the console window at this point.
    Readln;
    Test;
  except
    on Err: Exception do
      Writeln(Err.ClassName, ': ', Err.Message);
  end;
  // Wait for Enter so the output stays visible until the user is done.
  Readln;
end.
kludg
  • 27,213
  • 5
  • 67
  • 118