unit UnitHTMLConvert;

//
// While this was a decent idea, it's much better to just get a copy of an HTML
// clip in RichText as well
//

interface

uses VCL.ComCtrls, VCL.Graphics, System.Classes, System.SysUtils;

procedure HTMLtoRTF(html: string; var rtf: TRichedit);

implementation

uses System.Generics.Collections,  UnitToken;

type THTMLTagType = (
    HTML_FONT,
    HTML_H1,HTML_H2,HTML_H3,HTML_H4,HTML_H5,HTML_H6,
    HTML_B,
    HTML_EM,
    HTML_STRONG,
    HTML_U,
    HTML_I,
    HTML_UL,
    HTML_BR,
    HTML_LI,
    HTML_OTHER
);

const
    THTMLHeaderType = [HTML_H1,HTML_H2,HTML_H3,HTML_H4,HTML_H5,HTML_H6];
    THTMLStyleStype =   [HTML_B,HTML_EM,HTML_STRONG,HTML_U,HTML_I];

procedure HTMLtoRTF(html: string; var rtf: TRichedit);
var
    FontStack : TStack<TFont>;
    currentFont: TFont;

    function GetTag(s: string; var i: Integer; var Tag, tagparams: string): Boolean;
    var
        inTag: Boolean;
    begin
        GetTag  := False;
        Tag  := '';
        tagparams := '';
        inTag  := true;

        while i <= Length(s) do begin
            Inc(i);
            if s[i] in ['<','>'] then begin
                result := s[i] = '>';
                EXIT;
            end;

            if inTag and (s[i]=' ') then begin
                inTag := (tag = '');
            end else if InTag then begin
                Tag := Tag + s[i];
            end else begin
                tagparams := tagparams + s[i];
            end;
        end;
    end;
    function TagToType(name : string) : THTMLTagType;
    begin
        result := HTML_OTHER;
        name := lowercase(name);

        if (length(name)=2) and (name[1]='h') then begin
            try
                case StrToInt(name[2]) of
                1: result := HTML_H1;
                2: result := HTML_H2;
                3: result := HTML_H3;
                4: result := HTML_H4;
                5: result := HTML_H5;
                6: result := HTML_H6;
                else begin
                    result := HTML_H6; // default to largest
                end;
                end;
            except
                EXIT;
            end;
        end else if name='font' then begin
            result := HTML_FONT;
        end else if (name='b') or (name='em') or (name='strong') then begin
            result := HTML_B;
        end else if name='u' then begin
            result := HTML_U;
        end else if name='i' then begin
            result := HTML_i;
        end else if name='ul' then begin
            result := HTML_UL
        end else if name='br' then begin
            result := HTML_BR
        end;
    end;
    function IsCloseTag(tag:string) : boolean;
    begin
        result := false;
        tag := trim(tag);
        if tag = '' then EXIT;
        result := tag[1] = '/';
    end;

    function HtmlToColor(Color: string): TColor;
    begin
        Result := StringToColor(
            '$' +
            Copy(Color, 6, 2) +
            Copy(Color, 4,2) +
            Copy(Color, 2, 2)
        );
    end;
    function CalculateRTFSize(pt: Integer): Integer;
    begin
        result := 30;
        case pt of
        1: Result := 6;
        2: Result := 9;
        3: Result := 12;
        4: Result := 15;
        5: Result := 18;
        6: Result := 22;
        end;
    end;

    procedure NewCurrentFont(size : integer=0);
    var fs : TFontStyles;
    begin
        FontStack.Push(currentfont);
        fs := currentfont.Style;
        currentfont := TFont.Create;
        currentfont.Assign(rtf.Font);
        currentfont.Style := fs;
        if size <> 0 then
            currentfont.Size := size;
    end;
    procedure RestoreCurrentFont;
    begin
        currentfont.Free;
        currentfont := FontStack.Pop
    end;
    procedure RenderText(s : string);
    var
        i,j : integer;
    begin
        if (s = '') or (trim(s) = '') then EXIT;
        s := PChar(s);
        i := length(rtf.Text);
        rtf.Lines.Add(s);
        j := length(rtf.Text);
        rtf.SelStart := i;
        rtf.SelLength := j - i;
        rtf.SelAttributes.Style := currentfont.Style;
        rtf.SelAttributes.Size := currentfont.Size;
        rtf.SelAttributes.Color := currentfont.Color;
    end;
var
   row: Integer;

   tag, tagparams: string;
   params: TStringList;
   wordwrap, list: Boolean;
   tagtype : THTMLTagType;
   text : string;
begin
   if Length(html) = 0 then EXIT;

   FontStack := TStack<TFont>.create;

   rtf.Lines.BeginUpdate;
   rtf.Lines.Clear;
   wordwrap  := rtf.wordwrap;
   rtf.WordWrap := False;

   rtf.Lines.Add('');
   Params := TStringList.Create;

   currentfont := TFont.Create;
   currentfont.Assign(rtf.Font);

   row := 0;
   List := False;
   rtf.selstart := 0;

   while html <> '' do begin
        text := UnitToken.TokenString(html,'<', false);
        if html = '' then begin
            RenderText(text);
            BREAK;
        end;
        tag := UnitToken.TokenString(html,'>', false);
        if html='' then begin
            RenderText(text);
            BREAK;
        end;
        RenderText(text);

        tagparams := tag;
        tag := UnitToken.TokenString(tagparams,' ', false);
        tagtype := TagToType(tag);

        if tagparams <> '' then begin
            params.Text := tagparams;
        end;


        tagtype := TagToType(tag);
        if tagtype in THTMLHeaderType then begin
            if IsCloseTag(tag) then begin
                RestoreCurrentFont;
            end else begin
                Case tagtype of
                HTML_H1: NewCurrentFont(6);
                HTML_H2: NewCurrentFont(9);
                HTML_H3: NewCurrentFont(12);
                HTML_H4: NewCurrentFont(15);
                HTML_H5: NewCurrentFont(18);
                HTML_H6: NewCurrentFont(22);
                End;
            end;
        end else if tagtype in THTMLStyleStype then begin
            if IsCloseTag(tag)  then begin
                RestoreCurrentFont;
                case tagtype of
                HTML_B,HTML_EM,HTML_STRONG: currentfont.Style := currentfont.Style - [fsbold];
                HTML_U: currentfont.Style := currentfont.Style - [fsUnderline];
                HTML_I: currentfont.Style := currentfont.Style - [fsItalic];
                end;
            end else begin
                NewCurrentFont;
                case tagtype of
                HTML_B,HTML_EM,HTML_STRONG: currentfont.Style := currentfont.Style + [fsbold];
                HTML_U:  currentfont.Style := currentfont.Style + [fsUnderline];
                HTML_I:  currentfont.Style := currentfont.Style + [fsItalic];
                end;
            end;
        end else begin
            case tagtype of
                HTML_FONT: begin
                    NewCurrentFont;
                    if params.Values['size'] <> '' then
                        currentfont.Size := CalculateRTFSize(StrToInt(params.Values['size']));
                    if params.Values['color'] <> '' then currentfont.Color :=
                        htmltocolor(params.Values['color']);
                end;
                HTML_UL: begin
                    if IsCloseTag(tag) then begin
                        list := False;
                        rtf.Lines.Add('');
                        Inc(row);
                        rtf.Lines.Add('');
                        Inc(row);
                    end else begin
                        list := True
                    end;
                end;
                HTML_BR, HTML_LI: begin
                    rtf.Lines.Add('');
                    Inc(row);
                end;
            end;
        end;
   end;
end;

end.
