unit UnitToken;
{
    Purpose:
        String utils related to tokenization.
        Just look at all the bad naming conventions - shameful.

    Updates:
        Included apostrophes in wordChars for words like
        "Can't" and "Don't".

}

interface

// Removes the text in front of the first occurrence of delim (and the
// delimiter itself) from s and returns that text. When delim is not found the
// whole of s is returned and s becomes empty. With TrimSpace, s is
// left-trimmed before the search.
function TokenString(var s: string; delim: string; TrimSpace : boolean = true) : string;
// Like TokenString, but keeps reading further segments for as long as the
// remaining input starts with delim again (a doubled delimiter).
function TokenStringStuffed(var s: string; delim: char; TrimSpace : boolean = true) : string;
// Returns s without its first character.
function clipOneChar(s : string) : string;
// Returns s without its first length(clip) characters; the content of clip is
// not compared against s.
function clipManyChars(s : string; clip : string) : string; overload;
// Returns s without its first count characters.
function clipManyChars(s : string; count : cardinal) : string; overload;
// Returns the number of characters in s that are equal to c.
function CharCount(s : string; c : char) : cardinal;
// Removes from the front of s a leading run of characters found in WordChars
// together with the other characters that follow it, and returns what was
// removed.
function CutWord(var s: string; WordChars: string) : string;
// Counterpart of CutWord in which the word is delimited by the characters
// listed in delimiters instead of being described by its own characters.
function CutWordExclude(var s: string; delimiters : string) : string;
// Lower-cases s and then upper-cases the first character of each chunk that
// CutWordExclude splits off.
function CapitalizeWords(s : string) : string;

// Appends value to s until it is count characters long; a string that is
// already longer than count is returned unchanged.
function RightPadString(s : string; count : integer; value : char = ' ') : string;

// True when a token extracted from str with TokenString (default TrimSpace)
// equals token.
function TokenInString(token : string; delim, str : string) : boolean;

// When s starts with prefix, removes the prefix from s and returns it;
// otherwise leaves s alone and returns ''.
function DeleteFromStart(var s : string; prefix : string) : string;

implementation

uses strutils, sysutils;


{
    Strips prefix from the front of s when s begins with it
    (case-sensitive comparison through AnsiStartsStr).

    Returns the prefix that was removed, or '' when s did not start with
    prefix, in which case s is left untouched.
}
function DeleteFromStart(var s : string; prefix : string) : string;
begin
    if not AnsiStartsStr(prefix, s) then begin
        // nothing to strip: report that with an empty result
        Result := '';
        Exit;
    end;

    Result := prefix;
    Delete(s, 1, Length(prefix));
end;


{
    Reports whether token occurs among the delim-separated tokens of str.

    Tokens are pulled off the local copy of str one at a time with
    TokenString (default TrimSpace, so each token is left-trimmed) until one
    matches or the input is used up. At least one token is always extracted,
    even when str is empty.
}
function TokenInString(token : string; delim, str : string) : boolean;
var
    current : string;
begin
    Result := false;
    repeat
        current := TokenString(str, delim);
        if current = token then begin
            Result := true;
            Break;
        end;
    until str = '';
end;

{
    Pads s on the right with value until it is count characters long.

    A string that already has count characters or more is returned as is;
    it is never truncated.
}
function RightPadString(s : string; count : integer; value : char = ' ') : string;
begin
    if count <= Length(s) then
        Result := s
    else
        Result := s + StringOfChar(value, count - Length(s));
end;

{
    Counts how many characters of s are equal to c (exact, case-sensitive
    character comparison). Returns 0 for an empty string.
}
function CharCount(s : string; c : char) : cardinal;
var
    idx : integer;
begin
    Result := 0;
    // the scan direction does not matter for a plain count
    for idx := Length(s) downto 1 do
        if s[idx] = c then
            Result := Result + 1;
end;

{
    Returns s with every word capitalized: the whole string is lower-cased
    first, then the first character of each chunk split off by
    CutWordExclude is upper-cased.

    A chunk is a word plus the punctuation that follows it, so the
    punctuation and spacing of s are carried over into the result.
}
function CapitalizeWords(s : string) : string;
// All punctuation on a keyboard, minus the single quote, so that words such
// as "can't" and "don't" stay in one piece. A line feed (#10) is appended.
// NOTE: the leading space is intended.
const puncChars : string = ' `~!@#$%^&*()_-+={[}]|\:;"<,>.?/'#10;
var
    chunk : string;
begin
    Result := '';
    s := LowerCase(s);

    while s <> '' do begin
        // CutWordExclude always removes at least one character from a
        // non-empty s, so this loop terminates
        chunk := CutWordExclude(s, puncChars);
        if chunk <> '' then
            chunk[1] := UpCase(chunk[1]);

        Result := Result + chunk;
    end;
end;
{
    Cuts the first "word" off the front of s and returns it together with
    the delimiters that follow it, stopping right before the next word.

    s          - input; the returned text is removed from its front.
    WordChars  - the characters a word may consist of; every other
                 character counts as a delimiter.

    When s starts with a delimiter, the returned text is just the leading
    run of delimiters. An empty s yields '' and stays empty.
}
function CutWord(var s: string; WordChars: string) : string;
var
    cut : longint;
begin
    cut := 1;

    // walk over the word; the last character needs no test because it is
    // part of the cut whichever kind it is
    while (cut < Length(s))
        and (Pos(s[cut], WordChars) > 0) do begin
        Inc(cut);
    end;

    // swallow every delimiter up to the next word. The bound is "<=" so that
    // a delimiter sitting on the very last position is taken as well; with
    // "<" a trailing delimiter was left behind in s
    while ((cut + 1) <= Length(s))
        and (Pos(s[cut + 1], WordChars) = 0) do begin
        Inc(cut);
    end;

    // cut off the word and its trailing delimiters
    Result := LeftStr(s, cut);
    Delete(s, 1, cut);
end;
{
    Cuts the first "word" off the front of s and returns it together with
    the delimiters that follow it, stopping right before the next word.

    s           - input; the returned text is removed from its front.
    delimiters  - the characters that separate words; every other
                  character counts as part of a word.

    When s starts with a delimiter, the returned text is just the leading
    run of delimiters. An empty s yields '' and stays empty.
}
function CutWordExclude(var s: string; delimiters : string) : string;
var
    cut : longint;
begin
    cut := 1;

    // walk over the word; the last character needs no test because it is
    // part of the cut whichever kind it is
    while (cut < Length(s))
        and (Pos(s[cut], delimiters) = 0) do begin
        Inc(cut);
    end;

    // swallow every delimiter up to the next word. The bound is "<=" so that
    // a delimiter sitting on the very last position is taken as well; with
    // "<" a trailing delimiter was left behind in s
    while ((cut + 1) <= Length(s))
        and (Pos(s[cut + 1], delimiters) <> 0) do begin
        Inc(cut);
    end;

    // cut off the word and its trailing delimiters
    Result := LeftStr(s, cut);
    Delete(s, 1, cut);
end;



{
    Splits the first token off the front of s.

    s          - input; the token and the delimiter behind it are removed.
    delim      - separator, may be longer than one character.
    TrimSpace  - when true, s is left-trimmed before the delimiter is
                 searched.

    Returns the text in front of the first delim. When delim does not occur,
    the whole (possibly trimmed) s is returned and s becomes ''.
}
function TokenString(var s: string; delim: string; TrimSpace : boolean = true) : string;
var
    cutAt : integer;
begin
    if TrimSpace then
        s := TrimLeft(s);

    cutAt := Pos(delim, s);
    if cutAt <= 0 then begin
        // no delimiter left: everything that remains is the last token
        Result := s;
        s := '';
        Exit;
    end;

    // take the text before the delimiter, then drop token and delimiter
    Result := Copy(s, 1, cutAt - 1);
    Delete(s, 1, (cutAt - 1) + Length(delim));
end;
{
    Splits the first token off the front of s, treating a doubled delimiter
    as one literal delimiter character that belongs to the token
    (character stuffing).

    s          - input; the token and its closing delimiter are removed.
    delim      - the delimiter character.
    TrimSpace  - passed on to TokenString for every segment.

    Returns the token with each doubled delimiter reduced to a single delim.
    The literal that is re-inserted is delim itself; it used to be a
    hard-coded double quote, which corrupted tokens for any other delimiter.

    NOTE(review): TrimSpace is also applied to the segments that follow a
    doubled delimiter, so spaces directly behind it are dropped from the
    token - confirm whether that is wanted.
}
function TokenStringStuffed(var s: string; delim: char; TrimSpace : boolean = true) : string;
var
    firstSegment : boolean;
begin
    Result := '';
    firstSegment := true;
    repeat
        if not firstSegment then begin
            // s starts with the second half of a doubled delimiter:
            // keep one literal copy in the token and drop it from the input
            Result := Result + delim;
            Delete(s, 1, 1);
        end;
        Result := Result + TokenString(s, delim, TrimSpace);
        firstSegment := false;
        // the token continues only while the input starts with delim again
    until (Length(s) = 0) or (s[1] <> delim);
end;

{
    Returns s without its first character; '' when s has one character or
    none.
}
function clipOneChar(s : string) : string;
begin
    Result := s;
    Delete(Result, 1, 1);
end;

{
    Returns s without its first Length(clip) characters. Only the length of
    clip is used; its content is not compared against s.
    Yields '' when clip is at least as long as s.
}
function clipManyChars(s : string; clip : string) : string;
begin
    // everything from the first character behind the clipped part onwards
    Result := Copy(s, Length(clip) + 1, MaxInt);
end;

{
    Returns s without its first count characters; '' when count is at least
    the length of s.

    count is compared as an unsigned value before any conversion to integer.
    Casting a count above MaxInt to integer yields a negative number, which
    made the former RightStr call return s unclipped.
}
function clipManyChars(s : string; count : cardinal) : string;
begin
    if count >= cardinal(Length(s)) then
        Result := ''
    else
        // count < Length(s) here, so the cast to integer cannot wrap
        Result := Copy(s, integer(count) + 1, Length(s) - integer(count));
end;


end.
