SMOLNET PORTAL home about changes
MODULE Translit;
IMPORT In, Out, Args := extArgs, Strings, Chars;

CONST
    MAXSTR = 1024; (* or whatever *)
    DASH = Chars.DASH;
    ENDSTR = Chars.ENDSTR;
    ESCAPE = "@";
    TAB* = Chars.TAB;

(* Error -- write an error string to standard out and
   halt program *)
PROCEDURE Error(s : ARRAY OF CHAR);
BEGIN
    Out.String(s);Out.Ln();
    ASSERT(FALSE);
END Error;

(* IsEscape - this procedure looks to see if we have an
escape sequence at position in variable i *)
PROCEDURE IsEscape*(src : ARRAY OF CHAR; i : INTEGER) : BOOLEAN;
VAR res : BOOLEAN; last : INTEGER;
BEGIN
  res := FALSE;
  last := Strings.Length(src) - 1;
  IF (i < last) & (src[i] = ESCAPE) THEN
    res := TRUE;
  END;
  RETURN res
END IsEscape;

(* ExpandEscape - this procedure takes a source array, a 
   position and appends the escaped value to the destination
   array.  It returns TRUE on successes, FALSE otherwise. *)
PROCEDURE ExpandEscape*(src : ARRAY OF CHAR; i : INTEGER; VAR dest : ARRAY OF CHAR) : BOOLEAN;
VAR res : BOOLEAN; j : INTEGER;
BEGIN
 res := FALSE;
 j := i + 1;
 IF j < Strings.Length(src)  THEN
    res := Chars.AppendChar(src[j], dest)
 END
 RETURN res
END ExpandEscape;

(* IsSequence - this procedure looks at position i and checks
   to see if we have a sequence to expand *)
PROCEDURE IsSequence*(src : ARRAY OF CHAR; i : INTEGER) : BOOLEAN;
VAR res : BOOLEAN;
BEGIN
  res := Strings.Length(src) - i >= 3;
  (* Do we have a sequence of alphanumeric character 
     DASH alphanumeric character? *)
  IF res & Chars.IsAlphaNum(src[i]) & (src[i+1] = DASH) & 
            Chars.IsAlphaNum(src[i+2]) THEN
      res := TRUE;
  END;
  RETURN res
END IsSequence;

(* ExpandSequence - this procedure expands a sequence x 
   starting at i and append the sequence into the destination 
   string. It returns TRUE on success, FALSE otherwise *)
PROCEDURE ExpandSequence*(src : ARRAY OF CHAR; i : INTEGER; VAR dest : ARRAY OF CHAR) : BOOLEAN;
VAR res : BOOLEAN; cur, start, end : INTEGER;
BEGIN
  (* Make sure sequence is ascending *)
  res := TRUE;
  start := ORD(src[i]);
  end := ORD(src[i+2]);
  IF start < end THEN
    FOR cur := start TO end DO
      IF res THEN
        res := Chars.AppendChar(CHR(cur), dest);
      END;
    END;
  ELSE
    res := FALSE;
  END;
  RETURN res
END ExpandSequence;


(* makeset -- make sets based on src expanded into destination *)
PROCEDURE MakeSet* (src : ARRAY OF CHAR; start : INTEGER; VAR dest : ARRAY OF CHAR) : BOOLEAN;
VAR i : INTEGER; makeset : BOOLEAN;
BEGIN
    i := start;
    makeset := TRUE;
    WHILE (makeset = TRUE) & (i < Strings.Length(src)) DO 
        IF IsEscape(src, i) THEN
            makeset := ExpandEscape(src, i, dest);
            i := i + 2;
        ELSIF IsSequence(src, i) THEN
            makeset := ExpandSequence(src, i, dest);
            i := i + 3;
        ELSE
            makeset := Chars.AppendChar(src[i], dest);
            i := i + 1;
        END;
    END;
    RETURN makeset
END MakeSet;


(* Index -- find position of character c in string s *)
PROCEDURE Index* (VAR s : ARRAY OF CHAR; c : CHAR) : INTEGER;
VAR
    i, index : INTEGER;
BEGIN
    i := 0;
    WHILE (s[i] # c) & (s[i] # ENDSTR) DO
        i := i + 1;
    END;
    IF (s[i] = ENDSTR) THEN
        index := -1; (* Value not found *)
    ELSE
        index := i; (* Value found *)
    END;
    RETURN index
END Index;

(* XIndex -- conditionally invert value found in index *)
PROCEDURE XIndex* (VAR inset : ARRAY OF CHAR; c : CHAR;
    allbut : BOOLEAN; lastto : INTEGER) : INTEGER;
VAR
    xindex : INTEGER;
BEGIN
    (* Uninverted index value *)
    xindex := Index(inset, c);
    (* Handle inverted index value *)
    IF (allbut = TRUE) THEN
        IF (xindex = -1)  THEN
            (* Translate as an inverted the response *)
            xindex := 0; (* lastto - 1; *)
        ELSE
            (* Indicate no translate *)
            xindex := -1;
        END;
    END;
    RETURN xindex
END XIndex;

(* Translit -- map characters *)
PROCEDURE Translit* ();
CONST
    NEGATE = Chars.CARET; (* ^ *)
VAR
    arg, fromset, toset : ARRAY MAXSTR OF CHAR;
    c : CHAR;
    i, lastto : INTEGER;
    allbut, squash : BOOLEAN;
    res : INTEGER;
BEGIN
    i := 0;
    lastto := MAXSTR - 1;
    (* NOTE: We are doing low level of string manipulation. Oberon
       strings are terminated by 0X, but Oberon compilers do not 
       automatically initialize memory to a specific state. In the 
       OBNC implementation of Oberon-7 assign "" to an assignment 
       like `s := "";` only writes a 0X to position zero of the 
       array of char. Since we're doing position based character 
       assignment and can easily overwrite a single 0X.  To be safe 
       we want to assign all the positions in the array to 0X so the 
       memory is in a known state.  *)
    Chars.Clear(arg);
    Chars.Clear(fromset);
    Chars.Clear(toset);
    IF (Args.count = 0) THEN
        Error("usage: translit from to");
    END;
    (* NOTE: I haven't used an IF ELSE here because we have
       additional conditions that lead to complex logic.  The
       procedure Error() calls ASSERT(FALSE); which in Oberon-7
       halts the program from further execution *)
    IF (Args.count > 0) THEN
        Args.Get(0, arg, res);
        allbut := (arg[0] = NEGATE);
        IF (allbut) THEN
            i := 1;
        ELSE
            i := 0;
        END;
        IF MakeSet(arg, i, fromset) = FALSE THEN
            Error("from set too long");
        END;
    END;
    (* NOTE: We've initialized our array of char earlier so we only
       need to know if we need to update toset to a new value *)
    Chars.Clear(arg);
    IF (Args.count = 2) THEN
        Args.Get(1, arg, res);
        IF MakeSet(arg, 0, toset) = FALSE THEN
            Error("to set too long");
        END;
    END;

    lastto := Strings.Length(toset);
    squash := (Strings.Length(fromset) > lastto) OR (allbut);
    REPEAT
        In.Char(c);
        IF In.Done THEN
            i := XIndex(fromset, c, allbut, lastto);
            IF (squash) & (i>=lastto) & (lastto>0) THEN (* translate *)
                Out.Char(toset[lastto]);
            ELSIF (i >= 0) & (lastto > 0) THEN    (* translate *)
                Out.Char(toset[i]);
            ELSIF i = -1 THEN                        (* copy *)
              (* Do not translate the character *)
              Out.Char(c);
              (* NOTE: No else clause needed as not writing out
			     a cut value is deleting *)
            END;
        END;
    UNTIL (In.Done # TRUE);
END Translit;

BEGIN
    Translit();
END Translit.


2.6 Character Transliteration
=============================

[Page 47](https://archive.org/stream/softwaretoolsinp00kern?ref=ol#page/47/mode/1up)


**translit** is the most complicated program so far in the book.
Most of the translation process from Pascal to Oberon-7 has
remained similar to the previous examples.

My implementation of **translit** diverges from the K & P 
implementation at several points. Much of this is a result of Oberon 
evolution beyond Pascal. First Oberon counts arrays from zero 
instead of one so I have opted to use -1 as a value to indicate the 
index of a character in a string was not found. Equally I have 
simplified the logic in `xindex()` to make it clear how 
I am handling the index lookup described in `index()` of the Pascal 
implementation. K & P implemented `makeset()` and `dodash()`.
`dodash()` particularly looked troublesome. If you came across the 
function name `dodash()` without seeing the code comments 
"doing a dash" seems a little obscure.  I have chosen to name
that process "Expand Sequence" for clarity. I have simplified the 
task of making sets of characters for translation into three cases 
by splitting the test conditions from the actions. First check to see 
if we have an escape sequence and if so handle it. Second check to 
see if we have an expansion sequence and if so handle it else append 
the char found to the end of the set being assembled. This resulted 
in `dodash()` being replaced by `IsSequence()` and 
`ExpandSequence()`.  Likewise `esc()` was replaced with `IsEscape()`
and `ExpandEscape()`. I renamed `addchar()` to `AppendChar()`  in
the "Chars" module as that seemed more specific and clearer.

I choose to advance the value used when expanding a set description
in the loop inside of my `MakeSet()`. I minimized the side effects 
of the expand functions to the target destination.  It is clearer 
while in the `MakeSet()` loop to see the relationship of the test 
and transformation and how to advance through the string. This also 
allowed me to use fewer parameters to procedures which tends to make 
things more readable as well as simpler.

I have included an additional procedure not included in the K & P 
Pascal of this program. `Error()` displays a string and halts. K & P 
provide this as part of their Pascal environment. I have chosen to 
embed it here because it is short and trivial.

Translit suggested the "Chars" module because of the repetition in 
previous programs. In K & P the approach to code reuse is to create 
a separate source file and to included via a pre-processor. In 
Oberon we have the module concept.

My [Chars](Chars.Mod) module provides a useful set of test procedures
like `IsAlpha(c)`, `IsUpper(c)`, `IsLower()` in addition to the 
`CharInRange()` and `IsAlphaNum()`.  It also includes `AppendChar()` 
which can be used to append a single character value to an end of an 
array of char.


Program Documentation
---------------------

[Page 56](https://archive.org/stream/softwaretoolsinp00kern?ref=ol#page/56/mode/1up)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

PROGRAM

    translit    transliterate characters

USAGE

    translit    [^]src [dest]

FUNCTION

    translit maps its input, on a character by character basis, and
    writes the translated version to its output.In the simplest case,
    each character is the argument src is translated to the
    corresponding character is the argument dest; all other characters
    are copies as is. Both the src and dest may contain sub-strings of
    the form c1 - c2 as shorthand for all the characters in the range
    c1..c2 and c2 must both be digits, or both be letter of the same
    case. If dest is absent, all characters represented by src are
    deleted. Otherwise, if dest is shorter than src, all characters
    is src that would map to or beyond the last character in
    dest are mapped to the last character in dest; moreover adjacent
    instances of such characters in the input are represented in the
    output by a single instance of the last character in dest. The

        translit 0-9 9

    converts each string of digits to the single digit 9.
    Finally, if src is proceeded by ^, then all but the characters
    represented by src are taken as the source string; i.e., they are
    all deleted if dest is absent, or they are all collapsed if the
    last character in dest is present.

EXAMPLE

    To convert upper case to lower:

        translit A-Z a-z

    To discard punctuation and isolate words by spaces on each line:

        translit ^a-zA-Z@n " "
        This is a simple-minded test, i.e., a test of translit.
        This is a simple minded test i e a test of translit

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Pascal Source
-------------

[Page 48](https://archive.org/stream/softwaretoolsinp00kern?ref=ol#page/48/mode/1up)


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

{ index -- find position of character c in string s }
function index (var s : string; c : character) : integer;
var
    i : integer;
begin
    i := 1;
    while (s[i] <> c) and (s[i] <> ENDSTR) do
        i := i + 1;
    if (s[i] = ENDSTR) then
        index := 0
    else
        index := i
end;

{ xindex -- conditionally invert value from index }
function xindex (var inset : string; c : character;
    allbut : boolean; lastto : integer) : integer;
begin
    if (c = ENDFILE) then
        xindex := 0
    else if (not allbut) then
        xindex := index(inset, c)
    else if (index(inset, c) > 0) then
        xindex := 0
    else
        xindex := lastto + 1
end;

{ translit -- map characters }
procedure translit;
const
    NEGATE = CARET; { ^ }
var
    arg, fromset, toset : string;
    c : character;
    i, lastto : 0..MAXSTR;
    allbut, squash : boolean;
#include "makeset.p"
#include "xindex.p"
begin
    if (not getarg(1, arg, MAXSTR)) then
        error('usage: translit from to');
    allbut := (arg[1] = NEGATE);
    if (allbut) then
        i := 2
    else
        i := 1;
    if (not makeset(arg, i, fromset, MAXSTR)) then
        error('translit: "from" set too large"');
    if (not getarg(2, arg, MAXSTR)) then
        toset[1] := ENDSTR
    else if (not makeset(arg, 1, toset, MAXSTR)) then
        error('translit: "to" set too large')
    else if (Length(fromset) < Length(toset)) then
        error('translit: "from" shorter than "to"');

    lastto := Length(toset);
    squash := (Length(fromset) > lastto) or (allbut);
    repeat
        i := xindex(fromset, getc(c), allbut, lastto);
        if (squash) and (i>=lastto) and (lastto>0) then begin
            putc(toset[lastto]);
            repeat
                i := xindex(fromset, getc(c), allbut, lastto)
            until (i < lastto)
        end;
        if (c <> ENDFILE) then begin
            if (i > 0) and (lastto > 0) then    { translate }
                putc(toset[i])
            else if (i = 0) then                { copy }
                putc(c)
            { else delete }
        end
    until (c = ENDFILE)
end;

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


Additional source files
-----------------------

**makeset.p**

[Page 52](https://archive.org/stream/softwaretoolsinp00kern?ref=ol#page/52/mode/2up)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

{ makeset -- make set from inset[k] in outset }
function makeset (var inset : string; k : integer;
            var outset : string; maxset : integer) : boolean;
var
    j : integer;
#include "dodash.p"
begin
    j := 1
    dodash(ENDSTR, inset, k, outset, j, maxset);
    makeset := addstr(ENDSTR, outset, j, maxset)
end;

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

**addstr.p**

[Page 53](https://archive.org/stream/softwaretoolsinp00kern?ref=ol#page/53/mode/1up)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

{ addstr -- put c in outset[j] if it fits, increment j }
function addstr(c : character; var outset : string;
            var j : integer; max : integer) : boolean;
begin
    if (j > maxset) then
        addstr := false
    else begin
        outset[j] := c;
        j := j + 1;
        addstr := true
    end
end;

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

**dodash.p**

[Page 53](https://archive.org/stream/softwaretoolsinp00kern?ref=ol#page/53/mode/1up)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
{ dodash - expand set at src[i] into dest[j], stop at delimit }
procedure dodash (delim : character; var src : string;
        var i : integer; var dest : string;
        var j : integer; maxset : integer);
var
    k : integer;
    junk : boolean;
begin
    while (arc[i] <> delim) and (src[i] <> ENDSTR) do begin
        if (src[i] = ESCAPE) then
            junk := addstr(esc(src[i], i), dest, j, maxset);
        else if (src[i] <> DASH) then
            junk := addstr(src[i], dest, j, maxset);
        else if (j <= 1) or (src[i+1] = ENDSTR) then
            junk := addstr(DASH, dest, j, maxstr); { literal - }
	else if (isalphanum(src[i-1]))
	    and (isalphanum(src[i+1]))
	    and (src[i-1] <= src[i+1]) then begin
	        for k := src[i-1] + 1 to src[i-1] do
	            junk := addstr(k, dest, j, maxstr);
	        i := i + 1;
	end
	else
	    junk := addstr(DASH, dest, j, maxstr);
	i := i + 1;
    end
end;

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

**isalphanum.p**

[Page 54](https://archive.org/stream/softwaretoolsinp00kern?ref=ol#page/54/mode/1up)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

{ isalphanum -- true if c is letter or digit }
function isalphanum (c : character) : integer;
begin
    isalphanum := c in [ord('a') .. ord('z'),
                        ord('A') .. ord('Z'),
                        ord('0') .. ord('9')]
end

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

**esc.p**

[Page 55](https://archive.org/stream/softwaretoolsinp00kern?ref=ol#page/55/mode/1up)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

{ esc -- map s[i] into escape character, increment i }
function esc (var s : string; var i : integer) : character;
begin
    if (s[i] <> ESCAPE) then
        esc := s[i]
    else if (s[i+1] = ENDSTR) then { @ not special ast end }
        esc := ESCAPE
    else begin
        i := i + 1;
        if (s[i] = ord('n')) then
           esc := NEWLINE
        else if (s[i] = ord('t') then
           esc := TAB
        else
           esc := s[i]
    end
end;

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

**length.p**

[Page 46](https://archive.org/stream/softwaretoolsinp00kern?ref=ol#page/46/mode/1up)

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

{ length -- complete length of string }
function length(var s: string) : integer;
var
    n : integer;
begin
    n := 1;
    while (s[n] <> ENDSTR) do
        n := n + 1;
    length := n - 1
end;

~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


The impacts of having a richer language than 1980s ISO Pascal and 
evolution in practice suggest a revision in the K & P approach. I 
have attempted to keep the spirit of their example program while 
reflecting changes in practice that have occurred in the last four 
decades.

Response: text/plain
Original URLgopher://sdf.org/0/users/rsdoiel/blog/2020/10/31/Translit.Mod
Content-Typetext/plain; charset=utf-8