Sat, 07 Apr 2007

> So I thought I'd write a quick little language implementation in
> OCaml, since (a) OCaml is sort of optimized for writing languages and
> (b) I wanted to learn OCaml, including ocamllex and ocamlyacc.

(b) is a big reason for doing things the way you did, but I thought I'd  
point out that, as with LISP or FORTH or dc(1), in APL lexing and parsing  
are nearly trivial* -- possibly due to deliberate design decision,  
possibly to due to the nature of machines (and lack of language theory!)  
at the time.  It might be interesting to see what changes were made to apl  
when it went from being a human-centered didactic notation to a  
machine-implementated executable one.

-Dave

:: :: ::

* subscripting may have been an exception, hence its absence in J?

For instance, if one carries the environment in machine state,

   x*x:a+b

can be scanned a character at a time and directly translated to {load b;  
add a; store x; mul x}, while more modern equivalents,

   x*x where x=a+b
   let x=a+b in x*x

involve some minor parsing first.

So I thought I'd write a quick little language implementation in
OCaml, since (a) OCaml is sort of optimized for writing languages and
(b) I wanted to learn OCaml, including ocamllex and ocamlyacc.  So
here's a tiny, nearly useless subset of APL, supporting only
one-dimensional vectors of floating-point numbers.

Like everything else I post to kragen-hacks without a notice to the
contrary, this code is in the public domain; I relinquish any
copyright.

Here's a sample session:

Beauty:~/devel/toyapl kragen$ ./toyapl_repl
Welcome to toyapl, a tiny APL subset.
Separate numbers by spaces; available ops are + - * / +/ */ % iota
% is modulo; / is division.
This program is not expected to be useful; I wrote it to learn OCaml.
        3150 10000 12000 15000 / 2150
1.46511627907 4.6511627907 5.58139534884 6.97674418605
        (+/ 3150 10000 12000 15000 / 2150) / 4
4.66860465116
        iota 15
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14
        ((iota 10)/10)*((iota 10)/10)
0 0.01 0.04 0.09 0.16 0.25 0.36 0.49 0.64 0.81
        ((1+iota 10)/10)*((1+iota 10)/10)
0.01 0.04 0.09 0.16 0.25 0.36 0.49 0.64 0.81 1
        (iota 15) % 3
0 1 2 0 1 2 0 1 2 0 1 2 0 1 2
        ((iota 20)*1+iota 20)/2
0 1 3 6 10 15 21 28 36 45 55 66 78 91 105 120 136 153 171 190

When I get a chance, I'm putting this online at
http://pobox.com/~kragen/sw/toyapl.html and
http://pobox.com/~kragen/sw/toyapl.tar.gz




(* First, here's an expression type, which I put in toyapl_expr.ml *)
type value = float list ;;
type op = string ;;
type expr = 
    Unary of op * expr 
  | Atom of value 
  | Parenthesized of expr 
  | Binary of expr * op * expr ;;  (* later, add variables *)




(* Then, here's toyapl_lexer.mll: *)
{
open Toyapl_parser ;;
open Toyapl_expr ;;
}
let alus = ['A'-'Z' 'a'-'z' '_']
let alnumus = alus | ['0'-'9']
let ident = alus alnumus*
rule next = parse
     [' ' '\t']  { next lexbuf }  (* skip whitespace; stolen from manual *)
   | ['0'-'9']+ ('.' ['0'-'9']+)? 
                           { Num (float_of_string (Lexing.lexeme lexbuf)) }
   | ['+' '-' '/' '*' '%']+ | ident 
                           { Op (Lexing.lexeme lexbuf) }
   | '('                   { Lparen }
   | ')'                   { Rparen }
   | '\n'                  { Eol }




(* Then, here's toyapl_parser.mly: *)
%{
open Toyapl_expr;;
%}
%token <float> Num
%token <Toyapl_expr.op> Op
%token Lparen Rparen Eol
%right Op
%start parse_line
%type <Toyapl_expr.expr> parse_line
%%
parse_line:
        expr Eol            { $1 }
;
atom:
        Num                 { [ $1 ] }
      | atom Num            { $1 @ [ $2 ] }
expr:
        atom                { Atom $1 }
      | expr Op expr        { Binary($1, $2, $3) }
      | Lparen expr Rparen  { Parenthesized($2) }
      | Op expr             { Unary($1, $2) }
      | Lparen Rparen       { Atom([]) }




(* And here's a top-level driver for the main program, which I put in 
   toyapl_repl.ml: *)
print_string "Welcome to toyapl, a tiny APL subset.
Separate numbers by spaces; available ops are + - * / +/ */ % iota
% is modulo; / is division.
This program is not expected to be useful; I wrote it to learn OCaml.
" ;
try Toyapl.repl stdin stdout with End_of_file -> () ;;




(* And here's the main program, toyapl.ml: *)
(* toy APL as an exercise to learn OCaml *)
(* to do: change values to trees *)
open Toyapl_expr ;;

exception Op_not_found of op ;;

let assoc f list = try List.assoc f list 
    with Not_found -> raise (Op_not_found f) ;;

let rec eval_with unaries binaries = let rec eval = function 
    Unary (f, e) -> (assoc f unaries) (eval e)
  | Atom e -> e 
  | Parenthesized e -> eval e
  | Binary (e1, f, e2) -> (assoc f binaries) (eval e1) (eval e2)
  in eval ;;

let show_num n = if n = float_of_int (int_of_float n) 
    then string_of_int (int_of_float n)
    else string_of_float n ;;

let rec show_atom = function
    [] -> "()"
  | n :: m :: lst -> show_num n ^ " " ^ show_atom (m::lst)
  | [n] -> show_num n ;;

let rec show = function
    Unary (f, e) -> f ^ " " ^ show e
  | Atom e -> show_atom e
  | Parenthesized e -> "(" ^ show e ^ ")" 
  | Binary (Atom _ as e1, f, e2) | Binary (Parenthesized _ as e1, f, e2) -> 
      show e1 ^ " " ^ f ^ " " ^ show e2
  | Binary (e, f, e2) -> 
        show (Parenthesized e) ^ " " ^ f ^ " " ^ show e2 ;;

(* numbers up to n. *)
(* it's not clear what iota should do when applied to a nonscalar *)
let apl_iota [n] = let rec iota start stop = if start = stop then [] 
        else float_of_int start :: iota (start + 1) stop
    in iota 0 (int_of_float n) ;;

let unary_lift = List.map ;;
let reduce op id x = [List.fold_left op id x] ;;
let unaries = ["+", unary_lift (fun x -> x); "-", unary_lift (~-.);
                     "+/", reduce (+.) 0.;
                     "*/", reduce ( *. ) 1.;
                     "iota", apl_iota] ;;

exception Mismatched_list_lengths of value * value ;;

let aplbinarylift f a b =
    let flip f a b = f b a
    in
    match (a, b) with
        ([a1], [b1]) -> [f a1 b1]
      | ([a1], b1 :: b2 :: rest) -> List.map (f a1) b
      | (a1 :: a2 :: rest, [b1]) -> List.map (flip f b1) a
      | (_, _) -> try List.map2 f a b with 
            Invalid_argument _ -> raise (Mismatched_list_lengths (a, b)) ;;

let fmod a b = a -. b *. (floor (a /. b)) ;;

let binaries = ["+", aplbinarylift (+.); "-", aplbinarylift (-.);
                "*", aplbinarylift ( *. ); "/", aplbinarylift (/.);
                "%", aplbinarylift fmod] ;;

let eval = eval_with unaries binaries ;;

let show_value = show_atom;;

let parse value = Toyapl_parser.parse_line Toyapl_lexer.next 
    (Lexing.from_string (value ^ "\n")) ;;

let repl inp outp =
    while true do
        output_string outp "        "; flush outp;
        let inval = input_line inp in
        try output_string outp ((show_value (eval (parse inval))) ^ "\n");
            flush outp
        with Parsing.Parse_error -> output_string outp "?parse error\n"
           | Op_not_found (op) -> output_string outp ("?not found " ^ op ^ "\n")
           | Stack_overflow -> output_string outp "?stack overflow\n"
           | Mismatched_list_lengths(a, b) -> 
               output_string outp ("?length mismatch: " ^ show_value a ^ ", " ^ 
                                   show_value b ^ "\n")
    done ;;

(* exception Test_failure of int * 'a * 'a ;; *)

let test () = 
    (* atoms *)
    assert ([45.] = eval (Atom [45.]));
    assert ("45" = show (Atom [45.]));
    assert ([45.; 50.] = eval (Atom [45.; 50.]));
    assert ("45 50" = show (Atom [45.; 50.]));
    (* parenthesized expressions *)
    let f7 = Parenthesized (Parenthesized (Atom [47.])) in
    assert ("((47))" = show f7);
    assert ([47.] = eval f7);
    (* binary ops (depends on addition) *)
    let seven = Binary ((Atom [3.]), "+", (Atom [4.])) in
    assert ("3 + 4" = show seven);
    assert ([7.] = eval seven);
    assert ([7.] = eval (Parenthesized seven));
    assert ("(3 + 4)" = show (Parenthesized seven));
    (* left operands of binary ops: binary ops *)
    let twelve = Binary (seven, "+", Atom [5.]) in
    assert ([12.] = eval twelve);
    assert ("(3 + 4) + 5" = show twelve);
    assert ("(3 + 4) + 5" = show (Binary (Parenthesized seven, 
						"+", Atom [5.])));
    (* left operands of binary ops: atoms *)
    assert ("5 + 3 + 4" = show (Binary (Atom [5.], "+", seven)));
    (* unary ops (depends on negation) *)
    let minus_one = Binary (Atom [3.], "+", Unary("-",Atom [4.])) in
    assert ("3 + - 4" = show minus_one);
    assert ([-1.] = eval minus_one);
    (* left operands of binary ops: unary ops *)
    assert ("(- 4) + 3" = show (Binary (Unary("-", Atom [4.]), 
		                                    "+", Atom [3.])));
    (* no precedence given to unary ops *)
    assert ("- 3 + 4" = show (Unary ("-", seven)));
    assert ([-7.] = eval (Unary ("-", seven)));

    (* unary ops: identity, sum, product *)
    assert ([7.] = eval (Unary ("+", seven)));
    assert ([202.] = eval (Unary ("+/", Atom [49.; 50.; 51.; 52.])));
    assert ([0.] = eval (Unary ("+/", Atom [])));
    assert ("+/ ()" = show (Unary ("+/", Atom [])));
    assert ([6497400.] = eval (Unary ("*/", Atom [49.; 50.; 51.; 52.])));
    assert ([1.] = eval (Unary ("*/", Atom [])));
    (* simple unary ops on vectors *)
    let minus_nums = Unary ("-", Atom [3.; 4.]) in
    assert ([-3.; -4.] = eval minus_nums);
    assert ("- 3 4" = show minus_nums);
    assert ([-3.; -4.] = eval (Unary ("+", minus_nums)));
    assert ("+ - 3 4" = show (Unary ("+", minus_nums)));

    (* other binary ops *)
    assert([-1.] = eval (Binary (Atom [3.], "-", Atom [4.])));
    assert([12.] = eval (Binary (Atom [3.], "*", Atom [4.])));
    assert([60.5] = eval (Binary (Atom [121.], "/", Atom [2.])));
    assert([2.] = eval (Binary (Atom [122.], "%", Atom [10.])));

    (* pointwise binary ops with vectors *)
    assert([7.; 8.; 9.; 10.] = eval (Binary (Atom [3.], "+", 
                                             Atom [4.; 5.; 6.; 7.])));
    assert([7.; 8.; 9.; 10.] = eval (Binary (Atom [4.; 5.; 6.; 7.], "+",
                                             Atom [3.])));
    assert([3.; 3.; 3.; 4.] = eval (Binary (Atom [7.; 8.; 9.; 10.], "-", 
                                        Atom [4.; 5.; 6.; 6.])));
    try ignore (eval (Binary (Atom [2.; 3.], "+", Atom [5.; 6.; 7.]))); 
               assert false with
        Mismatched_list_lengths (a, b) -> 
            assert (([2.; 3.], [5.; 6.; 7.]) = (a, b));

    (* iota *)
    let iota5 = Unary ("iota", Atom [5.]) in
    assert([0.; 1.; 2.; 3.; 4.] = eval iota5);
    assert("iota 5" = show iota5);
    assert([120.] = eval(Unary("*/", Binary(Atom [1.], "+", iota5))));

    (* show_value *)
    assert("0 1 2 3 4" = show_value (eval iota5));

    (* parsing *)
    assert(Atom [23.] = parse "23");
    assert(Atom [2.;3.] = parse "2 3");
    assert(Unary("+/", Unary("iota", Atom [5.])) = parse "+/iota 5");

    (* total system tests *)
    assert("1 2 0 1 2 0 1 2 0 1" = 
           (show_value (eval (parse "((iota 10) - 5) % 3"))));
    assert("0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1" =
           (show_value (eval (parse "(1 + iota 10) / 10"))));
    assert("15 18 21" = (show_value (eval (parse "3 * 4 5 6 + 1"))));
;;
test () ;;




# Then I wrote a build-script:
#!/bin/sh
set -ve
ocamlc -c toyapl_expr.ml
ocamlyacc toyapl_parser.mly
ocamlc -c toyapl_parser.mli
ocamlc -c toyapl_parser.ml
ocamllex toyapl_lexer.mll
ocamlc -c toyapl_lexer.ml
ocaml toyapl_parser.cmo toyapl_lexer.cmo toyapl.ml  # for regression tests
ocamlc -c toyapl.ml
ocamlc -o toyapl_repl toyapl_parser.cmo toyapl_lexer.cmo toyapl.cmo \
       toyapl_repl.ml




# And a clean script:
#!/bin/sh
set -v
# wow, the build process makes 14 files...
rm *~ *.cmi *.cmo toyapl_lexer.ml toyapl_parser.ml toyapl_parser.mli toyapl_repl




# And a Makefile:
# sorry, I wrote this on a Mac without make (!)
all:
	./build