Base implementation of AVL trees

Author: Martin Clochard

Preliminary definitions

theory SelectionTypes

  use seq.Seq
  use option.Option

  type split 'a = {
    left : seq 'a;
    middle : option 'a;
    right : seq 'a;
  }

Describe a position in the sequence left ++ middle ++ right (see rebuild)

  type part_base 'a = Left 'a
    | Right 'a
    | Here

Describe reduced problem for selection (see selected_part)

  let ghost function option_to_seq (o:option 'a) : seq 'a =
    match o with
    | Some x -> singleton x
    | None -> empty
    end

  let ghost function rebuild (p:split 'a) : seq 'a =
    p.left ++ option_to_seq p.middle ++ p.right

Reconstruct the sequence associated to a split.

  let ghost function left_extend (l:seq 'a) (d:'a) (e:split 'a) : split 'a =
    { e with left = l ++ cons d e.left }

Shortcuts for extending a split on the left/right.

  let ghost function right_extend (e:split 'a) (d:'a) (r:seq 'a) : split 'a =
    { e with right = e.right ++ cons d r }

end

AVL trees

module AVL

Rebalancing code

The first part of the module implements the rebalancing code. It can also be seen as an implementation of logarithmic-time catenable dequeues with a constant-time nearly-fair splitting operation (derived from tree pattern-matching).

(About the time complexity bounds: a call to an abstract routine (monoid operations, element measure,...) is assumed to take constant time)

  use int.Int
  use bool.Bool
  use import seq.Seq as S
  use seq.FreeMonoid
  use option.Option
  use ref.Ref
  use mach.peano.Peano as I

  clone monoid.ComputableMonoid as M with axiom .
  clone monoid.MonoidSum as M with
    (* scope M = M *)
    type M.t = M.t,
    constant M.zero = M.zero,
    function M.op = M.op,
    goal M.assoc,
    goal M.neutral,
    axiom .

The implementation is parameterized by an abstract monoid. The elements of the monoid will be used as summaries of sequence of elements, obtained by aggregation of the elements of the sequence. In other words, M.sum f [a_1;...;a_n] is the monoidal summary of sequence [a_1;...;a_n] with respect to measure f.

  scope D
    type t 'a

Abstract description of the data stored in the tree: measurable elements.

Elements can be measured.

    val function measure (t 'a) : M.t
  end

  val constant balancing : I.t
    ensures { result > 0 }

Actual height difference permitted between subtrees.

The balancing can be any positive integer. This is a trade-off between the cost of balancing and the cost of finding: the bigger the balancing constant is, the lesser is the need for balancing the tree, but the deeper the trees may be.

  type tree 'a =
    | Empty
    | Node (tree 'a) (D.t 'a) (tree 'a) I.t M.t

tree representation. Height and monoidal summary are cached at every node.

  type m 'a = {
    seq : seq (D.t 'a);
    hgt : int;
  }
  meta coercion function seq

Logical model of an AVL tree.

An AVL tree is intended to represent a sequence of elements, which we model using a list. It corresponds to the sequence of elements obtained by left-to-right infix order (note that this sequence is invariant by rebalancing). However, in order to specify rebalancing, the tree structure cannot be completely abstracted away because of the height requirements, so we add the height to the model.

  let ghost function node_model (l:seq 'a) (d:'a) (r:seq 'a) : seq 'a =
    l ++ cons d r

  let rec ghost function seq_model (t:tree 'a) : seq (D.t 'a) =
    match t with
    | Empty -> empty
    | Node l d r _ _ -> node_model (seq_model l) d (seq_model r)
    end

Sequence of elements obtained by infix-order traversal.

  let rec function real_height (t:tree 'a): int =
    match t with
    | Empty -> 0
    | Node l _ r _ _ -> let hl = real_height l in let hr = real_height r in
      1 + if hl < hr then hr else hl
    end

Height of the tree.

  let rec lemma real_height_nonnegative (t:tree 'a) : unit
    ensures { real_height t >= 0 }
    variant { t }
  = let ghost rc = real_height_nonnegative in
    match t with Empty -> () | Node l _ r _ _ -> rc l; rc r end

Height is non-negative.

  predicate balanced (t:tree 'a) = match t with
    | Empty -> true
    | Node l _ r h m ->  h = real_height t /\
      m = M.agg D.measure (seq_model t) /\
      -balancing <= real_height r - real_height l <= balancing /\
      balanced l /\ balanced r
    end

Balanced tree + correctness of cached annotations: at every node,

1) The additional integer is the height of the corresponding subtree

2) The monoidal value corresponds to the summary of the sequence of elements associated with the subtree associated with the node

3) Left and right subtrees are balancing-height-balanced

  lemma rotation_preserve_model : forall ld rd:'a,fl fm fr.
    let a = node_model (node_model fl ld fm) rd fr in
    let b = node_model fl ld (node_model fm rd fr) in
    a = b by a == b

Tree rotations preserve the in-order sequence of element

  type t 'a = {
    (* Representation as a binary tree. *)
    repr : tree 'a;
    (* Model. *)
    ghost m : m 'a;
  } invariant {
    balanced repr /\ m = seq_model repr /\ m.hgt = real_height repr
  } by {
    repr = Empty; m = { seq = empty; hgt = 0 }
  }
  meta coercion function m

Actual program type for AVL trees

  let height (t:t 'a) : I.t
    ensures { result = t.m.hgt }
  = match t.repr with
    | Empty -> I.zero
    | Node _ _ _ h _ -> h
    end

Compute the height of the tree. O(1) since the height is cached.

  let total (t:t 'a) : M.t
    ensures { result = M.agg D.measure t.m.seq }
  = match t.repr with
    | Empty -> M.zero ()
    | Node _ _ _ _ m -> m
    end

Compute the monoidal summary of the elements stored in the structure. constant-time.

  (* TODO: top-level let. *)
  let empty () : t 'a
    ensures { result = empty /\ result.hgt = 0 }
  = { repr = Empty; m = { seq = empty; hgt = 0 } }

Create an empty AVL tree.

  let node (l:t 'a) (d:D.t 'a) (r:t 'a) : t 'a
    requires { -balancing <= l.hgt - r.hgt <= balancing }
    ensures { result = node_model l d r }
    ensures { result.hgt = 1 + if l.hgt < r.hgt then r.hgt else l.hgt }
  = let hl = height l in let hr = height r in
    let h = I.succ (if I.lt hl hr then hr else hl) in
    let s = M.op (total l) (M.op (D.measure d) (total r)) in
    assert { M.agg D.measure (node_model l d r) = s };
    { repr = Node l.repr d r.repr h s;
      m = { seq = node_model l.m.seq d r.m.seq; hgt = h.I.v } }

re-specify the node constructor in terms of the logical model.

  let singleton (d:D.t 'a) : t 'a
    ensures { result = singleton d /\ result.hgt = 1 }
  = { repr = Node Empty d Empty (I.succ I.zero) (D.measure d);
      m = { seq = singleton d; hgt = 1 } }

Create a one-element AVL tree.

  let is_empty (t:t 'a) : bool
    ensures { length t = 0 -> result }
    ensures { result -> t = empty }
  = match t.repr with
    | Empty -> true
    | _ -> false
    end

Emptyness test. constant-time.

  type view 'a =
    | AEmpty
    | ANode (t 'a) (D.t 'a) (t 'a) I.t M.t

View of an AVL tree for pattern-matching.

  let view (t:t 'a) : view 'a
    ensures { match result with
      | AEmpty -> t.hgt = 0 /\ t = empty
      | ANode l d r h s ->
        t = node_model l d r /\ s = M.agg D.measure t /\
        let hl = l.hgt in let hr = r.hgt in
        -balancing <= hl - hr <= balancing /\
        t.hgt = h = 1 + if hl < hr then hr else hl
      end }
  = match t.repr with
    | Empty -> AEmpty
    | Node l d r h s ->
      ANode { repr = l;
              m = { seq = seq_model l; hgt = real_height l } }
            d
            { repr = r;
              m = { seq = seq_model r; hgt = real_height r } }
            h
            s
    end

Specification wrapper around pattern-matching, in terms of the logical model only. In terms of the model, it corresponds to an operation splitting a non-empty list into three parts, with left and right part of similar heights.

  let balance (l:t 'a) (d:D.t 'a) (r:t 'a) : t 'a
    requires { -balancing-1 <= l.hgt - r.hgt <= balancing+1 }
    ensures { result = node_model l d r }
    ensures { let hl = l.hgt in let hr = r.hgt in
      let he = 1 + if hl < hr then hr else hl in
      let hres = result.hgt in
      0 <= he - hres <= 1 /\
      (-balancing <= hl - hr <= balancing -> he = hres) }
  = (* Wonderful case of automatic proof ! *)
    let hl = height l in
    let hr = height r in
    let df = I.sub hl hr (I.neg hr) hl in
    if I.gt df balancing
    then match view l with
      | AEmpty -> absurd
      | ANode ll ld lr _ _ ->
        if I.ge (height ll) (height lr)
        then node ll ld (node lr d r)
        else match view lr with
          | AEmpty -> absurd
          | ANode lrl lrd lrr _ _ ->
            node (node ll ld lrl) lrd (node lrr d r)
          end
      end
    else if I.lt df (I.neg balancing)
    then match view r with
      | AEmpty -> absurd
      | ANode rl rd rr _ _ ->
        if I.ge (height rr) (height rl)
        then node (node l d rl) rd rr
        else match view rl with
          | AEmpty -> absurd
          | ANode rll rld rlr _ _ ->
            node (node l d rll) rld (node rlr rd rr)
          end
      end
    else node l d r

First re-balancing constructor: balance l d r build the same sequence as node l d r, but permit slightly unbalanced input, and might decrement the expected height by one. Also, if the expected input node was already balanced, it is specified to return a value indistinguishable from a value returned by node l d r. This constructor is constant-time.

  let rec decompose_front_node (l:t 'a) (d:D.t 'a) (r:t 'a) : (D.t 'a,t 'a)
    requires { -balancing <= l.hgt - r.hgt <= balancing }
    returns { (d2,res:t 'a) -> node_model l d r = cons d2 res /\
      let hl = l.hgt in let hr = r.hgt in
      let he = 1 + if hl < hr then hr else hl in
      0 <= he - res.hgt <= 1 }
    variant { l.hgt }
  = match view l with
    | AEmpty -> (d,r)
    | ANode l d2 r2 _ _ -> let (d3,left) = decompose_front_node l d2 r2 in
      (d3,balance left d r)
    end

Internal routine. Decompose l ++ [d] ++ r as [head]++tail, with sequences represented by AVL trees.

  let decompose_front (t:t 'a) : option (D.t 'a,t 'a)
    returns { None -> t = empty | Some (hd,tl:t 'a) -> t = cons hd tl }
  = match view t with
    | AEmpty -> None
    | ANode l d r _ _ -> Some (decompose_front_node l d r)
    end

Pattern-matching over the front of the sequence. Time proportional to the height (logarithmic in the size of the tree).

  let rec decompose_back_node (l:t 'a) (d:D.t 'a) (r:t 'a) : (t 'a,D.t 'a)
    requires { -balancing <= l.hgt - r.hgt <= balancing }
    returns { (res:t 'a,d2) -> node_model l d r = snoc res d2 /\
      let hl = l.hgt in let hr = r.hgt in
      let he = 1 + if hl < hr then hr else hl in
      0 <= he - res.hgt <= 1 }
    variant { r.m.hgt }
  = match view r with
    | AEmpty -> (l,d)
    | ANode l2 d2 r _ _ -> let (right,d3) = decompose_back_node l2 d2 r in
      (balance l d right,d3)
    end

Internal routine, mirror of decompose_front_node.

  let decompose_back (t:t 'a) : option (t 'a,D.t 'a)
    returns { None -> t = empty | Some (lt:t 'a,dh) -> t = snoc lt dh }
  = match view t with
    | AEmpty -> None
    | ANode l d r _ _ -> Some (decompose_back_node l d r)
    end

Pattern-matching over the back of the sequence. Time proportional to the height (logarithmic in the size of the tree).

  let rec front_node (l:t 'a) (d:D.t 'a) : D.t 'a
    ensures { result = (snoc l d)[0] }
    variant { l.hgt }
  = match view l with
    | AEmpty -> d
    | ANode l d2 _ _ _ -> front_node l d2
    end

Internal routine.

  let front (t:t 'a) : D.t 'a
    requires { length t <> 0 }
    ensures { result = t[0] }
  = match view t with
    | AEmpty -> absurd
    | ANode l d2 _ _ _ -> front_node l d2
    end

Get the first element of a non-empty sequence. logarithmic-time.

  let rec back_node (d:D.t 'a) (r:t 'a) : D.t 'a
    ensures { result = (cons d r)[length r] }
    variant { r.hgt }
  = match view r with
    | AEmpty -> d
    | ANode _ d2 r _ _ -> back_node d2 r
    end

Internal routine.

  let back (t:t 'a) : D.t 'a
    requires { length t <> 0 }
    ensures { result = t[length t - 1] }
  = match view t with
    | AEmpty -> absurd
    | ANode _ d2 r _ _ -> back_node d2 r
    end

Get the back of a non-empty sequence. logarithmic-time.

  let fuse (l r:t 'a) : t 'a
    requires { -balancing <= l.hgt - r.hgt <= balancing }
    ensures { result = l ++ r }
    ensures { let hl = l.hgt in let hr = r.hgt in
      let he = 1 + if hl < hr then hr else hl in 1 >= he - result.hgt >= 0 }
  =
    match view l with
    | AEmpty -> r
    | ANode _ _ _ _ _ -> match view r with
      | AEmpty -> l
      | ANode rl rd rr _ _ -> let (d0,r') = decompose_front_node rl rd rr in
        balance l d0 r'
      end
    end

Catenation with balanced inputs (like sibling subtrees). logarithmic-time.

  let rec cons (d:D.t 'a) (t:t 'a) : t 'a
    ensures { result = cons d t }
    ensures { 1 >= result.hgt - t.hgt >= 0 }
    variant { t.hgt }
  = match view t with
    | AEmpty -> singleton d
    | ANode l d2 r _ _ -> balance (cons d l) d2 r
    end

List cons: build the sequence cons d t. logarithmic-time.

  let rec snoc (t:t 'a) (d:D.t 'a) : t 'a
    ensures { result = snoc t d }
    ensures { 1 >= result.hgt - t.hgt >= 0 }
    variant { t.hgt }
  = match view t with
    | AEmpty -> singleton d
    | ANode l d2 r _ _ -> balance l d2 (snoc r d)
    end

Reverse cons: build snoc t d. logarithmic-time.

  let rec join (l:t 'a) (d:D.t 'a) (r:t 'a) : t 'a
    ensures { result = node_model l d r }
    ensures { let hl = l.hgt in let hr = r.hgt in
      let he = 1 + if hl < hr then hr else hl in let hres = result.hgt in
      0 <= he - hres <= 1 }
    variant { if l.hgt > r.hgt then l.hgt - r.hgt else r.hgt - l.hgt }
  = match view l with
    | AEmpty -> cons d r
    | ANode ll ld lr lh _ -> match view r with
      | AEmpty -> snoc l d
      | ANode rl rd rr rh _ ->
        let df = I.sub lh rh (I.neg rh) lh in
        if I.gt df balancing
        then balance ll ld (join lr d r)
        else if I.lt df (I.neg balancing)
        then balance (join l d rl) rd rr
        else node l d r
      end
    end

Variant of the node constructor without any height hypothesis. The time complexity is proportional to the height difference between l and r (O(|l.m.hgt-r.m.hgt|)) (in particular, it is logarithmic)

  let concat (l r:t 'a) : t 'a
    ensures { result = l ++ r }
  = match view l with
    | AEmpty -> r
    | ANode _ _ _ _ _ -> match view r with
      | AEmpty -> l
      | ANode rl rd rr _ _ -> let (d0,r') = decompose_front_node rl rd rr in
        join l d0 r'
      end
    end

Catenation, without height hypothesis. logarithmic-time.

  (* FIXME: with real refinement, this should
     be done in a separate module,
     so that a single structure may feature several selection
     mecanisms. *)

Selection of elements

This part of the module provide an implementation of a generalisation of the usual insertion/removal/lookup/spliting/etc algorithms on AVL trees. The generalisation is done with respect to an abstract binary search mechanism. It is based on the observation that all those algorithms have the shame shape:

1) find a particular position in the tree by binary search (either a node, or an empty leaf if finding nothing)

2) extract some piece of information from this position/recompute another tree (which is either trivial or done only using rebalancing)

By using different search mechanisms in step 1), we can get a variety of data structures. Note that thanks to monoidal annotations, efficient search mechanisms other than comparison-based exists. For example, using the integer monoid we can keep track of the size of every subtrees, which can be used to implement efficient random access.

The positions found in step 1) corresponds readily to splits of the sequence, as defined in the SelectionTypes module. We consider that the objective of the search is to find a split with particular properties.

  use SelectionTypes

  type selector

Parameter: selector type. The elements of that type are intended to describe the class of splits we wish to find.

  predicate selected selector (split (D.t 'a))

Parameter: interpretation of the selector.

  predicate selection_possible selector (seq (D.t 'a))

Parameter: correctness predicate for a selector with respect to a list. It is intended to mean that we can actually find such a split in the list using binary search.

  axiom selection_empty : forall s. let nil = (empty:seq (D.t 'a)) in
    selection_possible s nil ->
      selected s { left = nil ; middle = None ; right = nil }

Parameter: a correct selector for the empty list always select its only possible split.

  type part = part_base selector

On nodes, binary search works by reducing the problem of selecting of a split on the whole tree to either: 1) The problem of selecting a split on the left subtree

2) The problem of selecting a split on the right subtree

3) Taking the split induced by the node

  val selected_part (ghost lseq rseq:seq (D.t 'a))
    (s:selector) (l:M.t) (d:D.t 'a) (r:M.t) : part
    requires { selection_possible s (node_model lseq d rseq) }
    requires { l = M.agg D.measure lseq /\ r = M.agg D.measure rseq }
    returns { Here -> let e2 = { left = lseq;
        middle = Some d;
        right = rseq } in selected s e2
      | Left sl -> selection_possible sl lseq /\
        forall e. selected sl e /\ rebuild e = lseq ->
          selected s (right_extend e d rseq)
      | Right sr -> selection_possible sr rseq /\
        forall e. selected sr e /\ rebuild e = rseq ->
          selected s (left_extend lseq d e) }

Parameter: selected_part llis rlis s l d r effectively compute the reduction of the selection to one of the three part of a node. It uses the monoidal summaries to get informations about the left and right subtrees.

  use ref.Ref

  let ghost default_split () : ref (split 'a) =
    ref { left = S.empty; middle = None; right = S.empty }

Create a reference over a dummy split. All the binary-search-based routines take a ghost reference to explicitly return the existential witness corresponding to the found split, this is a shortcut for creating such a reference.

  let rec insert (ghost r:ref (split (D.t 'a))) (s:selector)
    (d:D.t 'a) (t:t 'a) : t 'a
    requires { selection_possible s t }
    ensures { result = node_model !r.left d !r.right }
    ensures { selected s !r /\ rebuild !r = t }
    writes { r }
    ensures { 1 >= result.hgt - t.hgt >= 0 }
    variant { t.hgt }
  = match view t with
    | AEmpty -> r := { left = S.empty; middle = None; right = S.empty };
      singleton d
    | ANode tl td tr _ _ -> match selected_part tl.m.seq tr.m.seq
        s (total tl) td (total tr) with
      | Left sl -> let nl = insert r sl d tl in
        r := right_extend !r td tr.m.seq; balance nl td tr
      | Right sr -> let nr = insert r sr d tr in
        r := left_extend tl.m.seq td !r; balance tl td nr
      | Here -> r := { left = tl.m.seq;
          middle = Some td;
          right = tr.m.seq };
        node tl d tr
      end
    end

Insertion of an element into the sequence. insert r s d t split the sequence t using s as lf ++ o ++ rg, and rebuild it with d in the middle, potentially erasing whatever was there before, as lf ++ [d] ++ rg. The reference r is assigned to the selected position. It is logarithmic-time. Note: the procedure described above match only the specification, not what the actual code does.

  let rec remove (ghost r:ref (split (D.t 'a))) (s:selector) (t:t 'a) : t 'a
    requires { selection_possible s t }
    ensures { result = !r.left ++ !r.right }
    ensures { selected s !r /\ rebuild !r = t }
    ensures { 1 >= t.hgt - result.hgt >= 0 }
    writes { r }
    variant { t.hgt }
  = match view t with
    | AEmpty -> r := { left = S.empty; middle = None; right = S.empty }; t
    | ANode tl td tr _ _ -> match selected_part tl.m.seq tr.m.seq
        s (total tl) td (total tr) with
      | Left sl -> let nl = remove r sl tl in
        r := right_extend !r td tr.m.seq; balance nl td tr
      | Right sr -> let nr = remove r sr tr in
        r := left_extend tl.m.seq td !r; balance tl td nr
      | Here -> r := { left = tl.m.seq;
          middle = Some td;
          right = tr.m.seq };
        fuse tl tr
      end
    end

Remove an element from the sequence. remove r s t Split the sequence t using s into lf ++ o ++ rg, and erase whatever may be in the middle, as lf++rg. logarithmic-time.

  let rec get (ghost r:ref (split (D.t 'a))) (s:selector)
    (t:t 'a) : option (D.t 'a)
    requires { selection_possible s t }
    ensures { selected s !r /\ rebuild !r = t }
    returns { None -> !r.middle = None
       | Some d -> !r.middle = Some d }
    writes { r }
    variant { t.hgt }
  = match view t with
    | AEmpty -> r := { left = S.empty; middle = None; right = S.empty }; None
    | ANode tl td tr _ _ -> match selected_part tl.m.seq tr.m.seq
        s (total tl) td (total tr) with
      | Left sl -> let res = get r sl tl in
        r := right_extend !r td tr.m.seq; res
      | Right sr -> let res = get r sr tr in
        r := left_extend tl.m.seq td !r; res
      | Here -> r := { left = tl.m.seq;
          middle = Some td;
          right = tr.m.seq };
        Some td
      end
    end

Attempt to find an element in the sequence. find r s t return the middle value obtained by splitting the sequence t with respect to the s. logarithmic-time.

  let rec extract (ghost r:ref (split (D.t 'a))) (s:selector)
    (t:t 'a) : (option (D.t 'a),t 'a)
    requires { selection_possible s t }
    ensures { selected s !r /\ rebuild !r = t }
    ensures { let (d,t2:t 'a) = result in
      t2 = !r.left ++ !r.right /\ 1 >= t.hgt - t2.hgt >= 0 /\
      match d with
      | None -> !r.middle = None
      | Some d2 -> !r.middle = Some d2
      end }
    variant { t.hgt }
  = match view t with
    | AEmpty ->
      r := { left = S.empty; middle = None; right = S.empty }; (None,t)
    | ANode tl td tr _ _ -> match selected_part tl.m.seq tr.m.seq
        s (total tl) td (total tr) with
      | Left sl -> let (ol,nl) = extract r sl tl in
        r := right_extend !r td tr.m.seq; (ol,balance nl td tr)
      | Right sr -> let (or,nr) = extract r sr tr in
        r := left_extend tl.m.seq td !r; (or,balance tl td nr)
      | Here -> r := { left = tl.m.seq;
          middle = Some td;
          right = tr.m.seq };
        (Some td,fuse tl tr)
      end
    end

Combine get and remove

  let rec split (ghost r:ref (split (D.t 'a))) (s:selector)
    (t:t 'a) : (t 'a,option (D.t 'a),t 'a)
    requires { selection_possible s t }
    ensures { selected s !r /\ rebuild !r = t }
    returns { (lf:t 'a,o,rg:t 'a) -> lf = !r.left /\ rg = !r.right /\
      match o with
      | None -> !r.middle = None
      | Some d -> !r.middle = Some d
      end }
    writes { r }
    variant { t.hgt }
  = match view t with
    | AEmpty -> r := { left = S.empty; middle = None; right = S.empty };
      (t,None,t)
    | ANode tl td tr _ _ -> match selected_part tl.m.seq tr.m.seq
        s (total tl) td (total tr) with
      | Left sl -> let (tll,tlo,tlr) = split r sl tl in
        r := right_extend !r td tr.m.seq;
          (tll,tlo,join tlr td tr)
      | Right sr -> let (trl,tro,trr) = split r sr tr in
        r := left_extend tl.m.seq td !r;
        (join tl td trl,tro,trr)
      | Here -> r := { left = tl.m.seq;
          middle = Some td;
          right = tr.m.seq };
        (tl,Some td,tr)
      end
    end

Split a sequence. split r s t return a program version of a split found by s, using AVL trees for the sequence. It is logarithmic-time. Note: As their specification suggest, all the binary-search-based routines can be directly implemented in term of a split followed by catenations. Although a constant time factor, the rebalancing work would be much heavier.

end