Edition distance

Correctness of a program computing the minimal distance between two words.

Tools: Why3

see also the index (by topic, by tool, by reference, by year)

(* Correctness of a program computing the minimal distance between
two words (code by Claude MarchÃ©).

This program computes a variant of the Levenshtein distance. Given
two strings [w1] and [w2] of respective lengths [n1] and [n2], it
computes the minimal numbers of insertions and deletions to
perform in one of the strings to get the other one.  (The
traditional edit distance also includes substitutions.)

work in linear space, in an array of min(n1,n2) integers. Time
complexity is O(n1 * n2), as usual.
*)

theory Word

use export int.Int
use export int.MinMax
use export list.List
use export list.Length

type char

val eq (c1 c2:char) : bool
ensures { result <-> c1 = c2 }

type word = list char

inductive dist word word int =
| dist_eps :
dist Nil Nil 0
forall w1 w2: word, n: int.
dist w1 w2 n -> forall a: char. dist (Cons a w1) w2 (n + 1)
forall w1 w2: word, n: int.
dist w1 w2 n -> forall a: char. dist w1 (Cons a w2) (n + 1)
| dist_context :
forall w1 w2:word, n: int.
dist w1 w2 n -> forall a: char. dist (Cons a w1) (Cons a w2) n

predicate min_dist (w1 w2: word) (n: int) =
dist w1 w2 n /\ forall m: int. dist w1 w2 m -> n <= m

(* intermediate lemmas *)

use export list.Append

function last_char (a: char) (u: word) : char = match u with
| Nil -> a
| Cons c u' -> last_char c u'
end

function but_last (a: char) (u: word) : word = match u with
| Nil -> Nil
| Cons c u' -> Cons a (but_last c u')
end

lemma first_last_explicit:
forall u: word, a: char.
but_last a u ++ Cons (last_char a u) Nil = Cons a u

lemma first_last:
forall a: char, u: word. exists v: word, b: char.
v ++ Cons b Nil = Cons a u /\ length v = length u

lemma key_lemma_right:
forall w1 w'2: word, m: int, a: char.
dist w1 w'2 m ->
forall w2: word. w'2 = Cons a w2 ->
exists u1 v1: word, k: int.
w1 = u1 ++ v1 /\ dist v1 w2 k /\ k + length u1 <= m + 1

lemma dist_symetry:
forall w1 w2: word, n: int. dist w1 w2 n -> dist w2 w1 n

lemma key_lemma_left:
forall w1 w2: word, m: int, a: char.
dist (Cons a w1) w2 m ->
exists u2 v2: word, k: int.
w2 = u2 ++ v2 /\ dist w1 v2 k /\ k + length u2 <= m + 1

lemma dist_concat_left:
forall u v w: word, n: int.
dist v w n -> dist (u ++ v) w (length u + n)

lemma dist_concat_right:
forall u v w: word, n: int.
dist v w n -> dist v (u ++ w) (length u + n)

(* main lemmas *)

lemma min_dist_equal:
forall w1 w2: word, a: char, n: int.
min_dist w1 w2 n -> min_dist (Cons a w1) (Cons a w2) n

lemma min_dist_diff:
forall w1 w2: word, a b: char, m p: int.
a <> b ->
min_dist (Cons a w1) w2 p ->
min_dist w1 (Cons b w2) m ->
min_dist (Cons a w1) (Cons b w2) (min m p + 1)

lemma min_dist_eps:
forall w: word, a: char, n: int.
min_dist w Nil n -> min_dist (Cons a w) Nil (n + 1)

lemma min_dist_eps_length:
forall w: word. min_dist Nil w (length w)

end

module EditDistance

use Word
use list.Length as L
use ref.Ref
use array.Array

(* Auxiliary definitions for the program and its specification. *)

function suffix (a: array char) (i: int) : word

axiom suffix_nil:
forall a: array char. suffix a a.length = Nil

axiom suffix_cons:
forall a: array char, i: int.
0 <= i < a.length -> suffix a i = Cons a[i] (suffix a (i+1))

lemma suffix_length:
forall a: array char, i: int.
0 <= i <= a.length -> L.length (suffix a i) = (a.length - i)

predicate min_suffix (a1 a2: array char) (i j n: int) =
min_dist (suffix a1 i) (suffix a2 j) n

function word_of (a: array char) : word = suffix a 0

(* The program. *)

let distance (w1: array char) (w2: array char)
ensures  { min_dist (word_of w1) (word_of w2) result }
= let n1 = length w1 in
let n2 = length w2 in
let t = Array.make (n2+1) 0 in
(* initialization of t *)
for i = 0 to n2 do
invariant { forall j:int. 0 <= j < i -> t[j] = n2 - j }
t[i] <- n2 - i
done;
(* loop over w1 *)
for i = n1 - 1 downto 0 do
invariant { forall j:int. 0 <= j <= n2 -> min_suffix w1 w2 (i+1) j t[j] }
let oldt = ref t[n2] in
t[n2] <- t[n2] + 1;
(* loop over w2 *)
for j = n2 - 1 downto 0 do
invariant { forall k:int. j < k <= n2 -> min_suffix w1 w2 i k t[k] }
invariant { forall k:int. 0 <= k <= j -> min_suffix w1 w2 (i+1) k t[k] }
invariant { min_suffix w1 w2 (i+1) (j+1) !oldt }
let temp = !oldt in
oldt := t[j];
if eq w1[i] w2[j] then
t[j] <- temp
else
t[j] <- (min t[j] t[j + 1]) + 1
done
done;
t[0]

end