Case Study: MapReduce














Case Study: MapReduce




Plane




















Prelude

{-@ LIQUID "--reflection" @-} module MapReduce where import Language.Haskell.Liquid.Equational import Prelude hiding (map, length, (++), take, drop, sum) {-@ infix ++ @-} {-@ infix : @-}




















Implementation

{-@ reflect mapReduce @-} mapReduce :: Int -> ([a]-> b) -> (b -> b -> b) -> [a] -> b mapReduce n f op is = reduce op (f []) (map f (chunk n is)) {-@ reflect reduce @-} reduce :: (a -> a -> a) -> a -> [a] -> a reduce op b [] = b reduce op b (x:xs) = op x (reduce op b xs) {-@ reflect map @-} {-@ map :: (a -> b) -> xs:[a] -> {v:[b] | length v == length xs } @-} map _ [] = [] map f (x:xs) = f x:map f xs {-@ reflect chunk @-} chunk :: Int -> [a] -> [[a]]




















Use Case: Summing List

{-@ reflect sum @-} sum :: [Int] -> Int sum [] = 0 sum (x:xs) = x `plus` sum xs {-@ reflect plus @-} plus :: Int -> Int -> Int plus x y = x + y {-@ reflect psum @-} psum :: Int -> [Int] -> Int psum n is = mapReduce n sum plus is

Question: Is psum equivalent to sum?


















Proving Code Equivalence

{-@ sumEq :: n:Int -> is:[Int] -> { sum is == psum n is } @-} sumEq n is = mRTheorem n -- chunk size sum -- function to map-reduce plus -- reduction operator plusRightId -- plus has "right-identity" sumDistr -- sum is "distributive" is -- input list
Using distribution and right identity of plus
{-@ sumDistr   :: xs:[Int] -> ys:[Int] -> 
      {sum (xs ++ ys) == plus (sum xs) (sum ys)} @-}

{-@ plusRightId :: xs:[Int] -> 
      {plus (sum xs) (sum []) == sum xs} @-}




















Higher Order Map Reduce Theorem


If f distributes and op has right-id, then it is OK to map-reduce.

{-@ mRTheorem :: n:Int -> f:([a] -> b) -> op:(b -> b -> b) -> (xs:[a] -> {op (f xs) (f []) == f xs}) -> (xs:[a] -> ys:[a] -> {f (xs ++ ys) == op (f xs) (f ys)}) -> is:[a] -> { mapReduce n f op is == f is } / [length is] @-}




















Right Identity

Lets prove right identity!

{-@ plusRightId :: xs:[Int] -> {plus (sum xs) (sum []) == sum xs} @-} plusRightId :: [Int] -> () plusRightId xs = undefined




















Warmup: Associativity of append

  • Lets prove associativity of append
{-@ assoc :: xs:[a] -> ys:[a] -> zs:[a] -> { xs ++ (ys ++ zs) == (xs ++ ys) ++ zs } @-} assoc :: [a] -> [a] -> [a] -> () assoc [] ys zs = [] ++ (ys ++ zs) ==. ys ++ zs ==. ([] ++ ys) ++ zs *** QED assoc (x:xs) ys zs = (x:xs) ++ (ys ++ zs) ==. x:(xs ++ (ys ++ zs)) ? assoc xs ys zs ==. x:((xs ++ ys) ++ zs) ==. ((x:(xs ++ ys)) ++ zs) ==. ((x:xs) ++ ys) ++ zs *** QED




















Back to Distribution

  • Lets prove distribution
{- ple sumDistr @-} {-@ sumDistr :: xs:[Int] -> ys:[Int] -> {sum (xs ++ ys) == plus (sum xs) (sum ys)} @-} sumDistr :: [Int] -> [Int] -> () sumDistr xs ys = undefined




















Recap


  • We proved program equivalence of map-reduce.

  • Prove crucial properties for Haskell in Haskell!

where Haskell = a general purspose programming language.










Map Reduce Equivalence

{-@ ple sumEq @-} mRTheorem :: Int -> ([a] -> b) -> (b -> b -> b) -> ([a] -> ()) -> ([a] -> [a] -> ()) -> [a]-> () mRTheorem n f op rightId _ [] = mapReduce n f op [] ==! reduce op (f []) (map f (chunk n [])) ==! reduce op (f []) (map f ([]:[])) ==! reduce op (f []) (f []:map f [] ) ==! reduce op (f []) (f []:[]) ==! op (f []) (reduce op (f []) []) ==! op (f []) (f []) ? rightId [] ==! f [] *** QED mRTheorem n f op rightId _ is@(_:_) | n <= 1 || length is <= n = mapReduce n f op is ==! reduce op (f []) (map f (chunk n is)) ==! reduce op (f []) (map f [is]) ==! reduce op (f []) (f is:map f []) ==! reduce op (f []) (f is:[]) ==! op (f is) (reduce op (f []) []) ==! op (f is) (f []) ? rightId is ==! f is *** QED mRTheorem n f op rightId distrib is = mapReduce n f op is ==! reduce op (f []) (map f (chunk n is)) ==! reduce op (f []) (map f (take n is:chunk n (drop n is))) ==! reduce op (f []) ((f (take n is)):(map f (chunk n (drop n is)))) ==! op (f (take n is)) (reduce op (f []) (map f (chunk n (drop n is)))) ==! op (f (take n is)) (mapReduce n f op (drop n is)) ? mRTheorem n f op rightId distrib (drop n is) ==! op (f (take n is)) (f (drop n is)) ? distrib (take n is) (drop n is) ==! f (take n is ++ drop n is) ? appendTakeDrop n is ==! f is *** QED




















Append of Take and Drop

{-@ appendTakeDrop :: i:Nat -> xs:{[a]| i <= length xs} -> {xs == (take i xs) ++ (drop i xs)} @-} appendTakeDrop :: Int -> [a] -> () appendTakeDrop i [] = take i [] ++ drop i [] ==! [] ++ [] ==! [] *** QED appendTakeDrop i (x:xs) | i == 0 = take 0 (x:xs) ++ drop 0 (x:xs) ==! [] ++ (x:xs) ==! x:xs *** QED | otherwise = (take i (x:xs) ++ drop i (x:xs)) ==! (x:(take (i-1) xs)) ++ (drop (i-1) xs) ==! x:(take (i-1) xs ++ drop (i-1) xs) ? appendTakeDrop (i-1) xs ==! x:xs *** QED




















List Manipulation

{-@ LIQUID "--automatic-instances=liquidinstanceslocal" @-} {-@ chunk :: i:Int -> xs:[a] -> {v:[[a]] | if (i <= 1 || length xs <= i) then (length v == 1) else (length v < length xs) } / [length xs] @-} chunk i xs | i <= 1 = [xs] | length xs <= i = [xs] | otherwise = take i xs:chunk i (drop i xs) {-@ reflect drop @-} {-@ drop :: i:Nat -> xs:{[a] | i <= length xs } -> {v:[a] | length v == length xs - i } @-} drop :: Int -> [a] -> [a] drop i [] = [] drop i (x:xs) | i == 0 = x:xs | otherwise = drop (i-1) xs {-@ reflect take @-} {-@ take :: i:Nat -> xs:{[a] | i <= length xs } -> {v:[a] | length v == i} @-} take :: Int -> [a] -> [a] take i [] = [] take i (x:xs) | i == 0 = [] | otherwise = x:take (i-1) xs {-@ reflect ++ @-} (++) :: [a] -> [a] -> [a] [] ++ ys = ys (x:xs) ++ ys = x:(xs ++ ys)
{-@ measure length @-} {-@ length :: [a] -> Nat @-} length :: [a] -> Int length [] = 0 length (_:xs) = 1 + length xs infixl 3 ==! {-@ assume (==!) :: x:a -> y:a -> {v:a | v == y && v == x} @-} (==!) :: a -> a -> a _ ==! x = x