-----------------------------------------------------------------------------
-- |
-- Module      :  Distribution.Simple.PreProcess.Unlit
-- Copyright   :  ...
--
-- Maintainer  :  cabal-devel@haskell.org
-- Portability :  portable
--
-- Remove the \"literal\" markups from a Haskell source file, including
-- \"@>@\", \"@\\begin{code}@\", \"@\\end{code}@\", and \"@#@\"

-- This version is interesting because instead of striping comment lines, it
-- turns them into "-- " style comments. This allows using haddock markup
-- in literate scripts without having to use "> --" prefix.

module Distribution.Simple.PreProcess.Unlit (unlit,plain) where

import Data.Char
import Data.List

data Classified = BirdTrack String | Blank String | Ordinary String
                | Line !Int String | CPP String
                | BeginCode | EndCode
                -- output only:
                | Error String | Comment String

-- | No unliteration.
plain :: String -> String -> String
plain _ hs = hs :: Stringhs

classify :: String -> Classified
classify ('>':s) = BirdTrack :: String -> ClassifiedBirdTrack s :: Strings
classify ('#':s) = case tokens :: String -> [String]tokens s :: Strings of
                     (line:file:_) | all :: (a -> Bool) -> [a] -> Boolall isDigit :: Char -> BoolisDigit line :: Stringline
                                  (&&) :: Bool -> Bool -> Bool&& length :: [a] -> Intlength file :: FilePathfile (>=) :: Ord a => a -> a -> Bool>= 2
                                  (&&) :: Bool -> Bool -> Bool&& head :: [a] -> ahead file :: FilePathfile (==) :: Eq a => a -> a -> Bool== '"'
                                  (&&) :: Bool -> Bool -> Bool&& last :: [a] -> alast file :: FilePathfile (==) :: Eq a => a -> a -> Bool== '"'
                                -> ($WLine) :: Int -> String -> ClassifiedLine (read :: Read a => String -> aread line :: Stringline) (tail :: [a] -> [a]tail (init :: [a] -> [a]init file :: FilePathfile))
                     _          -> CPP :: String -> ClassifiedCPP s :: Strings
  where tokens = unfoldr :: (b -> Maybe (a, b)) -> b -> [a]unfoldr ($) :: (a -> b) -> a -> b$ \str -> case lex :: ReadS Stringlex str :: Stringstr of
                                   (t@(_:_), str'):_ -> Just :: a -> Maybe aJust (t :: Stringt, str' :: Stringstr')
                                   _                 -> Nothing :: Maybe aNothing
classify ('\\':s)
  | "begin{code}" isPrefixOf :: Eq a => [a] -> [a] -> Bool`isPrefixOf` s :: Strings = BeginCode :: ClassifiedBeginCode
  | "end{code}"   isPrefixOf :: Eq a => [a] -> [a] -> Bool`isPrefixOf` s :: Strings = EndCode :: ClassifiedEndCode
classify s | all :: (a -> Bool) -> [a] -> Boolall isSpace :: Char -> BoolisSpace s :: Strings       = Blank :: String -> ClassifiedBlank s :: Strings
classify s                       = Ordinary :: String -> ClassifiedOrdinary s :: Strings

-- So the weird exception for comment indenting is to make things work with
-- haddock, see classifyAndCheckForBirdTracks below.
unclassify :: Bool -> Classified -> String
unclassify _     (BirdTrack s) = ' '(:) :: a -> [a] -> [a]:s :: Strings
unclassify _     (Blank s)     = s :: Strings
unclassify _     (Ordinary s)  = s :: Strings
unclassify _     (Line n file) = "# " (++) :: [a] -> [a] -> [a]++ show :: Show a => a -> Stringshow n :: Intn (++) :: [a] -> [a] -> [a]++ " " (++) :: [a] -> [a] -> [a]++ show :: Show a => a -> Stringshow file :: FilePathfile
unclassify _     (CPP s)       = '#'(:) :: a -> [a] -> [a]:s :: Strings
unclassify True  (Comment "")  = "  --"
unclassify True  (Comment s)   = "  -- " (++) :: [a] -> [a] -> [a]++ s :: Strings
unclassify False (Comment "")  = "--"
unclassify False (Comment s)   = "-- " (++) :: [a] -> [a] -> [a]++ s :: Strings
unclassify _     _             = internalError :: ainternalError

-- | 'unlit' takes a filename (for error reports), and transforms the
--   given string, to eliminate the literate comments from the program text.
unlit :: FilePath -> String -> Either String String
unlit file input =
  let (usesBirdTracks, classified) = classifyAndCheckForBirdTracks :: [String] -> (Bool, [Classified])classifyAndCheckForBirdTracks
                                   (.) :: (b -> c) -> (a -> b) -> a -> c. inlines :: String -> [String]inlines
                                   ($) :: (a -> b) -> a -> b$ input :: Stringinput
   in either :: (a -> c) -> (b -> c) -> Either a b -> ceither (Left :: a -> Either a bLeft (.) :: (b -> c) -> (a -> b) -> a -> c. unlines :: [String] -> Stringunlines (.) :: (b -> c) -> (a -> b) -> a -> c. map :: (a -> b) -> [a] -> [b]map (unclassify :: Bool -> Classified -> Stringunclassify usesBirdTracks :: BoolusesBirdTracks))
              Right :: b -> Either a bRight
    (.) :: (b -> c) -> (a -> b) -> a -> c. checkErrors :: [Classified] -> Either [Classified] [Char]checkErrors
    (.) :: (b -> c) -> (a -> b) -> a -> c. reclassify :: [Classified] -> [Classified]reclassify
    ($) :: (a -> b) -> a -> b$ classified :: [Classified]classified

  where
    -- So haddock requires comments and code to align, since it treats comments
    -- as following the layout rule. This is a pain for us since bird track
    -- style literate code typically gets indented by two since ">" is replaced
    -- by " " and people usually use one additional space of indent ie
    -- "> then the code". On the other hand we cannot just go and indent all
    -- the comments by two since that does not work for latex style literate
    -- code. So the hacky solution we use here is that if we see any bird track
    -- style code then we'll indent all comments by two, otherwise by none.
    -- Of course this will not work for mixed latex/bird track .lhs files but
    -- nobody does that, it's silly and specifically recommended against in the
    -- H98 unlit spec.
    --
    classifyAndCheckForBirdTracks =
      flip :: (a -> b -> c) -> b -> a -> cflip mapAccumL :: (acc -> x -> (acc, y)) -> acc -> [x] -> (acc, [y])mapAccumL False :: BoolFalse ($) :: (a -> b) -> a -> b$ \seenBirdTrack line ->
        let classification = classify :: String -> Classifiedclassify line :: Stringline
         in (seenBirdTrack :: BoolseenBirdTrack (||) :: Bool -> Bool -> Bool|| isBirdTrack :: Classified -> BoolisBirdTrack classification :: Classifiedclassification, classification :: Classifiedclassification)

    isBirdTrack (BirdTrack _) = True :: BoolTrue
    isBirdTrack _             = False :: BoolFalse

    checkErrors ls = case [ e :: Stringe | Error e <- ls :: [Classified]ls ] of
      []          -> Left :: a -> Either a bLeft  ls :: [Classified]ls
      (message:_) -> Right :: b -> Either a bRight (f :: Stringf (++) :: [a] -> [a] -> [a]++ ":" (++) :: [a] -> [a] -> [a]++ show :: Show a => a -> Stringshow n :: Intn (++) :: [a] -> [a] -> [a]++ ": " (++) :: [a] -> [a] -> [a]++ message :: Stringmessage)
        where (f, n) = errorPos :: String -> Int -> [Classified] -> (String, Int)errorPos file :: FilePathfile 1 ls :: [Classified]ls
    errorPos f n []              = (f :: Stringf, n :: Intn)
    errorPos f n (Error _:_)     = (f :: Stringf, n :: Intn)
    errorPos _ _ (Line n' f':ls) = errorPos :: String -> Int -> [Classified] -> (String, Int)errorPos f' :: Stringf' n' :: Intn' ls :: [Classified]ls
    errorPos f n (_         :ls) = errorPos :: String -> Int -> [Classified] -> (String, Int)errorPos f :: Stringf  (n :: Intn(+) :: Num a => a -> a -> a+1) ls :: [Classified]ls

-- Here we model a state machine, with each state represented by
-- a local function. We only have four states (well, five,
-- if you count the error state), but the rules
-- to transition between then are not so simple.
-- Would it be simpler to have more states?
--
-- Each state represents the type of line that was last read
-- i.e. are we in a comment section, or a latex-code section,
-- or a bird-code section, etc?
reclassify :: [Classified] -> [Classified]
reclassify = blank :: [Classified] -> [Classified]blank -- begin in blank state
  where
    latex []               = [] :: [a][]
    latex (EndCode    :ls) = Blank :: String -> ClassifiedBlank "" (:) :: a -> [a] -> [a]: comment :: [Classified] -> [Classified]comment ls :: [Classified]ls
    latex (BeginCode  :_ ) = [Error :: String -> ClassifiedError "\\begin{code} in code section"]
    latex (BirdTrack l:ls) = Ordinary :: String -> ClassifiedOrdinary ('>'(:) :: a -> [a] -> [a]:l :: Classifiedl) (:) :: a -> [a] -> [a]: latex :: [Classified] -> [Classified]latex ls :: [Classified]ls
    latex (          l:ls) = l :: Classifiedl (:) :: a -> [a] -> [a]: latex :: [Classified] -> [Classified]latex ls :: [Classified]ls

    blank []               = [] :: [a][]
    blank (EndCode    :_ ) = [Error :: String -> ClassifiedError "\\end{code} without \\begin{code}"]
    blank (BeginCode  :ls) = Blank :: String -> ClassifiedBlank ""    (:) :: a -> [a] -> [a]: latex :: [Classified] -> [Classified]latex ls :: [Classified]ls
    blank (BirdTrack l:ls) = BirdTrack :: String -> ClassifiedBirdTrack l :: Classifiedl (:) :: a -> [a] -> [a]: bird :: [Classified] -> [Classified]bird ls :: [Classified]ls
    blank (Ordinary  l:ls) = Comment :: String -> ClassifiedComment   l :: Classifiedl (:) :: a -> [a] -> [a]: comment :: [Classified] -> [Classified]comment ls :: [Classified]ls
    blank (          l:ls) =           l :: Classifiedl (:) :: a -> [a] -> [a]: blank :: [Classified] -> [Classified]blank ls :: [Classified]ls

    bird []              = [] :: [a][]
    bird (EndCode   :_ ) = [Error :: String -> ClassifiedError "\\end{code} without \\begin{code}"]
    bird (BeginCode :ls) = Blank :: String -> ClassifiedBlank "" (:) :: a -> [a] -> [a]: latex :: [Classified] -> [Classified]latex ls :: [Classified]ls
    bird (Blank l   :ls) = Blank :: String -> ClassifiedBlank l :: Classifiedl  (:) :: a -> [a] -> [a]: blank :: [Classified] -> [Classified]blank ls :: [Classified]ls
    bird (Ordinary _:_ ) = [Error :: String -> ClassifiedError "program line before comment line"]
    bird (         l:ls) = l :: Classifiedl (:) :: a -> [a] -> [a]: bird :: [Classified] -> [Classified]bird ls :: [Classified]ls

    comment []               = [] :: [a][]
    comment (EndCode    :_ ) = [Error :: String -> ClassifiedError "\\end{code} without \\begin{code}"]
    comment (BeginCode  :ls) = Blank :: String -> ClassifiedBlank "" (:) :: a -> [a] -> [a]: latex :: [Classified] -> [Classified]latex ls :: [Classified]ls
    comment (CPP l      :ls) = CPP :: String -> ClassifiedCPP l :: Classifiedl (:) :: a -> [a] -> [a]: comment :: [Classified] -> [Classified]comment ls :: [Classified]ls
    comment (BirdTrack _:_ ) = [Error :: String -> ClassifiedError "comment line before program line"]
    -- a blank line and another ordinary line following a comment
    -- will be treated as continuing the comment. Otherwise it's
    -- then end of the comment, with a blank line.
    comment (Blank     l:ls@(Ordinary  _:_)) = Comment :: String -> ClassifiedComment l :: Classifiedl (:) :: a -> [a] -> [a]: comment :: [Classified] -> [Classified]comment ls :: [Classified]ls
    comment (Blank     l:ls) = Blank :: String -> ClassifiedBlank l :: Classifiedl   (:) :: a -> [a] -> [a]: blank :: [Classified] -> [Classified]blank ls :: [Classified]ls
    comment (Line n f   :ls) = ($WLine) :: Int -> String -> ClassifiedLine n :: Intn f :: Stringf  (:) :: a -> [a] -> [a]: comment :: [Classified] -> [Classified]comment ls :: [Classified]ls
    comment (Ordinary  l:ls) = Comment :: String -> ClassifiedComment l :: Classifiedl (:) :: a -> [a] -> [a]: comment :: [Classified] -> [Classified]comment ls :: [Classified]ls
    comment (Comment   _: _) = internalError :: ainternalError
    comment (Error     _: _) = internalError :: ainternalError

-- Re-implementation of 'lines', for better efficiency (but decreased laziness).
-- Also, importantly, accepts non-standard DOS and Mac line ending characters.
inlines :: String -> [String]
inlines xs = lines' :: [Char] -> ([Char] -> [Char]) -> [[Char]]lines' xs :: Stringxs id :: a -> aid
  where
  lines' []             acc = [acc :: [Char] -> [Char]acc [] :: [a][]]
  lines' ('\^M':'\n':s) acc = acc :: [Char] -> [Char]acc [] :: [a][] (:) :: a -> [a] -> [a]: lines' :: [Char] -> ([Char] -> [Char]) -> [[Char]]lines' s :: Strings id :: a -> aid    -- DOS
  lines' ('\^M':s)      acc = acc :: [Char] -> [Char]acc [] :: [a][] (:) :: a -> [a] -> [a]: lines' :: [Char] -> ([Char] -> [Char]) -> [[Char]]lines' s :: Strings id :: a -> aid    -- MacOS
  lines' ('\n':s)       acc = acc :: [Char] -> [Char]acc [] :: [a][] (:) :: a -> [a] -> [a]: lines' :: [Char] -> ([Char] -> [Char]) -> [[Char]]lines' s :: Strings id :: a -> aid    -- Unix
  lines' (c:s)          acc = lines' :: [Char] -> ([Char] -> [Char]) -> [[Char]]lines' s :: Strings (acc :: [Char] -> [Char]acc (.) :: (b -> c) -> (a -> b) -> a -> c. (c :: Charc(:) :: a -> [a] -> [a]:))

internalError :: a
internalError = error :: [Char] -> aerror "unlit: internal error"