($con2tag_TokenType) :: TokenType -> Int#module Language.Haskell.HsColour.Classify
  ( TokenType(..)
  , tokenise
  ) where

import Char
import List

-- | Lex Haskell source code into an annotated token stream, without
--   discarding any characters or layout.
tokenise :: String -> [(TokenType,String)]
tokenise :: String -> [(TokenType, String)]tokenise str :: Stringstr = 
    let chunks :: [[Char]]chunks = glue :: [[Char]] -> [[Char]]glue (.) :: (b -> c) -> (a -> b) -> a -> c. chunk :: String -> [String]chunk ($) :: (a -> b) -> a -> b$ str :: Stringstr 
    in markDefs :: [(TokenType, String)] -> [(TokenType, String)]markDefs ($) :: (a -> b) -> a -> b$ map :: (a -> b) -> [a] -> [b]map (\s :: Strings-> (classify :: String -> TokenTypeclassify s :: Strings,s :: Strings)) chunks :: [[Char]]chunks

markDefs :: [(TokenType, String)] -> [(TokenType, String)]
markDefs :: [(TokenType, String)] -> [(TokenType, String)]markDefs [] = [] :: [a][]
markDefs ((Varid, s :: Strings) : rest :: [(TokenType, String)]rest) = (Definition :: TokenTypeDefinition, s :: Strings) (:) :: a -> [a] -> [a]: continue :: [(TokenType, [Char])] -> [(TokenType, [Char])]continue rest :: [(TokenType, String)]rest
markDefs ((Varop, ">") : (Space, " ") : (Varid, d :: Stringd) : rest :: [(TokenType, String)]rest) =
    (Varop :: TokenTypeVarop, ">") (:) :: a -> [a] -> [a]: (Space :: TokenTypeSpace, " ") (:) :: a -> [a] -> [a]: (Definition :: TokenTypeDefinition, d :: Stringd) (:) :: a -> [a] -> [a]: continue :: [(TokenType, [Char])] -> [(TokenType, [Char])]continue rest :: [(TokenType, String)]rest
markDefs rest :: [(TokenType, String)]rest = continue :: [(TokenType, [Char])] -> [(TokenType, [Char])]continue rest :: [(TokenType, String)]rest

continue :: [(TokenType, [Char])] -> [(TokenType, [Char])]continue rest :: [(TokenType, [Char])]rest 
    = let (thisLine :: [(TokenType, [Char])]thisLine, nextLine :: [(TokenType, [Char])]nextLine) = span :: (a -> Bool) -> [a] -> ([a], [a])span ((/=) :: Eq a => a -> a -> Bool/= (Space :: TokenTypeSpace, "\n")) rest :: [(TokenType, String)]rest
      in
        case nextLine :: [(TokenType, [Char])]nextLine of
          [] -> thisLine :: [(TokenType, [Char])]thisLine
          ((Space, "\n"):nextLine' :: [(TokenType, [Char])]nextLine') -> (thisLine :: [(TokenType, [Char])]thisLine (++) :: [a] -> [a] -> [a]++ ((Space :: TokenTypeSpace, "\n") (:) :: a -> [a] -> [a]: (markDefs :: [(TokenType, String)] -> [(TokenType, String)]markDefs nextLine' :: [(TokenType, [Char])]nextLine')))


-- Basic Haskell lexing, except we keep whitespace.
chunk :: String -> [String]
chunk :: String -> [String]chunk []    = [] :: [a][]
chunk ('\r':s :: [Char]s) = chunk :: String -> [String]chunk s :: Strings -- get rid of DOS newline stuff
chunk ('\n':s :: [Char]s) = "\n"(:) :: a -> [a] -> [a]: chunk :: String -> [String]chunk s :: Strings
chunk (c :: Charc:s :: [Char]s) | isLinearSpace :: Char -> BoolisLinearSpace c :: Charc
            = (c :: Charc(:) :: a -> [a] -> [a]:ss :: [Char]ss)(:) :: a -> [a] -> [a]: chunk :: String -> [String]chunk rest :: [(TokenType, String)]rest where (ss :: [Char]ss,rest :: [Char]rest) = span :: (a -> Bool) -> [a] -> ([a], [a])span isLinearSpace :: Char -> BoolisLinearSpace s :: Strings
chunk ('{':'-':s :: [Char]s) = let (com :: Stringcom,s' :: Strings') = nestcomment :: Int -> String -> (String, String)nestcomment 0 s :: Strings
                    in ('{'(:) :: a -> [a] -> [a]:'-'(:) :: a -> [a] -> [a]:com :: Stringcom) (:) :: a -> [a] -> [a]: chunk :: String -> [String]chunk s' :: Strings'
chunk s :: Strings = case lex :: ReadS StringPrelude.lex s :: Strings of
              []             -> [head :: [a] -> ahead s :: Strings](:) :: a -> [a] -> [a]: chunk :: String -> [String]chunk (tail :: [a] -> [a]tail s :: Strings) -- e.g. inside comment
              ((tok :: Stringtok@('-':'-':_),rest :: Stringrest):_)
                  | all :: (a -> Bool) -> [a] -> Boolall ((==) :: Eq a => a -> a -> Bool=='-') tok :: Stringtok -> (tok :: Stringtok(++) :: [a] -> [a] -> [a]++com :: Stringcom)(:) :: a -> [a] -> [a]: chunk :: String -> [String]chunk s' :: Strings'
                                       where (com :: Stringcom,s' :: Strings') = eolcomment :: String -> (String, String)eolcomment rest :: [(TokenType, String)]rest
              ((tok :: Stringtok,rest :: Stringrest):_) -> tok :: Stringtok(:) :: a -> [a] -> [a]: chunk :: String -> [String]chunk rest :: [(TokenType, String)]rest

isLinearSpace :: Char -> BoolisLinearSpace c :: Charc = c :: Charc elem :: Eq a => a -> [a] -> Bool`elem` " \t\f" -- " \t\xa0"

-- Glue sequences of tokens into more useful blobs
--glue (q:".":n:rest) | Char.isUpper (head q)	-- qualified names
--                    = glue ((q++"."++n): rest)
glue :: [[Char]] -> [[Char]]glue ("`":rest :: [[Char]]rest) =				-- `varid` -> varop
  case glue :: [[Char]] -> [[Char]]glue rest :: [(TokenType, String)]rest of
    (qn :: [Char]qn:"`":rest :: [[Char]]rest) -> ("`"(++) :: [a] -> [a] -> [a]++qn :: [Char]qn(++) :: [a] -> [a] -> [a]++"`")(:) :: a -> [a] -> [a]: glue :: [[Char]] -> [[Char]]glue rest :: [(TokenType, String)]rest
    _             -> "`"(:) :: a -> [a] -> [a]: glue :: [[Char]] -> [[Char]]glue rest :: [(TokenType, String)]rest
glue (s :: [Char]s:ss :: [[Char]]ss)       | all :: (a -> Bool) -> [a] -> Boolall ((==) :: Eq a => a -> a -> Bool=='-') s :: Strings (&&) :: Bool -> Bool -> Bool&& length :: [a] -> Intlength s :: Strings (>=) :: Ord a => a -> a -> Bool>=2	-- eol comment
                  = (s :: Strings(++) :: [a] -> [a] -> [a]++concat :: [[a]] -> [a]concat c :: Charc)(:) :: a -> [a] -> [a]: glue :: [[Char]] -> [[Char]]glue rest :: [(TokenType, String)]rest
                  where (c :: [[Char]]c,rest :: [[Char]]rest) = break :: (a -> Bool) -> [a] -> ([a], [a])break ('\n'elem :: Eq a => a -> [a] -> Bool`elem`) ss :: [Char]ss
--glue ("{":"-":ss)  = ("{-"++c): glue rest	-- nested comment
--                  where (c,rest) = nestcomment 0 ss
glue ("(":ss :: [[Char]]ss) = case rest :: [(TokenType, String)]rest of
                ")":rest :: [[Char]]rest -> ("(" (++) :: [a] -> [a] -> [a]++ concat :: [[a]] -> [a]concat tuple :: [[Char]]tuple (++) :: [a] -> [a] -> [a]++ ")") (:) :: a -> [a] -> [a]: glue :: [[Char]] -> [[Char]]glue rest :: [(TokenType, String)]rest
                _         -> "(" (:) :: a -> [a] -> [a]: glue :: [[Char]] -> [[Char]]glue ss :: [Char]ss
              where (tuple :: [[Char]]tuple,rest :: [[Char]]rest) = span :: (a -> Bool) -> [a] -> ([a], [a])span ((==) :: Eq a => a -> a -> Bool==",") ss :: [Char]ss
glue ("[":"]":ss :: [[Char]]ss) = "[]" (:) :: a -> [a] -> [a]: glue :: [[Char]] -> [[Char]]glue ss :: [Char]ss
glue ("\n":"#":ss :: [[Char]]ss)= "\n" (:) :: a -> [a] -> [a]: ('#'(:) :: a -> [a] -> [a]:concat :: [[a]] -> [a]concat line :: [[Char]]line) (:) :: a -> [a] -> [a]: glue :: [[Char]] -> [[Char]]glue rest :: [(TokenType, String)]rest
                  where (line :: [[Char]]line,rest :: [[Char]]rest) = break :: (a -> Bool) -> [a] -> ([a], [a])break ('\n'elem :: Eq a => a -> [a] -> Bool`elem`) ss :: [Char]ss
glue (s :: [Char]s:ss :: [[Char]]ss)       = s :: Strings(:) :: a -> [a] -> [a]: glue :: [[Char]] -> [[Char]]glue ss :: [Char]ss
glue []           = [] :: [a][]

-- Deal with comments.
nestcomment :: Int -> String -> (String,String)
nestcomment :: Int -> String -> (String, String)nestcomment n :: Intn ('{':'-':ss :: [Char]ss) | n :: Intn(>=) :: Ord a => a -> a -> Bool>=0 = (("{-"(++) :: [a] -> [a] -> [a]++cs :: Stringcs),rm :: Stringrm)
                                  where (cs :: Stringcs,rm :: Stringrm) = nestcomment :: Int -> String -> (String, String)nestcomment (n :: Intn(+) :: Num a => a -> a -> a+1) ss :: [Char]ss
nestcomment n :: Intn ('-':'}':ss :: [Char]ss) | n :: Intn(>) :: Ord a => a -> a -> Bool>0  = (("-}"(++) :: [a] -> [a] -> [a]++cs :: Stringcs),rm :: Stringrm)
                                  where (cs :: Stringcs,rm :: Stringrm) = nestcomment :: Int -> String -> (String, String)nestcomment (n :: Intn(-) :: Num a => a -> a -> a-1) ss :: [Char]ss
nestcomment n :: Intn ('-':'}':ss :: [Char]ss) | n :: Intn(==) :: Eq a => a -> a -> Bool==0 = ("-}",ss :: [Char]ss)
nestcomment n :: Intn (s :: Chars:ss :: [Char]ss)       | n :: Intn(>=) :: Ord a => a -> a -> Bool>=0 = ((s :: Strings(:) :: a -> [a] -> [a]:cs :: Stringcs),rm :: Stringrm)
                                  where (cs :: Stringcs,rm :: Stringrm) = nestcomment :: Int -> String -> (String, String)nestcomment n :: Intn ss :: [Char]ss
nestcomment n :: Intn [] = ([] :: [a][],[] :: [a][])

eolcomment :: String -> (String,String)
eolcomment :: String -> (String, String)eolcomment s :: Strings@('\n':_) = ([] :: [a][], s :: Strings)
eolcomment ('\r':s :: [Char]s)   = eolcomment :: String -> (String, String)eolcomment s :: Strings
eolcomment (c :: Charc:s :: [Char]s)      = (c :: Charc(:) :: a -> [a] -> [a]:cs :: Stringcs, s' :: Strings') where (cs :: Stringcs,s' :: Strings') = eolcomment :: String -> (String, String)eolcomment s :: Strings
eolcomment []         = ([] :: [a][],[] :: [a][])

-- | Classification of tokens as lexical entities
data TokenType =
  Space :: TokenTypeSpace | Keyword :: TokenTypeKeyword | Keyglyph :: TokenTypeKeyglyph | Layout :: TokenTypeLayout | Comment :: TokenTypeComment | Conid :: TokenTypeConid | Varid :: TokenTypeVarid |
  Conop :: TokenTypeConop | Varop :: TokenTypeVarop   | String :: TokenTypeString   | Char :: TokenTypeChar   | Number :: TokenTypeNumber  | Cpp :: TokenTypeCpp   | Error :: TokenTypeError |
  Definition :: TokenTypeDefinition
  deriving (D:Eq :: (a -> a -> Bool) -> (a -> a -> Bool) -> T:Eq aEq,D:Show ::
  (Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> T:Show aShow)

classify :: String -> TokenType
classify :: String -> TokenTypeclassify s :: Strings@(h :: Charh:t :: [Char]t)
    | isSpace :: Char -> BoolisSpace h :: Charh              = Space :: TokenTypeSpace
    | all :: (a -> Bool) -> [a] -> Boolall ((==) :: Eq a => a -> a -> Bool=='-') s :: Strings          = Comment :: TokenTypeComment
    | "--" isPrefixOf :: Eq a => [a] -> [a] -> Bool`isPrefixOf` s :: Strings
      (&&) :: Bool -> Bool -> Bool&& any :: (a -> Bool) -> [a] -> Boolany isSpace :: Char -> BoolisSpace s :: Strings       = Comment :: TokenTypeComment		-- not fully correct
    | "{-" isPrefixOf :: Eq a => [a] -> [a] -> Bool`isPrefixOf` s :: Strings    = Comment :: TokenTypeComment
    | s :: Strings elem :: Eq a => a -> [a] -> Bool`elem` keywords :: [[Char]]keywords      = Keyword :: TokenTypeKeyword
    | s :: Strings elem :: Eq a => a -> [a] -> Bool`elem` keyglyphs :: [[Char]]keyglyphs     = Keyglyph :: TokenTypeKeyglyph
    | s :: Strings elem :: Eq a => a -> [a] -> Bool`elem` layoutchars :: [[Char]]layoutchars   = Layout :: TokenTypeLayout
    | isUpper :: Char -> BoolisUpper h :: Charh              = Conid :: TokenTypeConid
    | s :: Strings (==) :: Eq a => a -> a -> Bool== "[]"              = Conid :: TokenTypeConid
    | h :: Charh (==) :: Eq a => a -> a -> Bool== '(' (&&) :: Bool -> Bool -> Bool&& isTupleTail :: [Char] -> BoolisTupleTail t :: [Char]t = Conid :: TokenTypeConid
    | h :: Charh (==) :: Eq a => a -> a -> Bool== '#'               = Cpp :: TokenTypeCpp
    | isLower :: Char -> BoolisLower h :: Charh              = Varid :: TokenTypeVarid
    | h :: Charh elem :: Eq a => a -> [a] -> Bool`elem` symbols :: [Char]symbols       = Varop :: TokenTypeVarop
    | h :: Charh(==) :: Eq a => a -> a -> Bool==':'                 = Conop :: TokenTypeConop
    | h :: Charh(==) :: Eq a => a -> a -> Bool=='`'                 = Varop :: TokenTypeVarop
    | h :: Charh(==) :: Eq a => a -> a -> Bool=='"'                 = String :: TokenTypeString
    | h :: Charh(==) :: Eq a => a -> a -> Bool=='\''                = Char :: TokenTypeChar
    | isDigit :: Char -> BoolisDigit h :: Charh              = Number :: TokenTypeNumber
    | otherwise :: Boolotherwise              = Error :: TokenTypeError
classify _ = Space :: TokenTypeSpace

isTupleTail :: [Char] -> BoolisTupleTail [')'] = True :: BoolTrue
isTupleTail (',':xs :: [Char]xs) = isTupleTail :: [Char] -> BoolisTupleTail xs :: [Char]xs
isTupleTail _ = False :: BoolFalse


-- Haskell keywords
keywords :: [[Char]]keywords =
  ["case","class","data","default","deriving","do","else","forall"
  ,"if","import","in","infix","infixl","infixr","instance","let","module"
  ,"newtype","of","qualified","then","type","where","_"
  ,"foreign","ccall","as","safe","unsafe"]
keyglyphs :: [[Char]]keyglyphs =
  ["..","::","=","\\","|","<-","->","@","~","=>","[","]"]
layoutchars :: [[Char]]layoutchars =
  map :: (a -> b) -> [a] -> [b]map ((:) :: a -> [a] -> [a]:[] :: [a][]) ";{}(),"
symbols :: [Char]symbols =
  "!#$%&*+./<=>?@\\^|-~"