module Language.Haskell.HsColour.Anchors
  ( insertAnchors
  ) where

import Language.Haskell.HsColour.Classify
import Language.Haskell.HsColour.General
import List
import Char

-- This is an attempt to find the first defining occurrence of an
-- identifier (function, datatype, class) in a Haskell source file.
-- Rather than parse the module properly, we try to get by with just
-- a finite state automaton.  Keeping a record of identifiers we
-- have already seen, we look at the beginning of every line to see
-- if it starts with the right tokens to introduce a defn.  If so,
-- we look a little bit further until we can be certain.  Then plonk
-- (or not) an anchor at the beginning of the line.

type Anchor = String

-- | 'insertAnchors' places an anchor marker in the token stream before the
--   first defining occurrence of any identifier.  Here, /before/ means
--   immediately preceding its type signature, or preceding a (haddock)
--   comment that comes immediately before the type signature, or failing
--   either of those, before the first equation.
insertAnchors :: [(TokenType,String)] -> [Either Anchor (TokenType,String)]
insertAnchors ::
  [(TokenType, String)] -> [Either Anchor (TokenType, String)]insertAnchors = anchor ::
  ST -> [(TokenType, String)] -> [Either String (TokenType, String)]anchor emptyST :: STemptyST

-- looks at first token in the left-most position of each line
-- precondition: have just seen a newline token.
anchor :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
anchor ::
  ST -> [(TokenType, String)] -> [Either String (TokenType, String)]anchor st :: STst s :: [(TokenType, String)]s = case identifier :: ST -> [(TokenType, String)] -> Maybe Stringidentifier st :: STst s :: [(TokenType, String)]s of
                Nothing -> emit ::
  ST -> [(TokenType, String)] -> [Either String (TokenType, String)]emit st :: STst s :: [(TokenType, String)]s
                Just v :: Stringv  -> Left :: a -> Either a bLeft (escape :: String -> Stringescape v :: Stringv)(:) :: a -> [a] -> [a]: emit ::
  ST -> [(TokenType, String)] -> [Either String (TokenType, String)]emit (insertST :: String -> ST -> STinsertST v :: Stringv st :: STst) s :: [(TokenType, String)]s

-- some chars are not valid in anchor URIs: http://www.ietf.org/rfc/rfc3986
-- NOTE: This code assumes characters are 8-bit.
--       Ideally, it should transcode to utf8 octets first.
escape :: String -> String
escape :: String -> Stringescape = concatMap :: (a -> [b]) -> [a] -> [b]concatMap enc :: Char -> [Char]enc
    where enc :: Char -> [Char]enc x :: Charx | isDigit :: Char -> BoolisDigit x :: Charx
                (||) :: Bool -> Bool -> Bool|| isURIFragmentValid :: Char -> BoolisURIFragmentValid x :: Charx
                (||) :: Bool -> Bool -> Bool|| isLower :: Char -> BoolisLower x :: Charx
                (||) :: Bool -> Bool -> Bool|| isUpper :: Char -> BoolisUpper x :: Charx = [x :: Charx]
                | otherwise :: Boolotherwise  = ['%',hexHi :: Int -> CharhexHi (ord :: Char -> Intord x :: Charx), hexLo :: Int -> CharhexLo (ord :: Char -> Intord x :: Charx)]
          hexHi :: Int -> CharhexHi d :: Intd = intToDigit :: Int -> CharintToDigit (d :: Intddiv :: Integral a => a -> a -> a`div`16)
          hexLo :: Int -> CharhexLo d :: Intd = intToDigit :: Int -> CharintToDigit (d :: Intdmod :: Integral a => a -> a -> a`mod`16)
          isURIFragmentValid :: Char -> BoolisURIFragmentValid x :: Charx = x :: Charx elem :: Eq a => a -> [a] -> Bool`elem` "!$&'()*+,;=/?-._~:@"

-- emit passes stuff through until the next newline has been encountered,
-- then jumps back into the anchor function
-- pre-condition: newlines are explicitly single tokens
emit :: ST -> [(TokenType, String)] -> [Either String (TokenType, String)]
emit ::
  ST -> [(TokenType, String)] -> [Either String (TokenType, String)]emit st :: STst (t :: (TokenType, String)t@(Space,"\n"):stream :: [(TokenType, String)]stream) = Right :: b -> Either a bRight t :: (TokenType, String)t(:) :: a -> [a] -> [a]: anchor ::
  ST -> [(TokenType, String)] -> [Either String (TokenType, String)]anchor st :: STst stream :: [(TokenType, String)]stream
emit st :: STst (t :: (TokenType, String)t:stream :: [(TokenType, String)]stream)              = Right :: b -> Either a bRight t :: (TokenType, String)t(:) :: a -> [a] -> [a]: emit ::
  ST -> [(TokenType, String)] -> [Either String (TokenType, String)]emit st :: STst stream :: [(TokenType, String)]stream
emit _  []                      = [] :: [a][]

-- Given that we are at the beginning of a line, determine whether there
-- is an identifier defined here, and if so, return it.
-- precondition: have just seen a newline token.
identifier ::  ST -> [(TokenType, String)] -> Maybe String
identifier :: ST -> [(TokenType, String)] -> Maybe Stringidentifier st :: STst t :: [(TokenType, String)]t@((kind :: TokenTypekind,v :: Stringv):stream :: [(TokenType, String)]stream) | kind :: TokenTypekindelem :: Eq a => a -> [a] -> Bool`elem`[Varid :: TokenTypeVarid,Definition :: TokenTypeDefinition] =
    case skip :: [(TokenType, t)] -> [(TokenType, t)]skip stream :: [(TokenType, String)]stream of
        ((Varop,v :: Stringv):_) | not :: Bool -> Boolnot (v :: StringvinST :: String -> ST -> Bool`inST`st :: STst) -> Just :: a -> Maybe aJust (fix :: String -> Stringfix v :: Stringv)
        notVarop :: [(TokenType, String)]notVarop  --  | typesig stream  -> Nothing    -- not a defn
                      | v :: Stringv inST :: String -> ST -> Bool`inST` st :: STst     -> Nothing :: Maybe aNothing    -- already defined
                      | otherwise :: Boolotherwise       -> Just :: a -> Maybe aJust v :: Stringv
identifier st :: STst t :: [(TokenType, String)]t@((Layout,"("):stream :: [(TokenType, String)]stream) =
    case stream :: [(TokenType, String)]stream of
      ((Varop,v :: Stringv):(Layout,")"):_)
                  --  | typesig stream  -> Nothing
	              | v `inST` inST :: String -> ST -> Boolst     st :: ST-> NothingNothing :: Maybe a
	              | otherwise	-> Just (fix v)
      notVarop :: [(TokenType, String)]notVarop -> case skip :: [(TokenType, t)] -> [(TokenType, t)]skip (munchParens :: [(TokenType, String)] -> [(TokenType, String)]munchParens stream :: [(TokenType, String)]stream) of
          ((Varop,v :: Stringv):_) | not :: Bool -> Boolnot (v :: StringvinST :: String -> ST -> Bool`inST`st :: STst) -> Just :: a -> Maybe aJust (fix :: String -> Stringfix v :: Stringv)
          _             -> Nothing :: Maybe aNothing
identifier st :: STst t :: [(TokenType, String)]t@((Keyword,"foreign"):stream :: [(TokenType, String)]stream) = Nothing :: Maybe aNothing -- not yet implemented
identifier st :: STst t :: [(TokenType, String)]t@((Keyword,"data"):stream :: [(TokenType, String)]stream)    = getConid :: [(TokenType, String)] -> Maybe StringgetConid stream :: [(TokenType, String)]stream
identifier st :: STst t :: [(TokenType, String)]t@((Keyword,"newtype"):stream :: [(TokenType, String)]stream) = getConid :: [(TokenType, String)] -> Maybe StringgetConid stream :: [(TokenType, String)]stream
identifier st :: STst t :: [(TokenType, String)]t@((Keyword,"type"):stream :: [(TokenType, String)]stream)    = getConid :: [(TokenType, String)] -> Maybe StringgetConid stream :: [(TokenType, String)]stream
identifier st :: STst t :: [(TokenType, String)]t@((Keyword,"class"):stream :: [(TokenType, String)]stream)   = getConid :: [(TokenType, String)] -> Maybe StringgetConid stream :: [(TokenType, String)]stream
identifier st :: STst t :: [(TokenType, String)]t@((Comment,_):(Space,"\n"):stream :: [(TokenType, String)]stream) = identifier :: ST -> [(TokenType, String)] -> Maybe Stringidentifier st :: STst stream :: [(TokenType, String)]stream
identifier st :: STst stream :: [(TokenType, String)]stream = Nothing :: Maybe aNothing

-- Is this really a type signature?  (no longer used)
typesig :: [(TokenType,String)] -> Bool
typesig :: [(TokenType, String)] -> Booltypesig ((Keyglyph,"::"):_)   = True :: BoolTrue
typesig ((Varid,_):stream :: [(TokenType, String)]stream)    = typesig :: [(TokenType, String)] -> Booltypesig stream :: [(TokenType, String)]stream
typesig ((Layout,"("):(Varop,_):(Layout,")"):stream :: [(TokenType, String)]stream)    = typesig :: [(TokenType, String)] -> Booltypesig stream :: [(TokenType, String)]stream
typesig ((Layout,","):stream :: [(TokenType, String)]stream) = typesig :: [(TokenType, String)] -> Booltypesig stream :: [(TokenType, String)]stream
typesig ((Space,_):stream :: [(TokenType, String)]stream)    = typesig :: [(TokenType, String)] -> Booltypesig stream :: [(TokenType, String)]stream
typesig ((Comment,_):stream :: [(TokenType, String)]stream)  = typesig :: [(TokenType, String)] -> Booltypesig stream :: [(TokenType, String)]stream
typesig _                     = False :: BoolFalse

-- throw away everything from opening paren to matching close
munchParens ::  [(TokenType, String)] -> [(TokenType, String)]
munchParens :: [(TokenType, String)] -> [(TokenType, String)]munchParens =  munch :: a -> [(TokenType, [Char])] -> [(TokenType, [Char])]munch (0::Int)	-- already seen open paren
  where munch ::
  Num a => a -> [(TokenType, [Char])] -> [(TokenType, [Char])]munch 0 ((Layout,")"):rest :: [(TokenType, [Char])]rest) = rest :: [(TokenType, [Char])]rest
        munch n :: an ((Layout,")"):rest :: [(TokenType, [Char])]rest) = munch :: a -> [(TokenType, [Char])] -> [(TokenType, [Char])]munch (n :: an(-) :: Num a => a -> a -> a-1) rest :: [(TokenType, [Char])]rest
        munch n :: an ((Layout,"("):rest :: [(TokenType, [Char])]rest) = munch :: a -> [(TokenType, [Char])] -> [(TokenType, [Char])]munch (n :: an(+) :: Num a => a -> a -> a+1) rest :: [(TokenType, [Char])]rest
        munch n :: an (_:rest :: [(TokenType, [Char])]rest)            = munch :: a -> [(TokenType, [Char])] -> [(TokenType, [Char])]munch n :: an rest :: [(TokenType, [Char])]rest
        munch _ []                  = [] :: [a][]	-- source is ill-formed

-- ensure anchor name is correct for a Varop
fix ::  String -> String
fix :: String -> Stringfix ('`':v :: [Char]v) = dropLast :: Eq a => a -> [a] -> [a]dropLast '`' v :: Stringv
fix v :: Stringv       = v :: Stringv

-- look past whitespace and comments to next "real" token
skip ::  [(TokenType, t)] -> [(TokenType, t)]
skip :: [(TokenType, t)] -> [(TokenType, t)]skip ((Space,_):stream :: [(TokenType, t)]stream)   = skip :: [(TokenType, t)] -> [(TokenType, t)]skip stream :: [(TokenType, String)]stream
skip ((Comment,_):stream :: [(TokenType, t)]stream) = skip :: [(TokenType, t)] -> [(TokenType, t)]skip stream :: [(TokenType, String)]stream
skip stream :: [(TokenType, t)]stream               = stream :: [(TokenType, String)]stream

-- skip possible context up to and including "=>", returning next Conid token
-- (this function is highly partial - relies on source being parse-correct)
getConid ::  [(TokenType, String)] -> Maybe String
getConid :: [(TokenType, String)] -> Maybe StringgetConid stream :: [(TokenType, String)]stream =
    case skip :: [(TokenType, t)] -> [(TokenType, t)]skip stream :: [(TokenType, String)]stream of
        ((Conid,c :: Stringc):rest :: [(TokenType, String)]rest) -> case context :: [(TokenType, String)] -> [(TokenType, String)]context rest :: [(TokenType, [Char])]rest of
                              ((Keyglyph,"="):_)     -> Just :: a -> Maybe aJust c :: Stringc
                              ((Keyglyph,"=>"):more :: [(TokenType, String)]more) ->
                                  case skip :: [(TokenType, t)] -> [(TokenType, t)]skip more :: [(TokenType, String)]more of
                                      ((Conid,c' :: Stringc'):_) -> Just :: a -> Maybe aJust c' :: Stringc'
                                      v :: [(TokenType, String)]v -> debug :: t -> t -> Maybe adebug v :: Stringv ("Conid "(++) :: [a] -> [a] -> [a]++c :: Stringc(++) :: [a] -> [a] -> [a]++" =>")
                              v :: [(TokenType, String)]v -> debug :: t -> t -> Maybe adebug v :: Stringv ("Conid "(++) :: [a] -> [a] -> [a]++c :: Stringc(++) :: [a] -> [a] -> [a]++" no = or =>")
        ((Layout,"("):rest :: [(TokenType, String)]rest) -> case context :: [(TokenType, String)] -> [(TokenType, String)]context rest :: [(TokenType, [Char])]rest of
                                   ((Keyglyph,"=>"):more :: [(TokenType, String)]more) ->
                                       case skip :: [(TokenType, t)] -> [(TokenType, t)]skip more :: [(TokenType, String)]more of
                                           ((Conid,c' :: Stringc'):_) -> Just :: a -> Maybe aJust c' :: Stringc'
                                           v :: [(TokenType, String)]v -> debug :: t -> t -> Maybe adebug v :: Stringv ("(...) =>")
                                   v :: [(TokenType, String)]v -> debug :: t -> t -> Maybe adebug v :: Stringv ("(...) no =>")
        v :: [(TokenType, String)]v -> debug :: t -> t -> Maybe adebug v :: Stringv ("no Conid or (...)")
    where debug :: t -> t1 -> Maybe adebug   _   _ = Nothing :: Maybe aNothing
       -- debug (s:t) c = error ("HsColour: getConid failed: "++show s
       --                       ++"\n  in the context of: "++c)

-- jump past possible class context
context ::  [(TokenType, String)] -> [(TokenType, String)]
context :: [(TokenType, String)] -> [(TokenType, String)]context stream :: [(TokenType, String)]stream@((Keyglyph,"="):_) = stream :: [(TokenType, String)]stream
context stream :: [(TokenType, String)]stream@((Keyglyph,"=>"):_) = stream :: [(TokenType, String)]stream
context (_:stream :: [(TokenType, String)]stream) = context :: [(TokenType, String)] -> [(TokenType, String)]context stream :: [(TokenType, String)]stream
context [] = [] :: [a][]

-- simple implementation of a string lookup table.
-- replace this with something more sophisticated if needed.
type ST = [String]

emptyST :: ST
emptyST :: STemptyST = [] :: [a][]

insertST :: String -> ST -> ST
insertST :: String -> ST -> STinsertST k :: Stringk st :: STst = insert :: Ord a => a -> [a] -> [a]insert k :: Stringk st :: STst

inST :: String -> ST -> Bool
inST :: String -> ST -> BoolinST k :: Stringk st :: STst = k :: Stringk elem :: Eq a => a -> [a] -> Bool`elem` st :: STst