{-# LANGUAGE DefaultSignatures #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections     #-}
{-# LANGUAGE TypeApplications  #-}

{- | A typeclass which provides a `parseConstant` method to convert Text strings
into objects of the appropriate type. This allows one to define parsers for
literal constants belonging to extensible built-in types without having to
modify the main Plutus Core parser. We expect parseConstant . prettyConst to be
the identity function. -}

module PlutusCore.Parsable
    (
     Parsable(..)
    )
where

import PlutusPrelude

import PlutusCore.Data

import Data.Bits (shiftL, (.|.))
import Data.ByteString (ByteString)
import Data.ByteString qualified as BS (pack)
import Data.Char (ord)
import Data.Text qualified as T
import Text.Read

{- Note [Parsing horribly broken]
As the title suggests, at the moment parsing is horribly broken. 'parse' expects a closed chunk of
text, but in order to provide one we need to determine in the main parsing pipeline (which can be
Happy-based or megaparsec-based) where that closed chunk ends (determining where it starts is easy).
So we need either of these two:

1. perform lexical analysis, cut the right piece and feed it to 'parse'
2. make 'parse' take as much as it needs and return the rest to the main parsing machinery

The latter option is quite non-trivial, because we have that Happy parser and it's hard to poke a
hole in it to get some custom parsing of constants consuming an arbitrary amount of symbols
(we don't even have symbols in Happy -- it's tokens there). So if we wanted to do the latter,
it would probably be the easiest option to just remove the Happy parser and replace it with a
megaparsec-based one (should be the simplest thing in the world, given that PIR is a superset of
PLC and already has a megaparsec-based parser).

There are arguments in favor of the former option (https://github.com/input-output-hk/plutus/pull/2458#discussion_r522091227):

> If you look in Lexer.x there's extensive commentary about how it parses constants. We could steal
the relevant regular expressions from there or reuse the entire lexer. I'm not convinced that
letting types do their own lexical analysis is a good idea: doing these things correctly can be
quite tricky and if you got it wrong it might mess up the main parser in some subtle way that
doesn't show up until you encounter an unexpected situation. The way that it's done at the moment
provides reasonably general syntax for constants and I'm fairly confident it's correct. I guess I
feel that this is low-level stuff that should be done once and then shut away in a box where it
won't cause any trouble.

however it's not that simple. Consider a list of lists: let's say the lexer determines what
constitutes a closed chunk of text representing such a value. And we feed that chunk to 'parse'.
But now we need to do lexical analysis again, but this time in 'parse' to determine whether a @,@
is a part of formatting or is inside an element (being, say, a string or a list of strings) of the
list. I.e. our constant parsers can be recursive and it's a pain to deal with when you do all the
lexical analysis upfront.

So at the moment we don't do anything correctly. Neither PLC nor PIR can handle lists of lists
and none of them can handle tuples at all ('Read' provides us with overloaded parsing for lists
but not tuples, hence the difference) and PIR, doing its own broken lexical analysis, fails on
things like @(con string "yes (no)")@.

This mess needs to be fixed at some point. It seems that dumping Happy and using megaparsec
eveywhere and making 'Parsable' megaparsec-based would be the simplest option and we don't
really care about the efficiency of the parser.
-}

parseDefault :: Read a => T.Text -> Maybe a
parseDefault :: Text -> Maybe a
parseDefault = String -> Maybe a
forall a. Read a => String -> Maybe a
readMaybe (String -> Maybe a) -> (Text -> String) -> Text -> Maybe a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> String
T.unpack

-- | A class for things that are parsable. Those include tags for built-in types and constants of
-- such types.
class Parsable a where
    -- | Return Nothing if the string is invalid, otherwise the corresponding value of type a.
    parse :: T.Text -> Maybe a

    -- | Overloading parsing for lists to special-case 'String' (the GHC's billion dollar mistake).
    parseList :: T.Text -> Maybe [a]
    parseList Text
text =
        String -> Maybe [a]
forall a. HasCallStack => String -> a
error (String -> Maybe [a]) -> String -> Maybe [a]
forall a b. (a -> b) -> a -> b
$ String
"Parsing of lists of this type is not implemented. Caused by: " String -> String -> String
forall a. [a] -> [a] -> [a]
++
            Text -> String
T.unpack Text
text

newtype AsRead a = AsRead a
instance Read a => Parsable (AsRead a) where
    parse :: Text -> Maybe (AsRead a)
parse     = (Text -> Maybe a) -> Text -> Maybe (AsRead a)
coerce ((Text -> Maybe a) -> Text -> Maybe (AsRead a))
-> (Text -> Maybe a) -> Text -> Maybe (AsRead a)
forall a b. (a -> b) -> a -> b
$ Read a => Text -> Maybe a
forall a. Read a => Text -> Maybe a
parseDefault @a
    parseList :: Text -> Maybe [AsRead a]
parseList = (Text -> Maybe [a]) -> Text -> Maybe [AsRead a]
coerce ((Text -> Maybe [a]) -> Text -> Maybe [AsRead a])
-> (Text -> Maybe [a]) -> Text -> Maybe [AsRead a]
forall a b. (a -> b) -> a -> b
$ Read [a] => Text -> Maybe [a]
forall a. Read a => Text -> Maybe a
parseDefault @[a]

instance Parsable a => Parsable [a] where
    parse :: Text -> Maybe [a]
parse = Text -> Maybe [a]
forall a. Parsable a => Text -> Maybe [a]
parseList

-- >>> :set -XOverloadedStrings
-- >>> parse "[False, True]" :: Maybe [Bool]
-- Just [False,True]
-- >>> parse "\"abc\"" :: Maybe String
-- Just "abc"
-- >>> parse "[\"abc\"]" :: Maybe [String]
-- *** Exception: Parsing of lists of this type is not implemented. Caused by: ["abc"]
-- >>> parse "(1, False)" :: Maybe (Integer, Bool)
-- *** Exception: Parsing for tuples is not implemented
deriving via AsRead Bool    instance Parsable Bool
deriving via AsRead Char    instance Parsable Char
deriving via AsRead Integer instance Parsable Integer
deriving via AsRead ()      instance Parsable ()
deriving via AsRead T.Text instance Parsable T.Text

instance Parsable (a, b) where
    parse :: Text -> Maybe (a, b)
parse = String -> Text -> Maybe (a, b)
forall a. HasCallStack => String -> a
error String
"Parsing for tuples is not implemented"

instance Parsable ByteString where
    parse :: Text -> Maybe ByteString
parse = Text -> Maybe ByteString
parseByteStringConstant

instance Parsable Data where
    parse :: Text -> Maybe Data
parse = String -> Text -> Maybe Data
forall a. HasCallStack => String -> a
error String
"Implement me"


--- Parsing bytestrings ---

-- A literal bytestring consists of the '#' character followed immediately by
-- an even number of upper- or lower-case hex digits.
parseByteStringConstant :: T.Text -> Maybe ByteString
parseByteStringConstant :: Text -> Maybe ByteString
parseByteStringConstant Text
lit = do
    case Text -> String
T.unpack Text
lit of
        Char
'#':String
body -> String -> Maybe ByteString
asBSLiteral String
body
        String
_        -> Maybe ByteString
forall a. Maybe a
Nothing

-- | Convert a list to a list of pairs, failing if the input list has an odd number of elements
pairs :: [a] -> Maybe [(a,a)]
pairs :: [a] -> Maybe [(a, a)]
pairs []         = [(a, a)] -> Maybe [(a, a)]
forall a. a -> Maybe a
Just []
pairs (a
a:a
b:[a]
rest) = ((a
a,a
b)(a, a) -> [(a, a)] -> [(a, a)]
forall a. a -> [a] -> [a]
:) ([(a, a)] -> [(a, a)]) -> Maybe [(a, a)] -> Maybe [(a, a)]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [a] -> Maybe [(a, a)]
forall a. [a] -> Maybe [(a, a)]
pairs [a]
rest
pairs [a]
_          = Maybe [(a, a)]
forall a. Maybe a
Nothing

hexDigitToWord8 :: Char -> Maybe Word8
hexDigitToWord8 :: Char -> Maybe Word8
hexDigitToWord8 Char
c =
    let x :: Word8
x = Char -> Word8
ord8 Char
c
    in    if Char
'0' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'9'  then  Word8 -> Maybe Word8
forall a. a -> Maybe a
Just (Word8 -> Maybe Word8) -> Word8 -> Maybe Word8
forall a b. (a -> b) -> a -> b
$ Word8
x Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Char -> Word8
ord8 Char
'0'
    else  if Char
'a' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'f'  then  Word8 -> Maybe Word8
forall a. a -> Maybe a
Just (Word8 -> Maybe Word8) -> Word8 -> Maybe Word8
forall a b. (a -> b) -> a -> b
$ Word8
x Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Char -> Word8
ord8 Char
'a' Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
10
    else  if Char
'A' Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'F'  then  Word8 -> Maybe Word8
forall a. a -> Maybe a
Just (Word8 -> Maybe Word8) -> Word8 -> Maybe Word8
forall a b. (a -> b) -> a -> b
$ Word8
x Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Char -> Word8
ord8 Char
'A' Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
10
    else  Maybe Word8
forall a. Maybe a
Nothing

    where ord8 :: Char -> Word8
          ord8 :: Char -> Word8
ord8 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
Data.Char.ord

-- | Convert a String into a ByteString, failing if the string has odd length
-- or any of its characters are not hex digits
asBSLiteral :: String -> Maybe ByteString
asBSLiteral :: String -> Maybe ByteString
asBSLiteral String
s =
    (Char -> Maybe Word8) -> String -> Maybe [Word8]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM Char -> Maybe Word8
hexDigitToWord8 String
s Maybe [Word8]
-> ([Word8] -> Maybe [(Word8, Word8)]) -> Maybe [(Word8, Word8)]
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= [Word8] -> Maybe [(Word8, Word8)]
forall a. [a] -> Maybe [(a, a)]
pairs      -- convert s into a list of pairs of Word8 values in [0..0xF]
    Maybe [(Word8, Word8)]
-> ([(Word8, Word8)] -> [Word8]) -> Maybe [Word8]
forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> ((Word8, Word8) -> Word8) -> [(Word8, Word8)] -> [Word8]
forall a b. (a -> b) -> [a] -> [b]
map (\(Word8
a,Word8
b) -> Word8 -> Int -> Word8
forall a. Bits a => a -> Int -> a
shiftL Word8
a Int
4 Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.|. Word8
b)  -- convert pairs of values in [0..0xF] to values in [0..xFF]
    Maybe [Word8] -> ([Word8] -> ByteString) -> Maybe ByteString
forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> [Word8] -> ByteString
BS.pack