{-# LANGUAGE DefaultSignatures #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TupleSections #-} {-# LANGUAGE TypeApplications #-} {- | A typeclass which provides a `parseConstant` method to convert Text strings into objects of the appropriate type. This allows one to define parsers for literal constants belonging to extensible built-in types without having to modify the main Plutus Core parser. We expect parseConstant . prettyConst to be the identity function. -} module PlutusCore.Parsable ( Parsable(..) ) where import PlutusPrelude import PlutusCore.Data import Data.Bits (shiftL, (.|.)) import Data.ByteString (ByteString) import Data.ByteString qualified as BS (pack) import Data.Char (ord) import Data.Text qualified as T import Text.Read {- Note [Parsing horribly broken] As the title suggests, at the moment parsing is horribly broken. 'parse' expects a closed chunk of text, but in order to provide one we need to determine in the main parsing pipeline (which can be Happy-based or megaparsec-based) where that closed chunk ends (determining where it starts is easy). So we need either of these two: 1. perform lexical analysis, cut the right piece and feed it to 'parse' 2. make 'parse' take as much as it needs and return the rest to the main parsing machinery The latter option is quite non-trivial, because we have that Happy parser and it's hard to poke a hole in it to get some custom parsing of constants consuming an arbitrary amount of symbols (we don't even have symbols in Happy -- it's tokens there). So if we wanted to do the latter, it would probably be the easiest option to just remove the Happy parser and replace it with a megaparsec-based one (should be the simplest thing in the world, given that PIR is a superset of PLC and already has a megaparsec-based parser). There are arguments in favor of the former option (https://github.com/input-output-hk/plutus/pull/2458#discussion_r522091227): > If you look in Lexer.x there's extensive commentary about how it parses constants. We could steal the relevant regular expressions from there or reuse the entire lexer. I'm not convinced that letting types do their own lexical analysis is a good idea: doing these things correctly can be quite tricky and if you got it wrong it might mess up the main parser in some subtle way that doesn't show up until you encounter an unexpected situation. The way that it's done at the moment provides reasonably general syntax for constants and I'm fairly confident it's correct. I guess I feel that this is low-level stuff that should be done once and then shut away in a box where it won't cause any trouble. however it's not that simple. Consider a list of lists: let's say the lexer determines what constitutes a closed chunk of text representing such a value. And we feed that chunk to 'parse'. But now we need to do lexical analysis again, but this time in 'parse' to determine whether a @,@ is a part of formatting or is inside an element (being, say, a string or a list of strings) of the list. I.e. our constant parsers can be recursive and it's a pain to deal with when you do all the lexical analysis upfront. So at the moment we don't do anything correctly. Neither PLC nor PIR can handle lists of lists and none of them can handle tuples at all ('Read' provides us with overloaded parsing for lists but not tuples, hence the difference) and PIR, doing its own broken lexical analysis, fails on things like @(con string "yes (no)")@. This mess needs to be fixed at some point. It seems that dumping Happy and using megaparsec eveywhere and making 'Parsable' megaparsec-based would be the simplest option and we don't really care about the efficiency of the parser. -} parseDefault :: Read a => T.Text -> Maybe a parseDefault :: Text -> Maybe a parseDefault = String -> Maybe a forall a. Read a => String -> Maybe a readMaybe (String -> Maybe a) -> (Text -> String) -> Text -> Maybe a forall b c a. (b -> c) -> (a -> b) -> a -> c . Text -> String T.unpack -- | A class for things that are parsable. Those include tags for built-in types and constants of -- such types. class Parsable a where -- | Return Nothing if the string is invalid, otherwise the corresponding value of type a. parse :: T.Text -> Maybe a -- | Overloading parsing for lists to special-case 'String' (the GHC's billion dollar mistake). parseList :: T.Text -> Maybe [a] parseList Text text = String -> Maybe [a] forall a. HasCallStack => String -> a error (String -> Maybe [a]) -> String -> Maybe [a] forall a b. (a -> b) -> a -> b $ String "Parsing of lists of this type is not implemented. Caused by: " String -> String -> String forall a. [a] -> [a] -> [a] ++ Text -> String T.unpack Text text newtype AsRead a = AsRead a instance Read a => Parsable (AsRead a) where parse :: Text -> Maybe (AsRead a) parse = (Text -> Maybe a) -> Text -> Maybe (AsRead a) coerce ((Text -> Maybe a) -> Text -> Maybe (AsRead a)) -> (Text -> Maybe a) -> Text -> Maybe (AsRead a) forall a b. (a -> b) -> a -> b $ Read a => Text -> Maybe a forall a. Read a => Text -> Maybe a parseDefault @a parseList :: Text -> Maybe [AsRead a] parseList = (Text -> Maybe [a]) -> Text -> Maybe [AsRead a] coerce ((Text -> Maybe [a]) -> Text -> Maybe [AsRead a]) -> (Text -> Maybe [a]) -> Text -> Maybe [AsRead a] forall a b. (a -> b) -> a -> b $ Read [a] => Text -> Maybe [a] forall a. Read a => Text -> Maybe a parseDefault @[a] instance Parsable a => Parsable [a] where parse :: Text -> Maybe [a] parse = Text -> Maybe [a] forall a. Parsable a => Text -> Maybe [a] parseList -- >>> :set -XOverloadedStrings -- >>> parse "[False, True]" :: Maybe [Bool] -- Just [False,True] -- >>> parse "\"abc\"" :: Maybe String -- Just "abc" -- >>> parse "[\"abc\"]" :: Maybe [String] -- *** Exception: Parsing of lists of this type is not implemented. Caused by: ["abc"] -- >>> parse "(1, False)" :: Maybe (Integer, Bool) -- *** Exception: Parsing for tuples is not implemented deriving via AsRead Bool instance Parsable Bool deriving via AsRead Char instance Parsable Char deriving via AsRead Integer instance Parsable Integer deriving via AsRead () instance Parsable () deriving via AsRead T.Text instance Parsable T.Text instance Parsable (a, b) where parse :: Text -> Maybe (a, b) parse = String -> Text -> Maybe (a, b) forall a. HasCallStack => String -> a error String "Parsing for tuples is not implemented" instance Parsable ByteString where parse :: Text -> Maybe ByteString parse = Text -> Maybe ByteString parseByteStringConstant instance Parsable Data where parse :: Text -> Maybe Data parse = String -> Text -> Maybe Data forall a. HasCallStack => String -> a error String "Implement me" --- Parsing bytestrings --- -- A literal bytestring consists of the '#' character followed immediately by -- an even number of upper- or lower-case hex digits. parseByteStringConstant :: T.Text -> Maybe ByteString parseByteStringConstant :: Text -> Maybe ByteString parseByteStringConstant Text lit = do case Text -> String T.unpack Text lit of Char '#':String body -> String -> Maybe ByteString asBSLiteral String body String _ -> Maybe ByteString forall a. Maybe a Nothing -- | Convert a list to a list of pairs, failing if the input list has an odd number of elements pairs :: [a] -> Maybe [(a,a)] pairs :: [a] -> Maybe [(a, a)] pairs [] = [(a, a)] -> Maybe [(a, a)] forall a. a -> Maybe a Just [] pairs (a a:a b:[a] rest) = ((a a,a b)(a, a) -> [(a, a)] -> [(a, a)] forall a. a -> [a] -> [a] :) ([(a, a)] -> [(a, a)]) -> Maybe [(a, a)] -> Maybe [(a, a)] forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b <$> [a] -> Maybe [(a, a)] forall a. [a] -> Maybe [(a, a)] pairs [a] rest pairs [a] _ = Maybe [(a, a)] forall a. Maybe a Nothing hexDigitToWord8 :: Char -> Maybe Word8 hexDigitToWord8 :: Char -> Maybe Word8 hexDigitToWord8 Char c = let x :: Word8 x = Char -> Word8 ord8 Char c in if Char '0' Char -> Char -> Bool forall a. Ord a => a -> a -> Bool <= Char c Bool -> Bool -> Bool && Char c Char -> Char -> Bool forall a. Ord a => a -> a -> Bool <= Char '9' then Word8 -> Maybe Word8 forall a. a -> Maybe a Just (Word8 -> Maybe Word8) -> Word8 -> Maybe Word8 forall a b. (a -> b) -> a -> b $ Word8 x Word8 -> Word8 -> Word8 forall a. Num a => a -> a -> a - Char -> Word8 ord8 Char '0' else if Char 'a' Char -> Char -> Bool forall a. Ord a => a -> a -> Bool <= Char c Bool -> Bool -> Bool && Char c Char -> Char -> Bool forall a. Ord a => a -> a -> Bool <= Char 'f' then Word8 -> Maybe Word8 forall a. a -> Maybe a Just (Word8 -> Maybe Word8) -> Word8 -> Maybe Word8 forall a b. (a -> b) -> a -> b $ Word8 x Word8 -> Word8 -> Word8 forall a. Num a => a -> a -> a - Char -> Word8 ord8 Char 'a' Word8 -> Word8 -> Word8 forall a. Num a => a -> a -> a + Word8 10 else if Char 'A' Char -> Char -> Bool forall a. Ord a => a -> a -> Bool <= Char c Bool -> Bool -> Bool && Char c Char -> Char -> Bool forall a. Ord a => a -> a -> Bool <= Char 'F' then Word8 -> Maybe Word8 forall a. a -> Maybe a Just (Word8 -> Maybe Word8) -> Word8 -> Maybe Word8 forall a b. (a -> b) -> a -> b $ Word8 x Word8 -> Word8 -> Word8 forall a. Num a => a -> a -> a - Char -> Word8 ord8 Char 'A' Word8 -> Word8 -> Word8 forall a. Num a => a -> a -> a + Word8 10 else Maybe Word8 forall a. Maybe a Nothing where ord8 :: Char -> Word8 ord8 :: Char -> Word8 ord8 = Int -> Word8 forall a b. (Integral a, Num b) => a -> b fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8 forall b c a. (b -> c) -> (a -> b) -> a -> c . Char -> Int Data.Char.ord -- | Convert a String into a ByteString, failing if the string has odd length -- or any of its characters are not hex digits asBSLiteral :: String -> Maybe ByteString asBSLiteral :: String -> Maybe ByteString asBSLiteral String s = (Char -> Maybe Word8) -> String -> Maybe [Word8] forall (t :: * -> *) (m :: * -> *) a b. (Traversable t, Monad m) => (a -> m b) -> t a -> m (t b) mapM Char -> Maybe Word8 hexDigitToWord8 String s Maybe [Word8] -> ([Word8] -> Maybe [(Word8, Word8)]) -> Maybe [(Word8, Word8)] forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b >>= [Word8] -> Maybe [(Word8, Word8)] forall a. [a] -> Maybe [(a, a)] pairs -- convert s into a list of pairs of Word8 values in [0..0xF] Maybe [(Word8, Word8)] -> ([(Word8, Word8)] -> [Word8]) -> Maybe [Word8] forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b <&> ((Word8, Word8) -> Word8) -> [(Word8, Word8)] -> [Word8] forall a b. (a -> b) -> [a] -> [b] map (\(Word8 a,Word8 b) -> Word8 -> Int -> Word8 forall a. Bits a => a -> Int -> a shiftL Word8 a Int 4 Word8 -> Word8 -> Word8 forall a. Bits a => a -> a -> a .|. Word8 b) -- convert pairs of values in [0..0xF] to values in [0..xFF] Maybe [Word8] -> ([Word8] -> ByteString) -> Maybe ByteString forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b <&> [Word8] -> ByteString BS.pack