我为INI文件编写了一个基本的解析器:
{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Map as M
import Data.Maybe (fromMaybe)
import qualified Data.Text as T
type Ini = M.Map T.Text Section
data Section =
Section
{ name :: T.Text
, properties :: M.Map T.Text T.Text
}
deriving (Show)
main :: IO ()
main = parseIni iniFilePath >>= \ini -> putStrLn $ "Parsed INI: " ++ show ini
where
iniFilePath = "/home/me/test.ini"
parseIni :: FilePath -> IO Ini
parseIni iniFilePath = parseToIni . T.pack <$> readFile iniFilePath
parseToIni :: T.Text -> Ini
parseToIni stringToParse =
-- We return the parsed Ini, not the helper values
firstOfTriple $
foldr
(\line (ini, currentSectionMaybe, lineIndex) ->
-- We're at a new section start or the end of the file → add the previous section
-- to the parsed Ini value and create a new section
if isSectionHeader line || lineIndex >= length lines - 1
then let updatedIni = addSection ini currentSectionMaybe
in (updatedIni, Just $ Section (getSectionName line) M.empty, 1 + lineIndex)
else (ini, updateSection currentSectionMaybe line, 1 + lineIndex))
(M.empty, Nothing, 0) $
-- Since foldr is right associative we would process the lines starting with the last one, that's
-- why we reverse the list of lines
reverse lines
where
lines :: [T.Text]
lines = T.splitOn "\n" stringToParse
firstOfTriple :: (a, b, c) -> a
firstOfTriple (x, _, _) = x
parseProperty :: T.Text -> Maybe (T.Text, T.Text)
parseProperty line =
case T.splitOn "=" line of
[name, value] -> Just (T.strip name, T.strip value)
_ -> Nothing
updateSection :: Maybe Section -> T.Text -> Maybe Section
updateSection sectionMaybe line = fmap updateSection' sectionMaybe
where
updateSection' :: Section -> Section
updateSection' section =
-- Add the property to the section if the property can be parsed.
-- Otherwise, leave the section as it were
maybe
section
(\(propName, value) -> Section (name section) (M.insert propName value (properties section)))
(parseProperty line)
getSectionName :: T.Text -> T.Text
getSectionName line = fromMaybe line headerWithoutBracketsMaybe
where
headerWithoutBracketsMaybe = T.stripPrefix "[" line >>= T.stripSuffix "]"
isSectionHeader :: T.Text -> Bool
isSectionHeader line = T.isPrefixOf "[" strippedLine && T.isSuffixOf "]" strippedLine
where
strippedLine = T.strip line
addSection :: Ini -> Maybe Section -> Ini
addSection ini sectionMaybe = maybe ini (\section -> M.insert (name section) section ini) sectionMaybe我希望得到关于如何简化代码和/或提高代码可读性的反馈。
我现在意识到和接受的事情:
addSection可以被降低。发布于 2019-05-21 21:07:39
只是一些评论
parseToIni :: T.Text -> Ini表示从任意随机字符串中,parseToIni可以生成Ini。这让我想知道它将如何处理无效的.ini文件,或者例如字符串foo。updateSection :: Maybe Section -> T.Text -> Maybe Section中,Maybes混淆了函数应该做什么。如果第一个参数是Nothing,该函数可以生成Just吗?如果需要的话,最好去掉Maybes和fmap的全部内容。addSection是相似的。main只使用do-notation,它将更具可读性。parseToIni中,foldr的worker函数非常复杂,应该有一个类型注释。只需调用内部工作者函数f IMHO是可以的。Ini和Section's name字段中,节名都显示为键,这有点让人费解。我可能会删除name字段。addSection和updateSection中,参数的顺序有点非常规。对于部分应用程序来说,通常的a -> b -> b排序更好一些。maybe (以及类似于either的函数)并不能提高可读性。如果不想要变量名,请尝试LambdaCase扩展。Data.Text.IO.readFile。getSectionName和isSectionName不同,只有一个类型为Text -> Maybe SectionName的函数。总之,我认为您的代码是相当可读的。大多数情况下可能会更好一些。
发布于 2019-05-22 04:57:22
我知道您说过不用像parsec这样的解析器组合器库很好,但我想您可能想看看使用解析器的情况,所以我为您的数据类型编写了一个基于Attoparsec的解析器:
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
module IniParser where
import Control.Monad ( void )
import Data.Attoparsec.Text ( Parser
, char
, endOfInput
, endOfLine
, many'
, many1
, notInClass
, parseOnly
, satisfy
, space
)
import Data.Map.Strict ( Map )
import qualified Data.Map.Strict as Map
import Data.Text ( Text
, pack
)
import System.Environment ( getArgs )
type Ini = Map Text Section
data Section = Section
{ name :: Text
, properties :: Map Text Text
} deriving (Show)
main :: IO ()
main = do
[path] <- getArgs
parseIniFile path >>= \case
Right ini -> putStrLn $ "Parsed INI: " ++ show ini
Left err -> putStrLn $ "ERROR parsing ini: " ++ err
parseIniFile :: FilePath -> IO (Either String Ini)
parseIniFile iniFilePath = parseIni . pack <$> readFile iniFilePath
parseIni :: Text -> Either String Ini
parseIni = parseOnly ini
ini :: Parser Ini
ini = do
defaultSection <- lexeme (Section "" <$> (Map.fromList <$> many' property))
namedSections <- lexeme (many' section)
void $ endOfInput
let allSections | null (properties defaultSection) = namedSections
| otherwise = defaultSection:namedSections
pure . Map.fromList . map (\section -> (name section, section))
$ allSections
section :: Parser Section
section = Section <$> sectionName <*> (Map.fromList <$> many' (lexeme property))
sectionName :: Parser Text
sectionName = char '[' *> sectionNameChars <* char ']' <* endOfLine
sectionNameChars :: Parser Text
sectionNameChars = pack <$> many' (satisfy $ notInClass "]\r\n")
property :: Parser (Text, Text)
property = (,) <$> propertyName <*> (lexeme (char '=') *> propertyValue)
propertyName :: Parser Text
propertyName = pack <$> many' (satisfy $ notInClass "=\r\n\t ")
propertyValue :: Parser Text
propertyValue = pack <$> many' (satisfy $ notInClass "\r\n")
lexeme :: Parser a -> Parser a
lexeme p = whitespace *> p <* whitespace
whitespace :: Parser String
whitespace = many' space我认为这种方法的主要优点是不言而喻的。它消除了所有的多行lambdas,整个foldr,等等,这些(至少是IMHO)实际上掩盖了代码所表达的本质。
此外,我还将合格导入的使用限制在一种用法上,我认为这会使代码更加可读性,尽管您的品味可能会有所不同。
如果您感兴趣,可以看到整个基于堆栈的这里的项目。
https://codereview.stackexchange.com/questions/220460
复制相似问题