From e7deae4242eb030a19b91130b522b306359b1c37 Mon Sep 17 00:00:00 2001 From: justinwoo Date: Sun, 10 Jun 2018 09:21:58 +0200 Subject: [PATCH 1/2] get rid of usages of codepoints functions that are slow --- src/Text/Parsing/StringParser/String.purs | 13 +++++++------ test/Main.purs | 6 +++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Text/Parsing/StringParser/String.purs b/src/Text/Parsing/StringParser/String.purs index 1f0b2b9..b1896ab 100644 --- a/src/Text/Parsing/StringParser/String.purs +++ b/src/Text/Parsing/StringParser/String.purs @@ -27,8 +27,9 @@ import Data.Char (toCharCode) import Data.Either (Either(..)) import Data.Foldable (class Foldable, foldMap, elem, notElem) import Data.Maybe (Maybe(..)) -import Data.String (Pattern(..), drop, length, indexOf', stripPrefix) import Data.String.CodeUnits (charAt, singleton) +import Data.String.CodeUnits as SCU +import Data.String.Pattern (Pattern(..)) import Data.String.Regex as Regex import Data.String.Regex.Flags (noFlags) import Text.Parsing.StringParser (Parser(..), ParseError(..), try, fail) @@ -38,7 +39,7 @@ import Text.Parsing.StringParser.Combinators (many, (<?>)) eof :: Parser Unit eof = Parser \s -> case s of - { str, pos } | pos < length str -> Left { pos, error: ParseError "Expected EOF" } + { str, pos } | pos < SCU.length str -> Left { pos, error: ParseError "Expected EOF" } _ -> Right { result: unit, suffix: s } -- | Match any character. @@ -60,7 +61,7 @@ anyDigit = try do string :: String -> Parser String string nt = Parser \s -> case s of - { str, pos } | indexOf' (Pattern nt) pos str == Just pos -> Right { result: nt, suffix: { str, pos: pos + length nt } } + { str, pos } | SCU.indexOf' (Pattern nt) pos str == Just pos -> Right { result: nt, suffix: { str, pos: pos + SCU.length nt } } { pos } -> Left { pos, error: ParseError ("Expected '" <> nt <> "'.") } -- | Match a character satisfying the given predicate. 
@@ -128,7 +129,7 @@ regex pat = where -- ensure the pattern only matches the current position in the parse pattern = - case stripPrefix (Pattern "^") pat of + case SCU.stripPrefix (Pattern "^") pat of Nothing -> "^" <> pat _ -> @@ -137,10 +138,10 @@ regex pat = matchRegex r = Parser \{ str, pos } -> let - remainder = drop pos str + remainder = SCU.drop pos str in case NEA.head <$> Regex.match r remainder of Just (Just matched) -> - Right { result: matched, suffix: { str, pos: pos + length matched } } + Right { result: matched, suffix: { str, pos: pos + SCU.length matched } } _ -> Left { pos, error: ParseError "no match" } diff --git a/test/Main.purs b/test/Main.purs index 94d4c5a..685b3fb 100644 --- a/test/Main.purs +++ b/test/Main.purs @@ -9,8 +9,8 @@ import Data.List (List(Nil), (:)) import Data.List.Lazy (take, repeat) import Data.List.NonEmpty (NonEmptyList(..)) import Data.NonEmpty ((:|)) -import Data.String (joinWith) import Data.String.CodeUnits (singleton) +import Data.String.Common as SC import Data.Unfoldable (replicate) import Effect (Effect) import Test.Assert (assert', assert) @@ -66,11 +66,11 @@ expectResult res p input = runParser p input == Right res main :: Effect Unit main = do - assert' "many should not blow the stack" $ canParse (many (string "a")) (joinWith "" $ replicate 100000 "a") + assert' "many should not blow the stack" $ canParse (many (string "a")) (SC.joinWith "" $ replicate 100000 "a") assert' "many failing after" $ parseFail (do as <- many (string "a") eof - pure as) (joinWith "" (replicate 100000 "a") <> "b" ) + pure as) (SC.joinWith "" (replicate 100000 "a") <> "b" ) assert $ expectResult 3 nested "(((a)))" assert $ expectResult ("a":"a":"a":Nil) (many (string "a")) "aaa" From 0fc6730c7096ac9ce3962299bf4179df27ca460d Mon Sep 17 00:00:00 2001 From: justinwoo Date: Wed, 4 Jul 2018 14:39:56 +0300 Subject: [PATCH 2/2] update README for code units message --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/README.md b/README.md index 61c6faa..54f1a13 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Dependency status](https://img.shields.io/librariesio/github/purescript-contrib/purescript-string-parsers.svg)](https://libraries.io/github/purescript-contrib/purescript-string-parsers) [![Maintainer: paf31](https://img.shields.io/badge/maintainer-paf31-lightgrey.svg)](http://github.com/paf31) -A parsing library for parsing strings. +A parsing library for parsing strings using [Code Units](https://pursuit.purescript.org/packages/purescript-strings/docs/Data.String.CodeUnits) (JS Strings). It does not handle [Code Points](https://pursuit.purescript.org/packages/purescript-strings/docs/Data.String.CodePoints). This library is a simpler, faster alternative to `purescript-parsing`, for when you know your input will be a string.