Factor source code-related facilities into a new package by robrix · Pull Request #269 · github/semantic · GitHub
[go: up one dir, main page]

Skip to content
This repository was archived by the owner on Apr 1, 2025. It is now read-only.

Factor source code-related facilities into a new package #269

Merged
merged 49 commits into from
Sep 20, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
974e2ca
Define a semantic-source package.
robrix Sep 20, 2019
a66f459
Merge branch 'master' into semantic-source
robrix Sep 20, 2019
5802d46
Move the ToJSONFields instance for Range into Data.JSON.Fields.
robrix Sep 20, 2019
10e4bbb
Move the ToJSONFields instance for Span into Data.JSON.Fields.
robrix Sep 20, 2019
57ab2f6
Link the doctests against the lib.
robrix Sep 20, 2019
2c99f09
Link the doctests against QuickCheck.
robrix Sep 20, 2019
a126e39
Use the right dir for the doctests.
robrix Sep 20, 2019
4e40108
Copy Range in.
robrix Sep 20, 2019
325e1f1
Derive a Hashable instance for Range.
robrix Sep 20, 2019
ddef713
Copy Span in.
robrix Sep 20, 2019
81f43c9
Move the ToJSONFields instance for Location into Data.JSON.Fields.
robrix Sep 20, 2019
2748529
Copy Location in as Loc.
robrix Sep 20, 2019
cc82051
Depend on semantic-source.
robrix Sep 20, 2019
1d5e150
Switch everything over to using Source.Range.
robrix Sep 20, 2019
17c61c1
Switch everything over to using Source.Span.
robrix Sep 20, 2019
0f8e69c
Switch everything over to using Source.Loc.
robrix Sep 20, 2019
f6e4864
Move the span/range stuff into CMark.
robrix Sep 20, 2019
b20dcf4
Copy Source in.
robrix Sep 20, 2019
8aae312
Rename the Source symbols and recommend importing it qualified.
robrix Sep 20, 2019
ca6a785
Flip lineRangesWithin.
robrix Sep 20, 2019
d929a8c
Make Data.Source reexport Source.Source.
Sep 20, 2019
948deb4
Fixup remaining test cases.
Sep 20, 2019
7b599a6
Use Source.Source instead of Data.Source.
Sep 20, 2019
a422061
Delete Data.Source.
Sep 20, 2019
f17a2e8
Remove Data.Source from the .cabal file.
Sep 20, 2019
f0567fd
De-suffix dropSource and takeSource.
Sep 20, 2019
86682d8
De-suffix sourceBytes.
Sep 20, 2019
74693f4
Bring in the Source tests.
robrix Sep 20, 2019
2ce8b51
:fire: Data.Source.Spec.
robrix Sep 20, 2019
c86186a
:fire: a redundant import.
robrix Sep 20, 2019
a00a78e
Merge branch 'master' into semantic-source
robrix Sep 20, 2019
bb20471
Define lenses for the starts/ends of Range.
robrix Sep 20, 2019
64ef37e
Rename the line/column lenses to line_/column_.
robrix Sep 20, 2019
1e6ebd2
Rename posLine/posColumn to line/column.
robrix Sep 20, 2019
57c385d
Rename the HasSpan start/end lenses to start_/end_.
robrix Sep 20, 2019
d59a44b
Rename the HasSpan span lens to span_.
robrix Sep 20, 2019
7d1567e
:fire: a bunch of redundant hidden imports.
robrix Sep 20, 2019
0312300
Rename the spanStart/spanEnd fields to start/end.
robrix Sep 20, 2019
e08a495
Define a point function for Range.
robrix Sep 20, 2019
935acb4
:memo: point.
robrix Sep 20, 2019
6356443
Define a point constructor for Span.
robrix Sep 20, 2019
e28e81b
:memo: point.
robrix Sep 20, 2019
9551742
Use point to define emptyTerm.
robrix Sep 20, 2019
52bc7e6
Rename locByteRange/locSpan to byteRange/span.
robrix Sep 20, 2019
4bc5491
Extract lens to the top level.
robrix Sep 20, 2019
909fa63
Define a byteRange_ lens for Loc.
robrix Sep 20, 2019
8df1345
Run semantic-source’s tests in CI.
robrix Sep 20, 2019
918bfb4
Apparently this should not exist.
robrix Sep 20, 2019
77ff50b
Run the doctests from the right place.
robrix Sep 20, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Copy Source in.
  • Loading branch information
robrix committed Sep 20, 2019
commit b20dcf4a19a5865fcf2831f14d619e8e6b947e08
3 changes: 3 additions & 0 deletions semantic-source/semantic-source.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,19 @@ library
exposed-modules:
Source.Loc
Source.Range
Source.Source
Source.Span
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IMO we should format .cabal files this way in general. Aligning crap to the right of labels is terrible.

-- other-modules:
-- other-extensions:
build-depends:
aeson ^>= 1.4.2.0
, base >= 4.12 && < 5
, bytestring ^>= 0.10.8.2
, deepseq ^>= 1.4.4.0
, generic-monoid ^>= 0.1.0.0
, hashable ^>= 1.2.7.0
, semilattices ^>= 0.0.0.3
, text ^>= 1.2.3.1
hs-source-dirs: src

test-suite doctest
Expand Down
133 changes: 133 additions & 0 deletions semantic-source/src/Source/Source.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
{-# LANGUAGE DeriveGeneric, GeneralizedNewtypeDeriving #-}
module Source.Source
( Source
, sourceBytes
, fromUTF8
-- * Measurement
, sourceLength
, nullSource
, totalRange
, totalSpan
-- * En/decoding
, fromText
, toText
-- * Slicing
, slice
, dropSource
, takeSource
-- * Splitting
, sourceLines
, sourceLineRanges
, sourceLineRangesWithin
, newlineIndices
) where

import Control.Arrow ((&&&))
import Data.Aeson (FromJSON (..), withText)
import qualified Data.ByteString as B
import Data.Char (ord)
import Data.Maybe (fromMaybe)
import Data.Monoid (Last(..))
import Data.Semilattice.Lower
import Data.String (IsString (..))
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import GHC.Generics (Generic)
import Source.Range
import Source.Span hiding (HasSpan (..))


-- | The contents of a source file, represented as a UTF-8 encoded
-- 'B.ByteString' under the hood.
--
-- Construct these with 'fromUTF8' when the bytes are already known to be
-- UTF-8, or from a string literal via 'fromString'. Passing non-UTF-8 bytes
-- to 'fromUTF8' will cause crashes.
newtype Source = Source { sourceBytes :: B.ByteString }
  deriving (Eq, Semigroup, Monoid, Show, Generic)

-- | Build a 'Source' from a 'String' by UTF-8 encoding it.
--
-- This instance is written by hand rather than newtype-derived: the
-- 'IsString' instance of 'B.ByteString' truncates every 'Char' to 8 bits,
-- which would yield non-UTF-8 bytes for any non-ASCII literal. ASCII-only
-- strings produce exactly the same bytes as before.
instance IsString Source where
  fromString = Source . T.encodeUtf8 . T.pack

-- | Wrap a 'B.ByteString' as a 'Source'. No validation is performed, so the
-- caller is responsible for supplying UTF-8 encoded bytes.
fromUTF8 :: B.ByteString -> Source
fromUTF8 bytes = Source bytes

-- | Decode a 'Source' from a JSON string by UTF-8 encoding its text via
-- 'fromText'.
instance FromJSON Source where
  parseJSON = withText "Source" $ \text -> pure (fromText text)


-- Measurement

-- | The length of the 'Source', measured in bytes (not characters).
sourceLength :: Source -> Int
sourceLength (Source bytes) = B.length bytes

-- | Whether the 'Source' contains no bytes at all.
nullSource :: Source -> Bool
nullSource (Source bytes) = B.null bytes

-- | The byte 'Range' spanning the whole 'Source': from offset 0 up to its
-- length in bytes.
totalRange :: Source -> Range
totalRange source = Range 0 (sourceLength source)

-- | The 'Span' spanning the whole 'Source'.
--
-- It starts at the lowest 'Pos'. Its end line is the number of line ranges
-- reported by 'sourceLineRanges', and its end column is one more than the
-- byte length of the last of those ranges.
totalSpan :: Source -> Span
totalSpan source = Span lowerBound (Pos lineCount lastColumn)
  where
    lineRanges = sourceLineRanges source
    lineCount  = length lineRanges
    -- Pick the final line range, falling back to the lower bound if there is none.
    finalRange = fromMaybe lowerBound (getLast (foldMap (Last . Just) lineRanges))
    lastColumn = succ (end finalRange - start finalRange)


-- En/decoding

-- | Build a 'Source' by UTF-8 encoding the given 'T.Text'.
fromText :: T.Text -> Source
fromText text = Source (T.encodeUtf8 text)

-- | Decode the bytes of a 'Source' as UTF-8 'T.Text'.
--
-- Decoding uses 'T.decodeUtf8', so a 'Source' holding bytes that are not
-- valid UTF-8 will fail at this point.
toText :: Source -> T.Text
toText (Source bytes) = T.decodeUtf8 bytes


-- Slicing

-- | Extract the part of a 'Source' covered by a byte 'Range': skip the bytes
-- before the range's start, then keep as many bytes as the range is long.
slice :: Source -> Range -> Source
slice source range = takeSource (rangeLength range) (dropSource (start range) source)

-- | Discard the first @i@ bytes of a 'Source'. The count is in bytes, not
-- characters.
dropSource :: Int -> Source -> Source
dropSource i (Source bytes) = Source (B.drop i bytes)

-- | Keep only the first @i@ bytes of a 'Source'. The count is in bytes, not
-- characters.
takeSource :: Int -> Source -> Source
takeSource i (Source bytes) = Source (B.take i bytes)


-- Splitting

-- | Break a 'Source' into its lines, one 'Source' per 'Range' reported by
-- 'sourceLineRanges'. Each piece is cut just after its newline.
sourceLines :: Source -> [Source]
sourceLines source = map (slice source) (sourceLineRanges source)

-- | The byte 'Range' of every line in a 'Source', obtained by running
-- 'sourceLineRangesWithin' over the source's 'totalRange'.
sourceLineRanges :: Source -> [Range]
sourceLineRanges source = sourceLineRangesWithin wholeRange source
  where
    wholeRange = totalRange source

-- | The byte 'Range' of every line inside the given 'Range' of a 'Source'.
--
-- The first line starts at the start of the range and the last line ends at
-- its end; every other boundary falls immediately after a newline found by
-- 'newlineIndices'.
sourceLineRangesWithin :: Range -> Source -> [Range]
sourceLineRangesWithin range source = zipWith Range lineStarts lineEnds
  where
    -- Newline indices are relative to the slice; shift each one to the
    -- absolute offset just past that newline.
    boundaries = map (+ succ (start range)) (newlineIndices (sourceBytes (slice source range)))
    lineStarts = start range : boundaries
    lineEnds   = boundaries <> [end range]

-- | Return the index of every line ending ('\n', '\r', and '\r\n') in the
-- 'B.ByteString', in ascending order.
--
-- A lone '\n' or '\r' is reported at its own index. A '\r' immediately
-- followed by '\n' counts as a single line ending, reported at the index of
-- the '\n'.
--
-- The input is walked once from front to back. Each step scans only as far
-- as the next line-ending byte, rather than searching the whole remainder
-- separately for '\r' and for '\n'.
newlineIndices :: B.ByteString -> [Int]
newlineIndices = go 0 where
  -- @offset@ is the absolute index of the first byte of @bytes@.
  go offset bytes = case B.break isLineBreak bytes of
    (line, rest) -> case B.uncons rest of
      -- No line-ending byte is left.
      Nothing -> []
      Just (byte, afterByte)
        -- CR LF: skip both bytes and report the position of the LF.
        | byte == cr
        , Just (next, afterPair) <- B.uncons afterByte
        , next == lf -> emit offset (succ (B.length line)) afterPair
        | otherwise  -> emit offset (B.length line) afterByte
  -- Report the absolute index, then continue just past it.
  emit offset i remaining = let j = offset + i in j : go (succ j) remaining
  isLineBreak byte = byte == lf || byte == cr
  lf = toEnum (ord '\n')
  cr = toEnum (ord '\r')
{-# INLINE newlineIndices #-}
0