csv-conduit-0.7.3.0/src/0000755000000000000000000000000013550145407013076 5ustar0000000000000000csv-conduit-0.7.3.0/src/Data/0000755000000000000000000000000013550145407013747 5ustar0000000000000000csv-conduit-0.7.3.0/src/Data/CSV/0000755000000000000000000000000014071111440014367 5ustar0000000000000000csv-conduit-0.7.3.0/src/Data/CSV/Conduit/0000755000000000000000000000000014071130675016007 5ustar0000000000000000csv-conduit-0.7.3.0/src/Data/CSV/Conduit/Conversion/0000755000000000000000000000000013550145407020134 5ustar0000000000000000csv-conduit-0.7.3.0/src/Data/CSV/Conduit/Parser/0000755000000000000000000000000013550145407017243 5ustar0000000000000000csv-conduit-0.7.3.0/test/0000755000000000000000000000000014071131317013260 5ustar0000000000000000csv-conduit-0.7.3.0/src/Data/CSV/Conduit.hs0000644000000000000000000004372014071111440016336 0ustar0000000000000000{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TypeSynonymInstances #-} {-# LANGUAGE UndecidableInstances #-} module Data.CSV.Conduit ( -- * Main Interface decodeCSV , readCSVFile , writeCSVFile , transformCSV , transformCSV' , mapCSVFile , writeHeaders , writeHeadersOrdered -- Types , CSV (..) , CSVSettings (..) , defCSVSettings , MapRow , OrderedMapRow , Row -- * Re-exported For Convenience , runResourceT ) where ------------------------------------------------------------------------------- import Control.Exception import Control.Monad.Catch.Pure (CatchT) import Control.Monad.Catch.Pure (runCatchT) import Control.Monad.Except import Control.Monad.Primitive import Control.Monad.ST import Control.Monad.Trans.Resource (MonadResource, MonadThrow, runResourceT) import Data.Attoparsec.Types (Parser) import qualified Data.ByteString as B import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as B8 import Data.ByteString.Internal (c2w) import Data.Conduit import Data.Conduit.Attoparsec import Data.Conduit.Binary (sinkFile, sinkIOHandle, sourceFile) import qualified Data.Conduit.List as C import qualified Data.Map as M import qualified Data.Map.Ordered as MO import Data.String import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Vector as V import qualified Data.Vector.Generic as GV import qualified Data.Vector.Generic.Mutable as GMV import Data.Void as Void import System.IO ------------------------------------------------------------------------------- import Data.CSV.Conduit.Conversion (FromNamedRecord (..), FromNamedRecordOrdered (..), Named (..), NamedOrdered (..), ToNamedRecord (..), ToNamedRecordOrdered (..), runParser) import qualified Data.CSV.Conduit.Parser.ByteString as BSP import qualified Data.CSV.Conduit.Parser.Text as TP import Data.CSV.Conduit.Types ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- -- | Represents types 'r' that are CSV-like and can be converted -- to/from an underlying stream of type 's'. There is nothing scary -- about the type: -- -- @s@ represents stream types that can be converted to\/from CSV rows. -- Examples are 'ByteString', 'Text' and 'String'. -- -- @r@ represents the target CSV row representations that this library -- can work with. Examples are the 'Row' types, the 'Record' type and -- the 'MapRow' family of types. We can also convert directly to -- complex Haskell types using the 'Data.CSV.Conduit.Conversion' -- module that was borrowed from the cassava package, which was itself -- inspired by the aeson package. -- -- -- Example #1: Basics Using Convenience API -- -- >import Data.Conduit -- >import Data.Conduit.Binary -- >import Data.Conduit.List as CL -- >import Data.CSV.Conduit -- > -- >myProcessor :: Conduit (Row Text) m (Row Text) -- >myProcessor = CL.map reverse -- > -- >test = runResourceT $ -- > transformCSV defCSVSettings -- > (sourceFile "input.csv") -- > myProcessor -- > (sinkFile "output.csv") -- -- -- Example #2: Basics Using Conduit API -- -- >import Data.Conduit -- >import Data.Conduit.Binary -- >import Data.CSV.Conduit -- > -- >myProcessor :: Conduit (MapRow Text) m (MapRow Text) -- >myProcessor = undefined -- > -- >test = runResourceT $ runConduit $ -- > sourceFile "test/BigFile.csv" .| -- > intoCSV defCSVSettings .| -- > myProcessor .| -- > (writeHeaders defCSVSettings >> fromCSV defCSVSettings) .| -- > sinkFile "test/BigFileOut.csv" class CSV s r where ----------------------------------------------------------------------------- -- | Convert a CSV row into strict ByteString equivalent. rowToStr :: CSVSettings -> r -> s ----------------------------------------------------------------------------- -- | Turn a stream of 's' into a stream of CSV row type. An example -- would be parsing a ByteString stream as rows of 'MapRow' 'Text'. intoCSV :: (MonadThrow m) => CSVSettings -> ConduitM s r m () ----------------------------------------------------------------------------- -- | Turn a stream of CSV row type back into a stream of 's'. An -- example would be rendering a stream of 'Row' 'ByteString' rows as -- 'Text'. fromCSV :: Monad m => CSVSettings -> ConduitM r s m () ------------------------------------------------------------------------------ -- | 'Row' instance using 'ByteString' instance CSV ByteString (Row ByteString) where rowToStr s !r = let sep = B.pack [c2w (csvSep s)] wrapField !f = case csvQuoteChar s of Just !x-> (x `B8.cons` escape x f) `B8.snoc` x _ -> f escape c str = B8.intercalate (B8.pack [c,c]) $ B8.split c str in B.intercalate sep . map wrapField $ r intoCSV set = intoCSVRow (BSP.row set) fromCSV set = fromCSVRow set ------------------------------------------------------------------------------ -- | 'Row' instance using 'Text' instance CSV Text (Row Text) where rowToStr s !r = let sep = T.pack [csvSep s] wrapField !f = case csvQuoteChar s of Just !x-> x `T.cons` escape x f `T.snoc` x _ -> f escape c str = T.intercalate (T.pack [c,c]) $ T.split (== c) str in T.intercalate sep . map wrapField $ r intoCSV set = intoCSVRow (TP.row set) fromCSV set = fromCSVRow set ------------------------------------------------------------------------------- -- | 'Row' instance using 'Text' based on 'ByteString' stream instance CSV ByteString (Row Text) where rowToStr s r = T.encodeUtf8 $ rowToStr s r intoCSV set = intoCSV set .| C.map (map T.decodeUtf8) fromCSV set = fromCSV set .| C.map T.encodeUtf8 ------------------------------------------------------------------------------- -- | 'Row' instance using 'String' based on 'ByteString' stream. -- Please note this uses the ByteString operations underneath and has -- lots of unnecessary overhead. Included for convenience. instance CSV ByteString (Row String) where rowToStr s r = rowToStr s $ map B8.pack r intoCSV set = intoCSV set .| C.map (map B8.unpack) fromCSV set = C.map (map B8.pack) .| fromCSV set -- | Support for parsing rows in the 'Vector' form. instance (CSV s (Row s)) => CSV s (V.Vector s) where rowToStr s r = rowToStr s . V.toList $ r intoCSV set = intoCSV set .| C.map (V.fromList) fromCSV set = C.map (V.toList) .| fromCSV set ------------------------------------------------------------------------------- fromCSVRow :: (Monad m, IsString s, CSV s r) => CSVSettings -> ConduitM r s m () fromCSVRow set = awaitForever $ \row -> mapM_ yield [rowToStr set row, "\n"] ------------------------------------------------------------------------------- intoCSVRow :: (MonadThrow m, AttoparsecInput i) => Parser i (Maybe o) -> ConduitM i o m () intoCSVRow p = parse .| puller where parse = {-# SCC "conduitParser_p" #-} conduitParser p puller = {-# SCC "puller" #-} awaitForever $ \ (_, mrow) -> maybe (return ()) yield mrow ------------------------------------------------------------------------------- -- | Generic 'MapRow' instance; any stream type with a 'Row' instance -- automatically gets a 'MapRow' instance. instance (CSV s (Row s'), Ord s', IsString s) => CSV s (MapRow s') where rowToStr s r = rowToStr s . M.elems $ r intoCSV set = intoCSVMap set fromCSV set = fromCSVMap set instance (CSV s (Row s'), Ord s', IsString s) => CSV s (OrderedMapRow s') where rowToStr s r = rowToStr s . (map snd . MO.assocs) $ r intoCSV set = intoCSVMapOrdered set fromCSV set = fromCSVMapOrdered set ------------------------------------------------------------------------------- intoCSVMap :: (Ord a, MonadThrow m, CSV s [a]) => CSVSettings -> ConduitM s (MapRow a) m () intoCSVMap set = intoCSV set .| (headers >>= converter) where headers = do mrow <- await case mrow of Nothing -> return [] Just [] -> headers Just hs -> return hs converter hs = awaitForever $ yield . toMapCSV hs toMapCSV !hs !fs = M.fromList $ zip hs fs intoCSVMapOrdered :: (Ord a, MonadThrow m, CSV s [a]) => CSVSettings -> ConduitM s (OrderedMapRow a) m () intoCSVMapOrdered set = intoCSV set .| (headers >>= converter) where headers = do mrow <- await case mrow of Nothing -> return [] Just [] -> headers Just hs -> return hs converter hs = awaitForever $ yield . toMapCSV hs toMapCSV !hs !fs = MO.fromList $ zip hs fs -- | Conversion of stream directly to/from a custom complex haskell -- type. instance (FromNamedRecord a, ToNamedRecord a, CSV s (MapRow ByteString)) => CSV s (Named a) where rowToStr s a = rowToStr s . toNamedRecord . getNamed $ a intoCSV set = intoCSV set .| C.mapMaybe go where go x = either (const Nothing) (Just . Named) $ runParser (parseNamedRecord x) fromCSV set = C.map go .| fromCSV set where go = toNamedRecord . getNamed instance (FromNamedRecordOrdered a, ToNamedRecordOrdered a, CSV s (OrderedMapRow ByteString)) => CSV s (NamedOrdered a) where rowToStr s a = rowToStr s . toNamedRecordOrdered . getNamedOrdered $ a intoCSV set = intoCSV set .| C.mapMaybe go where go x = either (const Nothing) (Just . NamedOrdered) $ runParser (parseNamedRecordOrdered x) fromCSV set = C.map go .| fromCSV set where go = toNamedRecordOrdered . getNamedOrdered ------------------------------------------------------------------------------- fromCSVMap :: (Monad m, IsString s, CSV s [a]) => CSVSettings -> ConduitM (M.Map k a) s m () fromCSVMap set = awaitForever push where push r = mapM_ yield [rowToStr set (M.elems r), "\n"] fromCSVMapOrdered :: (Monad m, IsString s, CSV s [a]) => CSVSettings -> ConduitM (MO.OMap k a) s m () fromCSVMapOrdered set = awaitForever push where push r = mapM_ yield [rowToStr set (map snd $ MO.assocs r), "\n"] ------------------------------------------------------------------------------- -- | Write headers AND the row into the output stream, once. If you -- don't call this while using 'MapRow' family of row types, then your -- resulting output will NOT have any headers in it. -- -- Usage: Just chain this using the 'Monad' instance in your pipeline: -- -- > runConduit $ ... .| writeHeaders settings >> fromCSV settings .| sinkFile "..." writeHeaders :: (Monad m, CSV s (Row r), IsString s) => CSVSettings -> ConduitM (MapRow r) s m () writeHeaders set = do mrow <- await case mrow of Nothing -> return () Just row -> mapM_ yield [ rowToStr set (M.keys row) , "\n" , rowToStr set (M.elems row) , "\n" ] writeHeadersOrdered :: (Monad m, CSV s (Row r), IsString s) => CSVSettings -> ConduitM (OrderedMapRow r) s m () writeHeadersOrdered set = do mrow <- await case mrow of Nothing -> return () Just row -> mapM_ yield [ rowToStr set (map fst $ MO.assocs row) , "\n" , rowToStr set (map snd $ MO.assocs row) , "\n" ] --------------------------- -- Convenience Functions -- --------------------------- ------------------------------------------------------------------------------- -- | Read the entire contents of a CSV file into memory. readCSVFile :: (MonadIO m, CSV ByteString a) => CSVSettings -- ^ Settings to use in deciphering stream -> FilePath -- ^ Input file -> m (V.Vector a) readCSVFile set fp = liftIO . runResourceT $ runConduit $ sourceFile fp .| intoCSV set .| transPipe lift (sinkVector growthFactor) where growthFactor = 10 ------------------------------------------------------------------------------- -- | A simple way to decode a CSV string. Don't be alarmed by the -- polymorphic nature of the signature. 's' is the type for the string -- and 'v' is a kind of 'Vector' here. -- -- For example for 'ByteString': -- -- >>> s <- LB.readFile "my.csv" -- >>> decodeCSV defCSVSettings s :: Either SomeException (Vector (Vector ByteString)) -- -- will work as long as the data is comma separated. decodeCSV :: forall v a s. (GV.Vector v a, CSV s a) => CSVSettings -> s -> Either SomeException (v a) decodeCSV set bs = runST $ runExceptT pipeline where src :: ConduitM () s (ExceptT SomeException (ST s1)) () src = C.sourceList [bs] csvConvert :: ConduitM s a (ExceptT SomeException (ST s1)) () csvConvert = transPipe (ExceptT . runCatchT) csvConvert' csvConvert' :: ConduitM s a (CatchT (ST s1)) () csvConvert' = intoCSV set growthFactor = 10 sink :: ConduitM a Void.Void (ExceptT SomeException (ST s1)) (v a) sink = sinkVector growthFactor pipeline :: ExceptT SomeException (ST s1) (v a) pipeline = runConduit (src .| csvConvert .| sink) ------------------------------------------------------------------------------- -- | Write CSV data into file. As we use a 'ByteString' sink, you'll -- need to get your data into a 'ByteString' stream type. writeCSVFile :: (CSV ByteString a) => CSVSettings -- ^ CSV Settings -> FilePath -- ^ Target file -> IOMode -- ^ Write vs. append mode -> [a] -- ^ List of rows -> IO () writeCSVFile set fo fmode rows = runResourceT $ runConduit $ do C.sourceList rows .| fromCSV set .| sinkIOHandle (openFile fo fmode) ------------------------------------------------------------------------------- -- | Map over the rows of a CSV file. Provided for convenience for -- historical reasons. -- -- An easy way to run this function would be 'runResourceT' after -- feeding it all the arguments. mapCSVFile :: ( MonadResource m , CSV ByteString a , CSV ByteString b # if MIN_VERSION_resourcet(1,2,0) , MonadThrow m #endif ) => CSVSettings -- ^ Settings to use both for both input and output -> (a -> [b]) -- ^ A mapping function -> FilePath -- ^ Input file -> FilePath -- ^ Output file -> m () mapCSVFile set f fi fo = transformCSV set (sourceFile fi) (C.concatMap f) (sinkFile fo) ------------------------------------------------------------------------------- -- | Like transformCSV' but uses the same settings for both input and -- output. transformCSV :: (MonadThrow m, CSV s a, CSV s' b) => CSVSettings -- ^ Settings to be used for both input and output -> ConduitM () s m () -- ^ A raw stream data source. Ex: 'sourceFile inFile' -> ConduitM a b m () -- ^ A transforming conduit -> ConduitM s' Void.Void m () -- ^ A raw stream data sink. Ex: 'sinkFile outFile' -> m () transformCSV set = transformCSV' set set ------------------------------------------------------------------------------- -- | General purpose CSV transformer. Apply a list-like processing -- function from 'Data.Conduit.List' to the rows of a CSV stream. You -- need to provide a stream data source, a transformer and a stream -- data sink. -- -- An easy way to run this function would be 'runResourceT' after -- feeding it all the arguments. -- -- Example - map a function over the rows of a CSV file: -- -- > transformCSV setIn setOut (sourceFile inFile) (C.map f) (sinkFile outFile) transformCSV' :: (MonadThrow m, CSV s a, CSV s' b) => CSVSettings -- ^ Settings to be used for input -> CSVSettings -- ^ Settings to be used for output -> ConduitM () s m () -- ^ A raw stream data source. Ex: 'sourceFile inFile' -> ConduitM a b m () -- ^ A transforming conduit -> ConduitM s' Void.Void m () -- ^ A raw stream data sink. Ex: 'sinkFile outFile' -> m () transformCSV' setIn setOut source c sink = runConduit $ source .| intoCSV setIn .| c .| fromCSV setOut .| sink ------------------ -- Vector Utils -- ------------------ ------------------------------------------------------------------------------- -- | An efficient sink that incrementally grows a vector from the input stream sinkVector :: (PrimMonad m, GV.Vector v a) => Int -> ConduitM a o m (v a) sinkVector by = do v <- lift $ GMV.new by go 0 v where -- i is the index of the next element to be written by go -- also exactly the number of elements in v so far go i v = do res <- await case res of Nothing -> do v' <- lift $ GV.freeze $ GMV.slice 0 i v return $! v' Just x -> do v' <- case GMV.length v == i of True -> lift $ GMV.grow v by False -> return v lift $ GMV.write v' i x go (i+1) v' csv-conduit-0.7.3.0/src/Data/CSV/Conduit/Types.hs0000644000000000000000000000376514071111440017447 0ustar0000000000000000{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TypeSynonymInstances #-} module Data.CSV.Conduit.Types where ------------------------------------------------------------------------------- import Data.Default import qualified Data.Map as M import qualified Data.Map.Ordered as MO ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- -- | Settings for a CSV file. This library is intended to be flexible -- and offer a way to process the majority of text data files out -- there. data CSVSettings = CSVSettings { -- | Separator character to be used in between fields csvSep :: !Char -- | Quote character that may sometimes be present around fields. -- If 'Nothing' is given, the library will never expect quotation -- even if it is present. , csvQuoteChar :: !(Maybe Char) } deriving (Read, Show, Eq) ------------------------------------------------------------------------------- -- | Default settings for a CSV file. -- -- > csvSep = ',' -- > csvQuoteChar = Just '"' -- defCSVSettings :: CSVSettings defCSVSettings = CSVSettings { csvSep = ',' , csvQuoteChar = Just '"' } instance Default CSVSettings where def = defCSVSettings ------------------------------------------------------------------------------- -- | A 'Row' is just a list of fields type Row a = [a] ------------------------------------------------------------------------------- -- | A 'MapRow' is a dictionary based on 'Data.Map' where column names -- are keys and row's individual cell values are the values of the -- 'Map'. type MapRow a = M.Map a a -- | An 'OrderedMapRow' is a dictionary based on 'Data.Map.Ordered' where column -- names are keys and row's individual cell values are the values of the 'OMap'. -- Unlike 'MapRow', 'OrderedMapRow' preserves the insertion ordering of columns. -- 'OrderedMapRow' is a reasonable default in most cases. type OrderedMapRow a = MO.OMap a a csv-conduit-0.7.3.0/src/Data/CSV/Conduit/Conversion.hs0000644000000000000000000007044414071131013020464 0ustar0000000000000000{-# LANGUAGE BangPatterns, CPP, FlexibleInstances, OverloadedStrings, Rank2Types #-} #ifdef GENERICS {-# LANGUAGE DefaultSignatures, TypeOperators, KindSignatures, FlexibleContexts, MultiParamTypeClasses, UndecidableInstances, ScopedTypeVariables, DataKinds #-} #endif ----------------------------------------------------------------------------- -- | -- Module : Data.CSV.Conduit.Conversion -- Copyright : Ozgun Ataman, Johan Tibell -- License : BSD3 -- -- Maintainer : Ozgun Ataman -- Stability : experimental -- -- This module has been shamelessly taken from Johan Tibell's nicely -- put together cassava package, which itself borrows the approach -- from Bryan O'Sullivan's widely used aeson package. -- -- We make the necessary adjustments and some simplifications here to -- bolt this parsing interface onto our underlying "CSV" typeclass. ---------------------------------------------------------------------------- module Data.CSV.Conduit.Conversion ( -- * Type conversion Only(..) , Named (..) , NamedOrdered (..) , Record , NamedRecord , NamedRecordOrdered , FromRecord(..) , FromNamedRecord(..) , FromNamedRecordOrdered(..) , ToNamedRecord(..) , ToNamedRecordOrdered(..) , FromField(..) , ToRecord(..) , ToField(..) , Field -- * Parser , Parser , runParser -- * Accessors , index , (.!) , unsafeIndex , lookup , lookupOrdered , (.:) , namedField , (.=) , record , namedRecord , namedRecordOrdered ) where import Control.Applicative as A import Control.Monad (MonadPlus, mplus, mzero) import Data.Attoparsec.ByteString.Char8 (double, parseOnly) import qualified Data.Attoparsec.ByteString.Char8 as A8 import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Lazy as L import Data.Int (Int8, Int16, Int32, Int64) import Data.Kind (Type) import qualified Data.Map as M import qualified Data.Map.Ordered as MO import Data.Semigroup as Semigroup import qualified Data.Text as T import qualified Data.Text.Encoding as T import qualified Data.Text.Lazy as LT import qualified Data.Text.Lazy.Encoding as LT import Data.Traversable as DT import Data.Vector (Vector, (!)) import qualified Data.Vector as V import qualified Data.Vector.Unboxed as U import Data.Word as W import GHC.Float (double2Float) import Prelude hiding (lookup, takeWhile) #ifdef GENERICS import GHC.Generics import qualified Data.IntMap as IM #endif import Data.CSV.Conduit.Conversion.Internal ------------------------------------------------------------------------ -- bytestring compatibility toStrict :: L.ByteString -> B.ByteString fromStrict :: B.ByteString -> L.ByteString #if MIN_VERSION_bytestring(0,10,0) toStrict = L.toStrict fromStrict = L.fromStrict #else toStrict = B.concat . L.toChunks fromStrict = L.fromChunks . (:[]) #endif {-# INLINE toStrict #-} {-# INLINE fromStrict #-} ------------------------------------------------------------------------ -- Type conversion -- | A shorthand for the ByteString case of 'MapRow' type NamedRecord = M.Map B8.ByteString B8.ByteString type NamedRecordOrdered = MO.OMap B8.ByteString B8.ByteString -- | A wrapper around custom haskell types that can directly be -- converted/parsed from an incoming CSV stream. -- -- We define this wrapper to stop GHC from complaining -- about overlapping instances. Just use 'getNamed' to get your -- object out of the wrapper. newtype Named a = Named { getNamed :: a } deriving (Eq,Show,Read,Ord) newtype NamedOrdered a = NamedOrdered { getNamedOrdered :: a } deriving (Eq,Show,Read,Ord) -- | A record corresponds to a single line in a CSV file. type Record = Vector B8.ByteString -- | A single field within a record. type Field = B8.ByteString ------------------------------------------------------------------------ -- Index-based conversion -- | A type that can be converted from a single CSV record, with the -- possibility of failure. -- -- When writing an instance, use 'empty', 'mzero', or 'fail' to make a -- conversion fail, e.g. if a 'Record' has the wrong number of -- columns. -- -- Given this example data: -- -- > John,56 -- > Jane,55 -- -- here's an example type and instance: -- -- > data Person = Person { name :: !Text, age :: !Int } -- > -- > instance FromRecord Person where -- > parseRecord v -- > | length v == 2 = Person <$> -- > v .! 0 <*> -- > v .! 1 -- > | otherwise = mzero class FromRecord a where parseRecord :: Record -> Parser a #ifdef GENERICS default parseRecord :: (Generic a, GFromRecord (Rep a)) => Record -> Parser a parseRecord r = to A.<$> gparseRecord r #endif -- | Haskell lacks a single-element tuple type, so if you CSV data -- with just one column you can use the 'Only' type to represent a -- single-column result. newtype Only a = Only { fromOnly :: a } deriving (Eq, Ord, Read, Show) -- | A type that can be converted to a single CSV record. -- -- An example type and instance: -- -- > data Person = Person { name :: !Text, age :: !Int } -- > -- > instance ToRecord Person where -- > toRecord (Person name age) = record [ -- > toField name, toField age] -- -- Outputs data on this form: -- -- > John,56 -- > Jane,55 class ToRecord a where toRecord :: a -> Record #ifdef GENERICS default toRecord :: (Generic a, GToRecord (Rep a) Field) => a -> Record toRecord = V.fromList . gtoRecord . from #endif instance FromField a => FromRecord (Only a) where parseRecord v | n == 1 = Only <$> unsafeIndex v 0 | otherwise = lengthMismatch 1 v where n = V.length v -- TODO: Check if we want all toRecord conversions to be stricter. instance ToField a => ToRecord (Only a) where toRecord = V.singleton . toField . fromOnly instance (FromField a, FromField b) => FromRecord (a, b) where parseRecord v | n == 2 = (,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 | otherwise = lengthMismatch 2 v where n = V.length v instance (ToField a, ToField b) => ToRecord (a, b) where toRecord (a, b) = V.fromList [toField a, toField b] instance (FromField a, FromField b, FromField c) => FromRecord (a, b, c) where parseRecord v | n == 3 = (,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 | otherwise = lengthMismatch 3 v where n = V.length v instance (ToField a, ToField b, ToField c) => ToRecord (a, b, c) where toRecord (a, b, c) = V.fromList [toField a, toField b, toField c] instance (FromField a, FromField b, FromField c, FromField d) => FromRecord (a, b, c, d) where parseRecord v | n == 4 = (,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 | otherwise = lengthMismatch 4 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d) => ToRecord (a, b, c, d) where toRecord (a, b, c, d) = V.fromList [ toField a, toField b, toField c, toField d] instance (FromField a, FromField b, FromField c, FromField d, FromField e) => FromRecord (a, b, c, d, e) where parseRecord v | n == 5 = (,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 | otherwise = lengthMismatch 5 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e) => ToRecord (a, b, c, d, e) where toRecord (a, b, c, d, e) = V.fromList [ toField a, toField b, toField c, toField d, toField e] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f) => FromRecord (a, b, c, d, e, f) where parseRecord v | n == 6 = (,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 | otherwise = lengthMismatch 6 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f) => ToRecord (a, b, c, d, e, f) where toRecord (a, b, c, d, e, f) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f] instance (FromField a, FromField b, FromField c, FromField d, FromField e, FromField f, FromField g) => FromRecord (a, b, c, d, e, f, g) where parseRecord v | n == 7 = (,,,,,,) <$> unsafeIndex v 0 <*> unsafeIndex v 1 <*> unsafeIndex v 2 <*> unsafeIndex v 3 <*> unsafeIndex v 4 <*> unsafeIndex v 5 <*> unsafeIndex v 6 | otherwise = lengthMismatch 7 v where n = V.length v instance (ToField a, ToField b, ToField c, ToField d, ToField e, ToField f, ToField g) => ToRecord (a, b, c, d, e, f, g) where toRecord (a, b, c, d, e, f, g) = V.fromList [ toField a, toField b, toField c, toField d, toField e, toField f, toField g] lengthMismatch :: Int -> Record -> Parser a lengthMismatch expected v = fail $ "cannot unpack array of length " ++ show n ++ " into a " ++ desired ++ ". Input record: " ++ show v where n = V.length v desired | expected == 1 = "Only" | expected == 2 = "pair" | otherwise = show expected ++ "-tuple" instance FromField a => FromRecord [a] where parseRecord = DT.traverse parseField . V.toList instance ToField a => ToRecord [a] where toRecord = V.fromList . map toField instance FromField a => FromRecord (V.Vector a) where parseRecord = traverse parseField instance ToField a => ToRecord (Vector a) where toRecord = V.map toField instance (FromField a, U.Unbox a) => FromRecord (U.Vector a) where parseRecord = fmap U.convert . traverse parseField instance (ToField a, U.Unbox a) => ToRecord (U.Vector a) where toRecord = V.map toField . U.convert ------------------------------------------------------------------------ -- Name-based conversion -- | A type that can be converted from a single CSV record, with the -- possibility of failure. -- -- When writing an instance, use 'empty', 'mzero', or 'fail' to make a -- conversion fail, e.g. if a 'Record' has the wrong number of -- columns. -- -- Given this example data: -- -- > name,age -- > John,56 -- > Jane,55 -- -- here's an example type and instance: -- -- > {-# LANGUAGE OverloadedStrings #-} -- > -- > data Person = Person { name :: !Text, age :: !Int } -- > -- > instance FromNamedRecord Person where -- > parseNamedRecord m = Person <$> -- > m .: "name" <*> -- > m .: "age" -- -- Note the use of the @OverloadedStrings@ language extension which -- enables 'B8.ByteString' values to be written as string literals. class FromNamedRecord a where parseNamedRecord :: NamedRecord -> Parser a #ifdef GENERICS default parseNamedRecord :: (Generic a, GFromNamedRecord (Rep a)) => NamedRecord -> Parser a parseNamedRecord r = to <$> gparseNamedRecord r #endif class FromNamedRecordOrdered a where parseNamedRecordOrdered :: NamedRecordOrdered -> Parser a -- | A type that can be converted to a single CSV record. -- -- An example type and instance: -- -- > data Person = Person { name :: !Text, age :: !Int } -- > -- > instance ToNamedRecord Person where -- > toNamedRecord (Person name age) = namedRecord [ -- > "name" .= name, "age" .= age] class ToNamedRecord a where toNamedRecord :: a -> NamedRecord #ifdef GENERICS default toNamedRecord :: (Generic a, GToRecord (Rep a) (B.ByteString, B.ByteString)) => a -> NamedRecord toNamedRecord = namedRecord . gtoRecord . from #endif class ToNamedRecordOrdered a where toNamedRecordOrdered :: a -> NamedRecordOrdered instance FromField a => FromNamedRecord (M.Map B.ByteString a) where parseNamedRecord m = traverse parseField m instance FromField a => FromNamedRecordOrdered (MO.OMap B.ByteString a) where parseNamedRecordOrdered m = traverse parseField m instance ToField a => ToNamedRecord (M.Map B.ByteString a) where toNamedRecord = M.map toField instance ToField a => ToNamedRecordOrdered (MO.OMap B.ByteString a) where toNamedRecordOrdered a = MO.fromList $ map (fmap toField) $ MO.assocs a -- instance FromField a => FromNamedRecord (HM.HashMap B.ByteString a) where -- parseNamedRecord m = traverse (\ s -> parseField s) m -- instance ToField a => ToNamedRecord (HM.HashMap B.ByteString a) where -- toNamedRecord = HM.map toField ------------------------------------------------------------------------ -- Individual field conversion -- | A type that can be converted from a single CSV field, with the -- possibility of failure. -- -- When writing an instance, use 'empty', 'mzero', or 'fail' to make a -- conversion fail, e.g. if a 'Field' can't be converted to the given -- type. -- -- Example type and instance: -- -- > {-# LANGUAGE OverloadedStrings #-} -- > -- > data Color = Red | Green | Blue -- > -- > instance FromField Color where -- > parseField s -- > | s == "R" = pure Red -- > | s == "G" = pure Green -- > | s == "B" = pure Blue -- > | otherwise = mzero class FromField a where parseField :: Field -> Parser a -- | A type that can be converted to a single CSV field. -- -- Example type and instance: -- -- > {-# LANGUAGE OverloadedStrings #-} -- > -- > data Color = Red | Green | Blue -- > -- > instance ToField Color where -- > toField Red = "R" -- > toField Green = "G" -- > toField Blue = "B" class ToField a where toField :: a -> Field -- | 'Nothing' if the 'Field' is 'B.empty', 'Just' otherwise. instance FromField a => FromField (Maybe a) where parseField s | B.null s = pure Nothing | otherwise = Just <$> parseField s {-# INLINE parseField #-} -- | 'Nothing' is encoded as an 'B.empty' field. instance ToField a => ToField (Maybe a) where toField = maybe B.empty toField {-# INLINE toField #-} -- | Ignores the 'Field'. Always succeeds. instance FromField () where parseField _ = pure () {-# INLINE parseField #-} -- | Assumes UTF-8 encoding. instance FromField Char where parseField s = case T.decodeUtf8' s of Left e -> fail $ show e Right t | T.compareLength t 1 == EQ -> pure (T.head t) | otherwise -> typeError "Char" s Nothing {-# INLINE parseField #-} -- | Uses UTF-8 encoding. instance ToField Char where toField = toField . T.encodeUtf8 . T.singleton {-# INLINE toField #-} -- | Accepts same syntax as 'rational'. instance FromField Double where parseField = parseDouble {-# INLINE parseField #-} -- | Uses decimal notation or scientific notation, depending on the -- number. instance ToField Double where toField = realFloat {-# INLINE toField #-} -- | Accepts same syntax as 'rational'. instance FromField Float where parseField s = double2Float <$> parseDouble s {-# INLINE parseField #-} -- | Uses decimal notation or scientific notation, depending on the -- number. instance ToField Float where toField = realFloat {-# INLINE toField #-} parseDouble :: B.ByteString -> Parser Double parseDouble s = case parseOnly double s of Left err -> typeError "Double" s (Just err) Right n -> pure n {-# INLINE parseDouble #-} -- | Accepts a signed decimal number. instance FromField Int where parseField = parseSigned "Int" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Int where toField = decimal {-# INLINE toField #-} -- | Accepts a signed decimal number. instance FromField Integer where parseField = parseSigned "Integer" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Integer where toField = decimal {-# INLINE toField #-} -- | Accepts a signed decimal number. instance FromField Int8 where parseField = parseSigned "Int8" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Int8 where toField = decimal {-# INLINE toField #-} -- | Accepts a signed decimal number. instance FromField Int16 where parseField = parseSigned "Int16" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Int16 where toField = decimal {-# INLINE toField #-} -- | Accepts a signed decimal number. instance FromField Int32 where parseField = parseSigned "Int32" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Int32 where toField = decimal {-# INLINE toField #-} -- | Accepts a signed decimal number. instance FromField Int64 where parseField = parseSigned "Int64" {-# INLINE parseField #-} -- | Uses decimal encoding with optional sign. instance ToField Int64 where toField = decimal {-# INLINE toField #-} -- | Accepts an unsigned decimal number. instance FromField W.Word where parseField = parseUnsigned "Word" {-# INLINE parseField #-} -- | Uses decimal encoding. instance ToField Word where toField = decimal {-# INLINE toField #-} -- | Accepts an unsigned decimal number. instance FromField Word8 where parseField = parseUnsigned "Word8" {-# INLINE parseField #-} -- | Uses decimal encoding. instance ToField Word8 where toField = decimal {-# INLINE toField #-} -- | Accepts an unsigned decimal number. instance FromField Word16 where parseField = parseUnsigned "Word16" {-# INLINE parseField #-} -- | Uses decimal encoding. instance ToField Word16 where toField = decimal {-# INLINE toField #-} -- | Accepts an unsigned decimal number. instance FromField Word32 where parseField = parseUnsigned "Word32" {-# INLINE parseField #-} -- | Uses decimal encoding. instance ToField Word32 where toField = decimal {-# INLINE toField #-} -- | Accepts an unsigned decimal number. instance FromField Word64 where parseField = parseUnsigned "Word64" {-# INLINE parseField #-} -- | Uses decimal encoding. instance ToField Word64 where toField = decimal {-# INLINE toField #-} instance FromField B.ByteString where parseField = pure {-# INLINE parseField #-} instance ToField B.ByteString where toField = id {-# INLINE toField #-} instance FromField L.ByteString where parseField = pure . fromStrict {-# INLINE parseField #-} instance ToField L.ByteString where toField = toStrict {-# INLINE toField #-} -- | Assumes UTF-8 encoding. Fails on invalid byte sequences. instance FromField T.Text where parseField = either (fail . show) pure . T.decodeUtf8' {-# INLINE parseField #-} -- | Uses UTF-8 encoding. instance ToField T.Text where toField = toField . T.encodeUtf8 {-# INLINE toField #-} -- | Assumes UTF-8 encoding. Fails on invalid byte sequences. instance FromField LT.Text where parseField = either (fail . show) (pure . LT.fromStrict) . T.decodeUtf8' {-# INLINE parseField #-} -- | Uses UTF-8 encoding. instance ToField LT.Text where toField = toField . toStrict . LT.encodeUtf8 {-# INLINE toField #-} -- | Assumes UTF-8 encoding. Fails on invalid byte sequences. instance FromField [Char] where parseField = fmap T.unpack . parseField {-# INLINE parseField #-} -- | Uses UTF-8 encoding. instance ToField [Char] where toField = toField . T.pack {-# INLINE toField #-} parseSigned :: (Integral a) => String -> B.ByteString -> Parser a parseSigned typ s = case parseOnly (A8.signed A8.decimal) s of Left err -> typeError typ s (Just err) Right n -> pure n {-# INLINE parseSigned #-} parseUnsigned :: Integral a => String -> B.ByteString -> Parser a parseUnsigned typ s = case parseOnly A8.decimal s of Left err -> typeError typ s (Just err) Right n -> pure n {-# INLINE parseUnsigned #-} typeError :: String -> B.ByteString -> Maybe String -> Parser a typeError typ s mmsg = fail $ "expected " ++ typ ++ ", got " ++ show (B8.unpack s) ++ cause where cause = case mmsg of Just msg -> " (" ++ msg ++ ")" Nothing -> "" ------------------------------------------------------------------------ -- Constructors and accessors -- | Retrieve the /n/th field in the given record. The result is -- 'empty' if the value cannot be converted to the desired type. -- Raises an exception if the index is out of bounds. -- -- 'index' is a simple convenience function that is equivalent to -- @'parseField' (v '!' idx)@. If you're certain that the index is not -- out of bounds, using 'unsafeIndex' is somewhat faster. index :: FromField a => Record -> Int -> Parser a index v idx = parseField (v ! idx) {-# INLINE index #-} -- | Alias for 'index'. (.!) :: FromField a => Record -> Int -> Parser a (.!) = index {-# INLINE (.!) #-} infixl 9 .! -- | Like 'index' but without bounds checking. unsafeIndex :: FromField a => Record -> Int -> Parser a unsafeIndex v idx = parseField (V.unsafeIndex v idx) {-# INLINE unsafeIndex #-} -- | Retrieve a field in the given record by name. The result is -- 'empty' if the field is missing or if the value cannot be converted -- to the desired type. lookup :: FromField a => NamedRecord -> B.ByteString -> Parser a lookup m name = maybe (fail err) parseField $ M.lookup name m where err = "no field named " ++ show (B8.unpack name) {-# INLINE lookup #-} lookupOrdered :: FromField a => NamedRecordOrdered -> B.ByteString -> Parser a lookupOrdered m name = maybe (fail err) parseField $ MO.lookup name m where err = "no field named " ++ show (B8.unpack name) {-# INLINE lookupOrdered #-} -- | Alias for 'lookup'. (.:) :: FromField a => NamedRecord -> B.ByteString -> Parser a (.:) = lookup {-# INLINE (.:) #-} -- | Construct a pair from a name and a value. For use with -- 'namedRecord'. namedField :: ToField a => B.ByteString -> a -> (B.ByteString, B.ByteString) namedField name val = (name, toField val) {-# INLINE namedField #-} -- | Alias for 'namedField'. (.=) :: ToField a => B.ByteString -> a -> (B.ByteString, B.ByteString) (.=) = namedField {-# INLINE (.=) #-} -- | Construct a record from a list of 'B.ByteString's. Use 'toField' -- to convert values to 'B.ByteString's for use with 'record'. record :: [B.ByteString] -> Record record = V.fromList -- | Construct a named record from a list of name-value 'B.ByteString' -- pairs. Use '.=' to construct such a pair from a name and a value. namedRecord :: [(B.ByteString, B.ByteString)] -> NamedRecord namedRecord = M.fromList namedRecordOrdered :: [(B.ByteString, B.ByteString)] -> NamedRecordOrdered namedRecordOrdered = MO.fromList ------------------------------------------------------------------------ -- Parser for converting records to data types -- | Failure continuation. type Failure f r = String -> f r -- | Success continuation. type Success a f r = a -> f r -- | Conversion of a field to a value might fail e.g. if the field is -- malformed. This possibility is captured by the 'Parser' type, which -- lets you compose several field conversions together in such a way -- that if any of them fail, the whole record conversion fails. newtype Parser a = Parser { unParser :: forall f r. Failure f r -> Success a f r -> f r } instance Monad Parser where m >>= g = Parser $ \kf ks -> let ks' a = unParser (g a) kf ks in unParser m kf ks' {-# INLINE (>>=) #-} return a = Parser $ \_kf ks -> ks a {-# INLINE return #-} #if MIN_VERSION_base(4,13,0) instance MonadFail Parser where #endif fail msg = Parser $ \kf _ks -> kf msg {-# INLINE fail #-} instance Functor Parser where fmap f m = Parser $ \kf ks -> let ks' a = ks (f a) in unParser m kf ks' {-# INLINE fmap #-} instance Applicative Parser where pure = return {-# INLINE pure #-} (<*>) = apP {-# INLINE (<*>) #-} instance Alternative Parser where empty = fail "empty" {-# INLINE empty #-} (<|>) = mplus {-# INLINE (<|>) #-} instance MonadPlus Parser where mzero = fail "mzero" {-# INLINE mzero #-} mplus a b = Parser $ \kf ks -> let kf' _ = unParser b kf ks in unParser a kf' ks {-# INLINE mplus #-} instance Semigroup.Semigroup (Parser a) where (<>) = mplus {-# INLINE (<>) #-} apP :: Parser (a -> b) -> Parser a -> Parser b apP d e = do b <- d a <- e return (b a) {-# INLINE apP #-} -- | Run a 'Parser', returning either @'Left' errMsg@ or @'Right' -- result@. Forces the value in the 'Left' or 'Right' constructors to -- weak head normal form. -- -- You most likely won't need to use this function directly, but it's -- included for completeness. runParser :: Parser a -> Either String a runParser p = unParser p left right where left !errMsg = Left errMsg right !x = Right x {-# INLINE runParser #-} #ifdef GENERICS class GFromRecord f where gparseRecord :: Record -> Parser (f p) instance GFromRecordSum f Record => GFromRecord (M1 i n f) where gparseRecord v = case (IM.lookup n gparseRecordSum) of Nothing -> lengthMismatch n v Just p -> M1 <$> p v where n = V.length v class GFromNamedRecord f where gparseNamedRecord :: NamedRecord -> Parser (f p) instance GFromRecordSum f NamedRecord => GFromNamedRecord (M1 i n f) where gparseNamedRecord v = foldr (\f p -> p <|> M1 <$> f v) empty (IM.elems gparseRecordSum) class GFromRecordSum f r where gparseRecordSum :: IM.IntMap (r -> Parser (f p)) instance (GFromRecordSum a r, GFromRecordSum b r) => GFromRecordSum (a :+: b) r where gparseRecordSum = IM.unionWith (\a b r -> a r <|> b r) (fmap (L1 <$>) <$> gparseRecordSum) (fmap (R1 <$>) <$> gparseRecordSum) instance GFromRecordProd f r => GFromRecordSum (M1 i n f) r where gparseRecordSum = IM.singleton n (fmap (M1 <$>) f) where (n, f) = gparseRecordProd 0 class GFromRecordProd f r where gparseRecordProd :: Int -> (Int, r -> Parser (f p)) instance GFromRecordProd U1 r where gparseRecordProd n = (n, const (pure U1)) instance (GFromRecordProd a r, GFromRecordProd b r) => GFromRecordProd (a :*: b) r where gparseRecordProd n0 = (n2, f) where f r = (:*:) <$> fa r <*> fb r (n1, fa) = gparseRecordProd n0 (n2, fb) = gparseRecordProd n1 instance GFromRecordProd f Record => GFromRecordProd (M1 i n f) Record where gparseRecordProd n = fmap (M1 <$>) <$> gparseRecordProd n instance FromField a => GFromRecordProd (K1 i a) Record where gparseRecordProd n = (n + 1, \v -> K1 <$> parseField (V.unsafeIndex v n)) #if MIN_VERSION_base(4,9,0) data Proxy (s :: Meta) (f :: Type -> Type) a = Proxy #else data Proxy s (f :: * -> *) a = Proxy #endif instance (FromField a, Selector s) => GFromRecordProd (M1 S s (K1 i a)) NamedRecord where gparseRecordProd n = (n + 1, \v -> (M1 . K1) <$> v .: name) where name = T.encodeUtf8 (T.pack (selName (Proxy :: Proxy s f a))) class GToRecord a f where gtoRecord :: a p -> [f] instance GToRecord U1 f where gtoRecord U1 = [] instance (GToRecord a f, GToRecord b f) => GToRecord (a :*: b) f where gtoRecord (a :*: b) = gtoRecord a ++ gtoRecord b instance (GToRecord a f, GToRecord b f) => GToRecord (a :+: b) f where gtoRecord (L1 a) = gtoRecord a gtoRecord (R1 b) = gtoRecord b instance GToRecord a f => GToRecord (M1 D c a) f where gtoRecord (M1 a) = gtoRecord a instance GToRecord a f => GToRecord (M1 C c a) f where gtoRecord (M1 a) = gtoRecord a instance GToRecord a Field => GToRecord (M1 S c a) Field where gtoRecord (M1 a) = gtoRecord a instance ToField a => GToRecord (K1 i a) Field where gtoRecord (K1 a) = [toField a] instance (ToField a, Selector s) => GToRecord (M1 S s (K1 i a)) (B.ByteString, B.ByteString) where gtoRecord m@(M1 (K1 a)) = [T.encodeUtf8 (T.pack (selName m)) .= toField a] #endif csv-conduit-0.7.3.0/src/Data/CSV/Conduit/Parser/ByteString.hs0000644000000000000000000000550213550145407021673 0ustar0000000000000000{-| This module exports the underlying Attoparsec row parser. This is helpful if you want to do some ad-hoc CSV string parsing. -} module Data.CSV.Conduit.Parser.ByteString ( parseCSV , parseRow , row , csv ) where ------------------------------------------------------------------------------- import Control.Applicative import Control.Monad (mzero) import Data.Attoparsec.ByteString as P hiding (take) import qualified Data.Attoparsec.ByteString.Char8 as C8 import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as B8 import Data.Word (Word8) ------------------------------------------------------------------------------- import Data.CSV.Conduit.Types ------------------------------------------------------------------------------ -- | Try to parse given string as CSV parseCSV :: CSVSettings -> ByteString -> Either String [Row ByteString] parseCSV s = parseOnly $ csv s ------------------------------------------------------------------------------ -- | Try to parse given string as 'Row ByteString' parseRow :: CSVSettings -> ByteString -> Either String (Maybe (Row ByteString)) parseRow s = parseOnly $ row s ------------------------------------------------------------------------------ -- | Parse CSV csv :: CSVSettings -> Parser [Row ByteString] csv s = do r <- row s end <- atEnd if end then case r of Just x -> return [x] Nothing -> return [] else do rest <- csv s return $ case r of Just x -> x : rest Nothing -> rest ------------------------------------------------------------------------------ -- | Parse a CSV row row :: CSVSettings -> Parser (Maybe (Row ByteString)) row csvs = csvrow csvs <|> badrow badrow :: Parser (Maybe (Row ByteString)) badrow = P.takeWhile (not . C8.isEndOfLine) *> (C8.endOfLine <|> C8.endOfInput) *> return Nothing csvrow :: CSVSettings -> Parser (Maybe (Row ByteString)) csvrow c = let rowbody = (quotedField' <|> field c) `sepBy` C8.char (csvSep c) properrow = rowbody <* (C8.endOfLine <|> P.endOfInput) quotedField' = case csvQuoteChar c of Nothing -> mzero Just q' -> try (quotedField q') in do res <- properrow return $ Just res field :: CSVSettings -> Parser ByteString field s = P.takeWhile (isFieldChar s) isFieldChar :: CSVSettings -> Word8 -> Bool isFieldChar s = notInClass xs' where xs = csvSep s : "\n\r" xs' = case csvQuoteChar s of Nothing -> xs Just x -> x : xs quotedField :: Char -> Parser ByteString quotedField c = let quoted = string dbl *> return c dbl = B8.pack [c,c] in do _ <- C8.char c f <- many (C8.notChar c <|> quoted) _ <- C8.char c return $ B8.pack f csv-conduit-0.7.3.0/src/Data/CSV/Conduit/Parser/Text.hs0000644000000000000000000000530613550145407020527 0ustar0000000000000000{-| This module exports the underlying Attoparsec row parser. This is helpful if you want to do some ad-hoc CSV string parsing. -} module Data.CSV.Conduit.Parser.Text ( parseCSV , parseRow , row , csv ) where ------------------------------------------------------------------------------- import Control.Applicative import Control.Monad (mzero) import Data.Attoparsec.Text as P hiding (take) import qualified Data.Attoparsec.Text as T import Data.Text (Text) import qualified Data.Text as T ------------------------------------------------------------------------------- import Data.CSV.Conduit.Types ------------------------------------------------------------------------------- ------------------------------------------------------------------------------ -- | Try to parse given string as CSV parseCSV :: CSVSettings -> Text -> Either String [Row Text] parseCSV s = parseOnly $ csv s ------------------------------------------------------------------------------ -- | Try to parse given string as 'Row Text' parseRow :: CSVSettings -> Text -> Either String (Maybe (Row Text)) parseRow s = parseOnly $ row s ------------------------------------------------------------------------------ -- | Parse CSV csv :: CSVSettings -> Parser [Row Text] csv s = do r <- row s end <- atEnd if end then case r of Just x -> return [x] Nothing -> return [] else do rest <- csv s return $ case r of Just x -> x : rest Nothing -> rest ------------------------------------------------------------------------------ -- | Parse a CSV row row :: CSVSettings -> Parser (Maybe (Row Text)) row csvs = csvrow csvs <|> badrow badrow :: Parser (Maybe (Row Text)) badrow = P.takeWhile (not . T.isEndOfLine) *> (T.endOfLine <|> T.endOfInput) *> return Nothing csvrow :: CSVSettings -> Parser (Maybe (Row Text)) csvrow c = let rowbody = (quotedField' <|> field c) `sepBy` T.char (csvSep c) properrow = rowbody <* (T.endOfLine <|> P.endOfInput) quotedField' = case csvQuoteChar c of Nothing -> mzero Just q' -> try (quotedField q') in do res <- properrow return $ Just res field :: CSVSettings -> Parser Text field s = P.takeWhile (isFieldChar s) isFieldChar :: CSVSettings -> Char -> Bool isFieldChar s = notInClass xs' where xs = csvSep s : "\n\r" xs' = case csvQuoteChar s of Nothing -> xs Just x -> x : xs quotedField :: Char -> Parser Text quotedField c = do let quoted = string dbl *> return c dbl = T.pack [c,c] _ <- T.char c f <- many (T.notChar c <|> quoted) _ <- T.char c return $ T.pack f csv-conduit-0.7.3.0/src/Data/CSV/Conduit/Conversion/Internal.hs0000644000000000000000000002422013550146062022242 0ustar0000000000000000module Data.CSV.Conduit.Conversion.Internal ( decimal , realFloat ) where import Blaze.ByteString.Builder import Blaze.ByteString.Builder.Char8 import Data.Array.Base (unsafeAt) import Data.Array.IArray import qualified Data.ByteString as B import Data.Char (ord) import Data.Int import Data.Word import Data.CSV.Conduit.Monoid as Monoid ((<>)) ------------------------------------------------------------------------ -- Integers decimal :: Integral a => a -> B.ByteString decimal = toByteString . formatDecimal {-# INLINE decimal #-} -- TODO: Add an optimized version for Integer. formatDecimal :: Integral a => a -> Builder {-# RULES "formatDecimal/Int" formatDecimal = formatBoundedSigned :: Int -> Builder #-} {-# RULES "formatDecimal/Int8" formatDecimal = formatBoundedSigned :: Int8 -> Builder #-} {-# RULES "formatDecimal/Int16" formatDecimal = formatBoundedSigned :: Int16 -> Builder #-} {-# RULES "formatDecimal/Int32" formatDecimal = formatBoundedSigned :: Int32 -> Builder #-} {-# RULES "formatDecimal/Int64" formatDecimal = formatBoundedSigned :: Int64 -> Builder #-} {-# RULES "formatDecimal/Word" formatDecimal = formatPositive :: Word -> Builder #-} {-# RULES "formatDecimal/Word8" formatDecimal = formatPositive :: Word8 -> Builder #-} {-# RULES "formatDecimal/Word16" formatDecimal = formatPositive :: Word16 -> Builder #-} {-# RULES "formatDecimal/Word32" formatDecimal = formatPositive :: Word32 -> Builder #-} {-# RULES "formatDecimal/Word64" formatDecimal = formatPositive :: Word64 -> Builder #-} {-# NOINLINE formatDecimal #-} formatDecimal i | i < 0 = minus Monoid.<> if i <= -128 then formatPositive (-(i `quot` 10)) <> digit (-(i `rem` 10)) else formatPositive (-i) | otherwise = formatPositive i formatBoundedSigned :: (Integral a, Bounded a) => a -> Builder {-# SPECIALIZE formatBoundedSigned :: Int -> Builder #-} {-# SPECIALIZE formatBoundedSigned :: Int8 -> Builder #-} {-# SPECIALIZE formatBoundedSigned :: Int16 -> Builder #-} {-# SPECIALIZE formatBoundedSigned :: Int32 -> Builder #-} {-# SPECIALIZE formatBoundedSigned :: Int64 -> Builder #-} formatBoundedSigned i | i < 0 = minus <> if i == minBound then formatPositive (-(i `quot` 10)) <> digit (-(i `rem` 10)) else formatPositive (-i) | otherwise = formatPositive i formatPositive :: Integral a => a -> Builder {-# SPECIALIZE formatPositive :: Int -> Builder #-} {-# SPECIALIZE formatPositive :: Int8 -> Builder #-} {-# SPECIALIZE formatPositive :: Int16 -> Builder #-} {-# SPECIALIZE formatPositive :: Int32 -> Builder #-} {-# SPECIALIZE formatPositive :: Int64 -> Builder #-} {-# SPECIALIZE formatPositive :: Word -> Builder #-} {-# SPECIALIZE formatPositive :: Word8 -> Builder #-} {-# SPECIALIZE formatPositive :: Word16 -> Builder #-} {-# SPECIALIZE formatPositive :: Word32 -> Builder #-} {-# SPECIALIZE formatPositive :: Word64 -> Builder #-} formatPositive = go where go n | n < 10 = digit n | otherwise = go (n `quot` 10) <> digit (n `rem` 10) minus :: Builder minus = fromWord8 45 zero :: Word8 zero = 48 digit :: Integral a => a -> Builder digit n = fromWord8 $! i2w (fromIntegral n) {-# INLINE digit #-} i2w :: Int -> Word8 i2w i = zero + fromIntegral i {-# INLINE i2w #-} ------------------------------------------------------------------------ -- Floating point numbers realFloat :: RealFloat a => a -> B.ByteString {-# SPECIALIZE realFloat :: Float -> B.ByteString #-} {-# SPECIALIZE realFloat :: Double -> B.ByteString #-} realFloat = toByteString . formatRealFloat Generic -- | Control the rendering of floating point numbers. data FPFormat = Exponent -- ^ Scientific notation (e.g. @2.3e123@). | Fixed -- ^ Standard decimal notation. | Generic -- ^ Use decimal notation for values between @0.1@ and -- @9,999,999@, and scientific notation otherwise. deriving (Enum, Read, Show) formatRealFloat :: RealFloat a => FPFormat -> a -> Builder {-# SPECIALIZE formatRealFloat :: FPFormat -> Float -> Builder #-} {-# SPECIALIZE formatRealFloat :: FPFormat -> Double -> Builder #-} formatRealFloat fmt x | isNaN x = fromString "NaN" | isInfinite x = if x < 0 then fromString "-Infinity" else fromString "Infinity" | x < 0 || isNegativeZero x = minus <> doFmt fmt (floatToDigits (-x)) | otherwise = doFmt fmt (floatToDigits x) where doFmt format (is, e) = let ds = map i2d is in case format of Generic -> doFmt (if e < 0 || e > 7 then Exponent else Fixed) (is,e) Exponent -> let show_e' = formatDecimal (e-1) in case ds of [48] -> fromString "0.0e0" [d] -> fromWord8 d <> fromString ".0e" <> show_e' (d:ds') -> fromWord8 d <> fromChar '.' <> fromWord8s ds' <> fromChar 'e' <> show_e' [] -> error "formatRealFloat/doFmt/Exponent: []" Fixed | e <= 0 -> fromString "0." <> fromByteString (B.replicate (-e) zero) <> fromWord8s ds | otherwise -> let f 0 s rs = mk0 (reverse s) <> fromChar '.' <> mk0 rs f n s [] = f (n-1) (zero:s) [] f n s (r:rs) = f (n-1) (r:s) rs in f e [] ds where mk0 ls = case ls of { [] -> fromWord8 zero ; _ -> fromWord8s ls} -- Based on "Printing Floating-Point Numbers Quickly and Accurately" -- by R.G. Burger and R.K. Dybvig in PLDI 96. -- This version uses a much slower logarithm estimator. It should be improved. -- | 'floatToDigits' takes a base and a non-negative 'RealFloat' number, -- and returns a list of digits and an exponent. -- In particular, if @x>=0@, and -- -- > floatToDigits base x = ([d1,d2,...,dn], e) -- -- then -- -- (1) @n >= 1@ -- -- (2) @x = 0.d1d2...dn * (base**e)@ -- -- (3) @0 <= di <= base-1@ floatToDigits :: (RealFloat a) => a -> ([Int], Int) {-# SPECIALIZE floatToDigits :: Float -> ([Int], Int) #-} {-# SPECIALIZE floatToDigits :: Double -> ([Int], Int) #-} floatToDigits 0 = ([0], 0) floatToDigits x = let (f0, e0) = decodeFloat x (minExp0, _) = floatRange x p = floatDigits x b = floatRadix x minExp = minExp0 - p -- the real minimum exponent -- Haskell requires that f be adjusted so denormalized numbers -- will have an impossibly low exponent. Adjust for this. (f, e) = let n = minExp - e0 in if n > 0 then (f0 `quot` (expt b n), e0+n) else (f0, e0) (r, s, mUp, mDn) = if e >= 0 then let be = expt b e in if f == expt b (p-1) then (f*be*b*2, 2*b, be*b, be) -- according to Burger and Dybvig else (f*be*2, 2, be, be) else if e > minExp && f == expt b (p-1) then (f*b*2, expt b (-e+1)*2, b, 1) else (f*2, expt b (-e)*2, 1, 1) k :: Int k = let k0 :: Int k0 = if b == 2 then -- logBase 10 2 is very slightly larger than 8651/28738 -- (about 5.3558e-10), so if log x >= 0, the approximation -- k1 is too small, hence we add one and need one fixup step less. -- If log x < 0, the approximation errs rather on the high side. -- That is usually more than compensated for by ignoring the -- fractional part of logBase 2 x, but when x is a power of 1/2 -- or slightly larger and the exponent is a multiple of the -- denominator of the rational approximation to logBase 10 2, -- k1 is larger than logBase 10 x. If k1 > 1 + logBase 10 x, -- we get a leading zero-digit we don't want. -- With the approximation 3/10, this happened for -- 0.5^1030, 0.5^1040, ..., 0.5^1070 and values close above. -- The approximation 8651/28738 guarantees k1 < 1 + logBase 10 x -- for IEEE-ish floating point types with exponent fields -- <= 17 bits and mantissae of several thousand bits, earlier -- convergents to logBase 10 2 would fail for long double. -- Using quot instead of div is a little faster and requires -- fewer fixup steps for negative lx. let lx = p - 1 + e0 k1 = (lx * 8651) `quot` 28738 in if lx >= 0 then k1 + 1 else k1 else -- f :: Integer, log :: Float -> Float, -- ceiling :: Float -> Int ceiling ((log (fromInteger (f+1) :: Float) + fromIntegral e * log (fromInteger b)) / log 10) --WAS: fromInt e * log (fromInteger b)) fixup n = if n >= 0 then if r + mUp <= expt 10 n * s then n else fixup (n+1) else if expt 10 (-n) * (r + mUp) <= s then n else fixup (n+1) in fixup k0 gen ds rn sN mUpN mDnN = let (dn, rn') = (rn * 10) `quotRem` sN mUpN' = mUpN * 10 mDnN' = mDnN * 10 in case (rn' < mDnN', rn' + mUpN' > sN) of (True, False) -> dn : ds (False, True) -> dn+1 : ds (True, True) -> if rn' * 2 < sN then dn : ds else dn+1 : ds (False, False) -> gen (dn:ds) rn' sN mUpN' mDnN' rds = if k >= 0 then gen [] r (s * expt 10 k) mUp mDn else let bk = expt 10 (-k) in gen [] (r * bk) s (mUp * bk) (mDn * bk) in (map fromIntegral (reverse rds), k) -- Exponentiation with a cache for the most common numbers. minExpt, maxExpt :: Int minExpt = 0 maxExpt = 1100 expt :: Integer -> Int -> Integer expt base n | base == 2 && n >= minExpt && n <= maxExpt = expts `unsafeAt` n | base == 10 && n <= maxExpt10 = expts10 `unsafeAt` n | otherwise = base^n expts :: Array Int Integer expts = array (minExpt,maxExpt) [(n,2^n) | n <- [minExpt .. maxExpt]] maxExpt10 :: Int maxExpt10 = 324 expts10 :: Array Int Integer expts10 = array (minExpt,maxExpt10) [(n,10^n) | n <- [minExpt .. maxExpt10]] -- | Unsafe conversion for decimal digits. {-# INLINE i2d #-} i2d :: Int -> Word8 i2d i = fromIntegral (ord '0' + i) csv-conduit-0.7.3.0/src/Data/CSV/Conduit/Monoid.hs0000644000000000000000000000046013550145407017570 0ustar0000000000000000{-# LANGUAGE CPP #-} {-# OPTIONS_GHC -fno-warn-unused-imports #-} module Data.CSV.Conduit.Monoid ( (<>) ) where import Data.Monoid #if !MIN_VERSION_base(4,5,0) infixr 6 <> -- | An infix synonym for 'mappend'. (<>) :: Monoid m => m -> m -> m (<>) = mappend {-# INLINE (<>) #-} #endif csv-conduit-0.7.3.0/test/Test.hs0000644000000000000000000000637214071113477014552 0ustar0000000000000000{-# LANGUAGE MultiParamTypeClasses #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} module Main where import qualified Conduit as C import Control.Exception import qualified Data.ByteString.Char8 as B import Data.CSV.Conduit import Data.CSV.Conduit.Conversion import qualified Data.Map as Map import qualified Data.Map.Ordered as OMap import Data.Monoid as M import Data.Text import qualified Data.Vector as V import System.Directory import Test.Framework (Test, defaultMain, testGroup) import Test.Framework.Providers.HUnit import Test.HUnit (assertFailure, (@=?), (@?=)) main :: IO () main = defaultMain tests tests :: [Test] tests = [ testGroup "Basic Ops" baseTests, testGroup "decodeCSV" decodeCSVTests ] baseTests :: [Test] baseTests = [ testCase "mapping with id works" test_identityMap, testCase "simple parsing works" test_simpleParse, testCase "OrderedMap" test_orderedMap ] decodeCSVTests :: [Test] decodeCSVTests = [ testCase "parses a CSV" $ do let efoos = decodeCSV defCSVSettings ("Foo\nfoo" :: B.ByteString) case efoos :: Either SomeException (V.Vector (Named Foo)) of Left e -> assertFailure (show e) Right foos -> V.fromList [Named Foo] @=? foos, testCase "eats parse errors, evidently" $ do let efoos = decodeCSV defCSVSettings ("Foo\nbad" :: B.ByteString) case efoos :: Either SomeException (V.Vector (Named Foo)) of Left e -> assertFailure (show e) Right foos -> M.mempty @=? foos ] data Foo = Foo deriving (Show, Eq) instance FromNamedRecord Foo where parseNamedRecord nr = do s <- nr .: "Foo" case s of "foo" -> pure Foo _ -> fail ("Expected \"foo\" but got " <> B.unpack s) instance ToNamedRecord Foo where toNamedRecord Foo = namedRecord ["Foo" .= ("foo" :: B.ByteString)] test_identityMap :: IO () test_identityMap = do _ <- runResourceT $ mapCSVFile csvSettings f testFile2 outFile f1 <- readFile testFile2 f2 <- readFile outFile f1 @=? f2 removeFile outFile where outFile = "test/testOut.csv" f :: Row Text -> [Row Text] f = return test_simpleParse :: IO () test_simpleParse = do (d :: V.Vector (MapRow B.ByteString)) <- readCSVFile csvSettings testFile1 V.mapM_ assertRow d where assertRow r = v3 @=? (v1 + v2) where v1 = readBS $ r Map.! "Col2" v2 = readBS $ r Map.! "Col3" v3 = readBS $ r Map.! "Sum" test_orderedMap :: IO () test_orderedMap = do unorderedRes <- C.runConduit $ C.yieldMany [unorderedRow] C..| writeHeaders defCSVSettings C..| C.foldC unorderedRes @?= ("\"a\",\"b\"\n\"aval\",\"bval\"\n" :: B.ByteString) orderedRes <- C.runConduit $ C.yieldMany [orderedRow] C..| writeHeadersOrdered defCSVSettings C..| C.foldC orderedRes @?= ("\"b\",\"a\"\n\"bval\",\"aval\"\n" :: B.ByteString) where orderedRow :: OrderedMapRow Text orderedRow = OMap.fromList pairs unorderedRow :: MapRow Text unorderedRow = Map.fromList pairs pairs = [("b", "bval"), ("a", "aval")] csvSettings :: CSVSettings csvSettings = defCSVSettings {csvQuoteChar = Just '`'} testFile1, testFile2 :: FilePath testFile1 = "test/test.csv" testFile2 = "test/test.csv" readBS :: B.ByteString -> Int readBS = read . B.unpack csv-conduit-0.7.3.0/LICENSE0000644000000000000000000000302113550145407013310 0ustar0000000000000000Copyright (c)2013, Ozgun Ataman Copyright (c)2012, Johan Tibell All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Ozgun Ataman nor the names of other contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. csv-conduit-0.7.3.0/Setup.hs0000644000000000000000000000005613550145407013744 0ustar0000000000000000import Distribution.Simple main = defaultMain csv-conduit-0.7.3.0/csv-conduit.cabal0000644000000000000000000000663114071131256015533 0ustar0000000000000000Name: csv-conduit Version: 0.7.3.0 Synopsis: A flexible, fast, conduit-based CSV parser library for Haskell. Homepage: http://github.com/ozataman/csv-conduit License: BSD3 License-file: LICENSE Author: Ozgun Ataman Maintainer: Ozgun Ataman Category: Data, Conduit, CSV, Text Build-type: Simple Cabal-version: >= 1.10 Tested-with: GHC == 9.0.1, GHC == 8.10.4, GHC == 8.8.4, GHC == 8.8.3, GHC == 8.6.5, GHC == 8.4.4, GHC == 8.2.2 Description: CSV files are the de-facto standard in many situations involving data transfer, particularly when dealing with enterprise application or disparate database systems. . While there are a number of CSV libraries in Haskell, at the time of this project's start in 2010, there wasn't one that provided all of the following: . * Full flexibility in quote characters, separators, input/output . * Constant space operation . * Robust parsing, correctness and error resiliency . * Convenient interface that supports a variety of use cases . * Fast operation . This library is an attempt to close these gaps. Please note that this library started its life based on the enumerator package and has recently been ported to work with conduits instead. In the process, it has been greatly simplified thanks to the modular nature of the conduits library. . Following the port to conduits, the library has also gained the ability to parameterize on the stream type and work both with ByteString and Text. . For more documentation and examples, check out the README at: . . extra-source-files: README.md changelog.md test/test.csv test/Test.hs flag lib-Werror default: False manual: True library default-language: Haskell2010 exposed-modules: Data.CSV.Conduit Data.CSV.Conduit.Types Data.CSV.Conduit.Conversion Data.CSV.Conduit.Parser.ByteString Data.CSV.Conduit.Parser.Text other-modules: Data.CSV.Conduit.Conversion.Internal Data.CSV.Conduit.Monoid ghc-options: -Wall -funbox-strict-fields if flag(lib-Werror) ghc-options: -Werror hs-source-dirs: src build-depends: attoparsec >= 0.10 , base >= 4 && < 5 , bytestring , conduit >= 1.2.8 , conduit-extra , containers >= 0.3 , exceptions >= 0.3 , monad-control , text , data-default , vector , array , blaze-builder , unordered-containers , ordered-containers , transformers , mtl , mmorph , primitive , resourcet >= 1.1.2.1 , semigroups if impl(ghc >= 7.2.1) cpp-options: -DGENERICS build-depends: ghc-prim >= 0.2 test-suite test default-language: Haskell2010 type: exitcode-stdio-1.0 main-is: Test.hs ghc-options: -Wall if flag(lib-Werror) ghc-options: -Werror hs-source-dirs: test build-depends: base >= 4 && < 5 , bytestring , conduit >= 1.3.0 , containers >= 0.3 , csv-conduit , directory , vector , HUnit >= 1.2 , test-framework , test-framework-hunit , text , ordered-containers , transformers , mtl , primitive source-repository head type: git location: git://github.com/ozataman/csv-conduit.git csv-conduit-0.7.3.0/README.md0000644000000000000000000000711713550145407013574 0ustar0000000000000000# README [![Build Status](https://travis-ci.org/ozataman/csv-conduit.svg?branch=master)](https://travis-ci.org/ozataman/csv-conduit) ## CSV Files and Haskell CSV files are the de-facto standard in many cases of data transfer, particularly when dealing with enterprise application or disparate database systems. While there are a number of csv libraries in Haskell, at the time of this project's start, there wasn't one that provided all of the following: * Full flexibility in quote characters, separators, input/output * Constant space operation * Robust parsing and error resiliency * Battle-tested reliability in real-world datasets * Fast operation * Convenient interface that supports a variety of use cases Over time, people created other plausible CSV packages like cassava. The major benefit from this library remains to be: * Direct participation in the conduit ecosystem, which is now quite large, and all the benefits that come with it. * Flexibility in CSV format definition. * Resiliency to errors in the input data. ## This package csv-conduit is a conduit-based CSV parsing library that is easy to use, flexible and fast. It leverages the conduit infrastructure to provide constant-space operation, which is quite critical in many real world use cases. For example, you can use http-conduit to download a CSV file from the internet and plug its Source into intoCSV to stream-convert the download into the Row data type and do something with it as the data streams, that is without having to download the entire file to disk first. ## Author & Contributors - Ozgun Ataman (@ozataman) - Daniel Bergey (@bergey) - BJTerry (@BJTerry) - Mike Craig (@mkscrg) - Daniel Corson (@dancor) - Dmitry Dzhus (@dzhus) - Niklas Hambüchen (@nh2) - Facundo Domínguez (@facundominguez) ### Introduction * The CSVeable typeclass implements the key operations. * CSVeable is parameterized on both a stream type and a target CSV row type. * There are 2 basic row types and they implement *exactly* the same operations, so you can chose the right one for the job at hand: - `type MapRow t = Map t t` - `type Row t = [t]` * You basically use the Conduits defined in this library to do the parsing from a CSV stream and rendering back into a CSV stream. * Use the full flexibility and modularity of conduits for sources and sinks. ### Speed While fast operation is of concern, I have so far cared more about correct operation and a flexible API. Please let me know if you notice any performance regressions or optimization opportunities. ### Usage Examples #### Example #1: Basics Using Convenience API ```haskell {-# LANGUAGE OverloadedStrings #-} import Data.Conduit import Data.Conduit.Binary import Data.Conduit.List as CL import Data.CSV.Conduit import Data.Text (Text) -- Just reverse te columns myProcessor :: Monad m => Conduit (Row Text) m (Row Text) myProcessor = CL.map reverse test :: IO () test = runResourceT $ transformCSV defCSVSettings (sourceFile "input.csv") myProcessor (sinkFile "output.csv") ``` #### Example #2: Basics Using Conduit API ```haskell {-# LANGUAGE OverloadedStrings #-} import Data.Conduit import Data.Conduit.Binary import Data.CSV.Conduit import Data.Text (Text) myProcessor :: Monad m => Conduit (Row Text) m (Row Text) myProcessor = awaitForever $ yield -- Let's simply stream from a file, parse the CSV, reserialize it -- and push back into another file. test :: IO () test = runResourceT $ sourceFile "test/BigFile.csv" $= intoCSV defCSVSettings $= myProcessor $= fromCSV defCSVSettings $$ sinkFile "test/BigFileOut.csv" ``` csv-conduit-0.7.3.0/changelog.md0000644000000000000000000000137314071131314014553 0ustar00000000000000000.7.3.0 * Add ordered versions of named records for consistent, controllable header column ordering. [PR 44](https://github.com/ozataman/csv-conduit/pull/44) * Add support for GHC 9.0.1 0.7.2.0 * Remove some dependency upper bounds for forward compatibility. 0.7.1.0 * Add MonadFail instance for Parser. [PR 38](https://github.com/ozataman/csv-conduit/pull/38) 0.7.0.0 * BREAKING: Switch from partial Monoid instance on Parser to total Semigroup instance. * Compatibility with GHC 8.4.x/base-4.11.1.0 0.6.8.1 * Fix documentation mistake in FromNamedRecord/ToNamedRecord examples. 0.6.8 * Haddocks improvements * Fix inlining and specialization rules around formatDecimal * Updates to permit newest conduit/resourcet packages 0.6.7 * Fix build for GHC 8.0.1 csv-conduit-0.7.3.0/test/test.csv0000644000000000000000000000020413550145407014756 0ustar0000000000000000`Col1`,`Col2`,`Col3`,`Sum` `A`,`2`,`3`,`5` `B`,`3`,`4`,`7` `Field using the quote char ``this is the in-quoted value```,`4`,`5`,`9`