Setup
To execute the code examples provided in this module in ghci, please run the following commands first.
>>>
:m
>>>
import qualified Streamly.Data.Fold as Fold
>>>
import qualified Streamly.Data.Stream as Stream
>>>
import qualified Streamly.Unicode.Stream as Unicode
For APIs that have not been released yet.
>>>
:set -XMagicHash
>>>
import qualified Streamly.Internal.Unicode.Stream as Unicode
Construction (Decoding)
decodeLatin1 :: Monad m => Stream m Word8 -> Stream m Char Source #
Decode a stream of bytes to Unicode characters by mapping each byte to a
corresponding Unicode Char
in 0-255 range.
UTF-8 Decoding
data CodingFailureMode Source #
Instances
Show CodingFailureMode Source # | |
Defined in Streamly.Internal.Unicode.Stream |
parseCharUtf8With :: Monad m => CodingFailureMode -> Parser Word8 m Char Source #
decodeUtf8 :: Monad m => Stream m Word8 -> Stream m Char Source #
Decode a UTF-8 encoded bytestream to a stream of Unicode characters. Any invalid codepoint encountered is replaced with the unicode replacement character.
decodeUtf8' :: Monad m => Stream m Word8 -> Stream m Char Source #
Decode a UTF-8 encoded bytestream to a stream of Unicode characters. The function throws an error if an invalid codepoint is encountered.
decodeUtf8_ :: Monad m => Stream m Word8 -> Stream m Char Source #
Decode a UTF-8 encoded bytestream to a stream of Unicode characters. Any invalid codepoint encountered is dropped.
decodeUtf16le' :: Stream m Word16 -> Stream m Char Source #
Decode a UTF-16 little endian encoded bytestream to a stream of Unicode characters. The function throws an error if an invalid codepoint is encountered.
Unimplemented
Resumable UTF-8 Decoding
data DecodeError Source #
Instances
Show DecodeError Source # | |
Defined in Streamly.Internal.Unicode.Stream |
type DecodeState = Word8 Source #
decodeUtf8Either :: Monad m => Stream m Word8 -> Stream m (Either DecodeError Char) Source #
Pre-release
resumeDecodeUtf8Either :: Monad m => DecodeState -> CodePoint -> Stream m Word8 -> Stream m (Either DecodeError Char) Source #
Pre-release
UTF-8 Array Stream Decoding
decodeUtf8Chunks :: MonadIO m => Stream m (Array Word8) -> Stream m Char Source #
Like decodeUtf8
but for a chunked stream. It may be slightly faster than
flattening the stream and then decoding with decodeUtf8
.
decodeUtf8Chunks' :: MonadIO m => Stream m (Array Word8) -> Stream m Char Source #
Like 'decodeUtf8'' but for a chunked stream. It may be slightly faster than flattening the stream and then decoding with 'decodeUtf8''.
decodeUtf8Chunks_ :: MonadIO m => Stream m (Array Word8) -> Stream m Char Source #
Like decodeUtf8_
but for a chunked stream. It may be slightly faster
than flattening the stream and then decoding with decodeUtf8_
.
Elimination (Encoding)
Latin1 Encoding
encodeLatin1 :: Monad m => Stream m Char -> Stream m Word8 Source #
Like encodeLatin1'
but silently maps input codepoints beyond 255 to
arbitrary Latin1 chars in 0-255 range. No error or exception is thrown when
such mapping occurs.
encodeLatin1' :: Monad m => Stream m Char -> Stream m Word8 Source #
Encode a stream of Unicode characters to bytes by mapping each character to a byte in 0-255 range. Throws an error if the input stream contains characters beyond 255.
encodeLatin1_ :: Monad m => Stream m Char -> Stream m Word8 Source #
Like encodeLatin1
but drops the input characters beyond 255.
UTF-8 Encoding
encodeUtf8 :: Monad m => Stream m Char -> Stream m Word8 Source #
Encode a stream of Unicode characters to a UTF-8 encoded bytestream. Any Invalid characters (U+D800-U+D8FF) in the input stream are replaced by the Unicode replacement character U+FFFD.
encodeUtf8' :: Monad m => Stream m Char -> Stream m Word8 Source #
Encode a stream of Unicode characters to a UTF-8 encoded bytestream. When any invalid character (U+D800-U+D8FF) is encountered in the input stream the function errors out.
encodeUtf8_ :: Monad m => Stream m Char -> Stream m Word8 Source #
Encode a stream of Unicode characters to a UTF-8 encoded bytestream. Any Invalid characters (U+D800-U+D8FF) in the input stream are dropped.
encodeStrings :: MonadIO m => (Stream m Char -> Stream m Word8) -> Stream m String -> Stream m (Array Word8) Source #
Encode a stream of String
using the supplied encoding scheme. Each
string is encoded as an Array Word8
.
encodeUtf16le' :: Stream m Char -> Stream m Word16 Source #
Encode a stream of Unicode characters to a UTF-16 little endian encoded bytestream.
Unimplemented
Transformation
stripHead :: Monad m => Stream m Char -> Stream m Char Source #
Remove leading whitespace from a string.
stripHead = Stream.dropWhile isSpace
Pre-release
lines :: Monad m => Fold m Char b -> Stream m Char -> Stream m b Source #
Fold each line of the stream using the supplied Fold
and stream the result.
>>>
Stream.fold Fold.toList $ Unicode.lines Fold.toList (Stream.fromList "lines\nthis\nstring\n\n\n")
["lines","this","string","",""]
lines = Stream.splitOnSuffix (== '\n')
Pre-release
words :: Monad m => Fold m Char b -> Stream m Char -> Stream m b Source #
Fold each word of the stream using the supplied Fold
and stream the result.
>>>
Stream.fold Fold.toList $ Unicode.words Fold.toList (Stream.fromList "fold these words")
["fold","these","words"]
words = Stream.wordsBy isSpace
Pre-release
unlines :: MonadIO m => Unfold m a Char -> Stream m a -> Stream m Char Source #
Unfold a stream to character streams using the supplied Unfold
and concat the results suffixing a newline character \n
to each stream.
unlines = Stream.interposeSuffix 'n' unlines = Stream.intercalateSuffix Unfold.fromList "n"
Pre-release
unwords :: MonadIO m => Unfold m a Char -> Stream m a -> Stream m Char Source #
Unfold the elements of a stream to character streams using the supplied
Unfold
and concat the results with a whitespace character infixed between
the streams.
unwords = Stream.interpose ' ' unwords = Stream.intercalate Unfold.fromList " "
Pre-release
StreamD UTF8 Encoding / Decoding transformations.
encodeUtf8D :: Monad m => Stream m Char -> Stream m Word8 Source #
See section "3.9 Unicode Encoding Forms" in https://www.unicode.org/versions/Unicode13.0.0/UnicodeStandard-13.0.pdf
decodeUtf8EitherD :: Monad m => Stream m Word8 -> Stream m (Either DecodeError Char) Source #
resumeDecodeUtf8EitherD :: Monad m => DecodeState -> CodePoint -> Stream m Word8 -> Stream m (Either DecodeError Char) Source #
Decoding String Literals
fromStr# :: MonadIO m => Addr# -> Stream m Char Source #
Read UTF-8 encoded bytes as chars from an Addr#
until a 0 byte is
encountered, the 0 byte is not included in the stream.
Unsafe: The caller is responsible for safe addressing.
Note that this is completely safe when reading from Haskell string literals because they are guaranteed to be NULL terminated:
>>>
Stream.fold Fold.toList (Unicode.fromStr# "Haskell"#)
"Haskell"
Deprecations
decodeUtf8Lax :: Monad m => Stream m Word8 -> Stream m Char Source #
Deprecated: Please use decodeUtf8
instead
Same as decodeUtf8
encodeLatin1Lax :: Monad m => Stream m Char -> Stream m Word8 Source #
Deprecated: Please use encodeLatin1
instead
Same as encodeLatin1
encodeUtf8Lax :: Monad m => Stream m Char -> Stream m Word8 Source #
Deprecated: Please use encodeUtf8
instead
Same as encodeUtf8