-- |
-- Module      : Unicode.Char.General.Compat
-- Copyright   : (c) 2020 Composewell Technologies and Contributors
-- License     : Apache-2.0
-- Maintainer  : streamly@composewell.com
-- Stability   : experimental
--
-- Compatibility module for general character property related functions.
--
-- The functions of this module are drop-in replacement for those in "Data.Char".
-- They are similar but not identical to some functions in "Unicode.Char.General",
-- therefore they are placed in a separate module in order to avoid ambiguity.
--
module Unicode.Char.General.Compat
    ( isAlpha
    , isAlphaNum
    , isLetter
    , isSpace
    ) where

import Data.Char (ord)
import qualified Unicode.Internal.Char.UnicodeData.GeneralCategory as UC

-- $setup
-- import qualified Unicode.Char.General

-- | Same as 'isLetter'.
--
-- @since 0.3.0
{-# INLINE isAlpha #-}
isAlpha :: Char -> Bool
isAlpha :: Char -> Bool
isAlpha = Char -> Bool
isLetter

{-| Selects alphabetic or numeric Unicode characters.

This function returns 'True' if its argument has one of the
following 'GeneralCategory's, or 'False' otherwise:

* 'UppercaseLetter'
* 'LowercaseLetter'
* 'TitlecaseLetter'
* 'ModifierLetter'
* 'OtherLetter'
* 'DecimalNumber'
* 'LetterNumber'
* 'OtherNumber'

prop> isAlphaNum c == Data.Char.isAlphaNum c

__Note:__ this function is incompatible with 'Unicode.Char.General.isAlphabetic':

>>> Unicode.Char.General.isAlphabetic '\x345'
True
>>> isAlphaNum '\x345'
False

@since 0.6.0 moved to Compat module

@since 0.3.0
-}
isAlphaNum :: Char -> Bool
isAlphaNum :: Char -> Bool
isAlphaNum Char
c =
    let !cp :: Int
cp = Char -> Int
ord Char
c
    -- NOTE: The guard constant is updated at each Unicode revision.
    --       It must be < 0x40000 to be accepted by generalCategoryPlanes0To3.
    in Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
UC.MaxIsAlphaNum Bool -> Bool -> Bool
&&
        let !gc :: Int
gc = Int -> Int
UC.generalCategoryPlanes0To3 Int
cp
        in Int
gc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
UC.OtherLetter Bool -> Bool -> Bool
||
           (Int
UC.DecimalNumber Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
gc Bool -> Bool -> Bool
&& Int
gc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
UC.OtherNumber)
    -- Use the following in case the previous code is not valid anymore:
    -- gc <= UC.OtherLetter || (UC.DecimalNumber <= gc && gc <= UC.OtherNumber)
    -- where !gc = UC.generalCategory c

{-| Selects alphabetic Unicode characters (lower-case, upper-case and title-case
letters, plus letters of caseless scripts and modifiers letters).

This function returns 'True' if its argument has one of the
following 'GeneralCategory's, or 'False' otherwise:

* 'UppercaseLetter'
* 'LowercaseLetter'
* 'TitlecaseLetter'
* 'ModifierLetter'
* 'OtherLetter'

__Note:__ this function is /not/ equivalent to 'Unicode.Char.General.isAlphabetic'.
See the description of 'Unicode.Char.General.isAlphabetic' for further details.

prop> isLetter c == Data.Char.isLetter c

@since 0.3.0
-}
{-# INLINE isLetter #-}
isLetter :: Char -> Bool
isLetter :: Char -> Bool
isLetter Char
c =
    let !cp :: Int
cp = Char -> Int
ord Char
c
    -- NOTE: The guard constant is updated at each Unicode revision.
    --       It must be < 0x40000 to be accepted by generalCategoryPlanes0To3.
    in Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
UC.MaxIsLetter Bool -> Bool -> Bool
&&
        let !gc :: Int
gc = Int -> Int
UC.generalCategoryPlanes0To3 Int
cp
        in Int
gc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
UC.OtherLetter
    -- Use the following in case the previous code is not valid anymore:
    -- UC.generalCategory c <= UC.OtherLetter

{-| Selects Unicode space characters (general category 'Space'),
and the control characters @\\t@, @\\n@, @\\r@, @\\f@, @\\v@.

__Note:__ 'isSpace' is /not/ equivalent to 'Unicode.Char.General.isWhiteSpace'.
'Unicode.Char.General.isWhiteSpace' selects the same characters from 'isSpace'
plus the following:

* @U+0085@ NEXT LINE (NEL)
* @U+2028@ LINE SEPARATOR
* @U+2029@ PARAGRAPH SEPARATOR

prop> isSpace c == Data.Char.isSpace c

@since 0.3.0
-}
isSpace :: Char -> Bool
-- NOTE: The guard constant is updated at each Unicode revision.
--       It must be < 0x40000 to be accepted by generalCategoryPlanes0To3.
isSpace :: Char -> Bool
isSpace Char
c = Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
UC.MaxIsSpace Bool -> Bool -> Bool
&& case Char
c of
    Char
'\t' -> Bool
True
    Char
'\n' -> Bool
True
    Char
'\v' -> Bool
True
    Char
'\f' -> Bool
True
    Char
'\r' -> Bool
True
    Char
_    -> Int -> Int
UC.generalCategoryPlanes0To3 Int
cp Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
UC.Space
    where cp :: Int
cp = Char -> Int
ord Char
c