Loading...
{-# LANGUAGE FlexibleInstances, CPP #-}

{-# OPTIONS_GHC -Wno-orphans #-}

import Data.Char (isSpace)
import Data.Foldable (traverse_)
import Data.Function ((&))
import Data.Functor.Identity (Identity(..))
import Data.Hashable (Hashable(..))
import System.Environment (getArgs)
import Streamly.Data.Array (Unbox)

import qualified Data.Char as Char
import qualified Data.HashMap.Strict as Map
import qualified Data.List as List
import qualified Data.Ord as Ord
import qualified Streamly.Data.Array as Array
import qualified Streamly.Data.Fold.Prelude as Fold
import qualified Streamly.Data.Stream as Stream
import qualified Streamly.FileSystem.File as File
import qualified Streamly.Unicode.Stream as Unicode

hashArray :: (Unbox a, Integral b, Num c) =>
    Fold.Fold Identity a b -> Array.Array a -> c
hashArray f arr =
      Array.read arr
    & Stream.fold f
    & fromIntegral . runIdentity

#if MIN_VERSION_hashable(1,4,0)
instance (Eq a, Enum a, Unbox a) => Hashable (Array.Array a) where
#else
instance (Enum a, Unbox a) => Hashable (Array.Array a) where
#endif
    hash = hashArray Fold.rollingHash

    hashWithSalt salt =
        hashArray (Fold.rollingHashWithSalt (fromIntegral salt))

{-# INLINE toLower #-}
toLower :: Char -> Char
toLower c
  | uc >= 0x61 && uc <= 0x7a = c
  | otherwise = Char.toLower c
  where
    uc = fromIntegral (Char.ord c) :: Word

{-# INLINE isAlpha #-}
isAlpha :: Char -> Bool
isAlpha c
  | uc >= 0x61 && uc <= 0x7a = True
  | otherwise = Char.isAlpha c
  where
    uc = fromIntegral (Char.ord c) :: Word

main :: IO ()
main = do
    inFile <- fmap head getArgs

    let counter = Fold.foldl' (\n _ -> n + 1) (0 :: Int)
        classifier = Fold.toHashMapIO id counter
    -- Write the stream to a hashmap consisting of word counts
    mp <-
        File.read inFile                       -- Stream IO Word8
         & Unicode.decodeLatin1                -- Stream IO Char
         & fmap toLower                        -- Stream IO Char
         & Stream.wordsBy isSpace Fold.toList  -- Stream IO String
         & Stream.filter (all isAlpha)         -- Stream IO String
         & Stream.fold classifier              -- IO (HashMap String Int)

    traverse_ print
        $ List.sortOn (Ord.Down . snd) (Map.toList mp)
        & List.take 25