1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
|
import qualified System.FilePath.Glob as Glob
import System.Environment
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.UTF8 as U
import Data.Char
import Data.List
import qualified Data.Set as Set
-- | Running totals gathered while scanning text.
--
-- All fields are strict, so evaluating a 'Stat' to weak head normal form
-- evaluates every counter as well.
data Stat = Stat
  { stChars  :: !Int -- ^ Number of characters seen.
  , stVowels :: !Int -- ^ Number of characters recognised as vowels.
  , stPairEL :: !Int -- ^ Number of times an @e@ is directly followed by an @l@.
  , stWords  :: !Int -- ^ Number of word starts (a letter preceded by a non-letter).
  }
-- | Human-readable, multi-line rendering: one labelled counter per line,
-- each line (including the last) terminated by a newline.
instance Show Stat where
  show (Stat nChars nVowels nPairs nWords) =
    unlines
      [ "Characters: " ++ show nChars
      , "Vowels: " ++ show nVowels
      , "EL pairs: " ++ show nPairs
      , "Words: " ++ show nWords
      ]
-- | The empty tally: every counter starts at zero.  Used as the seed when
-- folding per-file results together and when scanning a file.
defaultStat :: Stat
defaultStat =
  Stat
    { stChars = 0
    , stVowels = 0
    , stPairEL = 0
    , stWords = 0
    }
-- | Merge two tallies by adding their counters field by field.
combineStat :: Stat -> Stat -> Stat
combineStat lhs rhs =
  Stat
    (total stChars)
    (total stVowels)
    (total stPairEL)
    (total stWords)
  where
    -- Sum one counter, selected by its accessor, across both inputs.
    total field = field lhs + field rhs
-- | Program entry point.
--
-- Expects exactly one command-line argument, a language code that is handed
-- to 'processLang'.  Any other argument count prints a usage hint instead of
-- failing with a pattern-match error.
main :: IO ()
main = do
  argv <- getArgs
  case argv of
    [lang] -> processLang lang
    -- 'putStrLn' rather than 'print': 'print' goes through 'show', which
    -- would wrap the message in quotes and escape the inner ones.
    [] -> putStrLn "Need to specify language code (e.g. \"es\")"
    _ -> putStrLn "Expected exactly one language code (e.g. \"es\")"
-- | Tabulate every file returned by 'getFileList' for the given language
-- code, add the per-file results together and print the grand total.
processLang :: String -> IO ()
processLang lang =
  getFileList lang
    >>= mapM tabulateFile
    >>= print . foldl' combineStat defaultStat
-- | List the paths matching @*.txt@ in the directory named after the
-- language code (the code with a trailing @/@ appended).
getFileList :: String -> IO [FilePath]
getFileList lang = Glob.globDir1 (Glob.compile "*.txt") (lang ++ "/")
-- | Fold the characters of a lazy 'L.ByteString' into a 'Stat'.
--
-- Characters are taken one at a time with 'U.uncons' and lower-cased before
-- being passed to 'countChar' together with the previous (already
-- lower-cased) character.
--
-- Arguments: the character considered to precede the input (callers in this
-- file pass a space), the remaining input, and the tally accumulated so far.
-- Returns the tally once the input is exhausted.
tabulateText :: Char -> L.ByteString -> Stat -> Stat
tabulateText prev bytes !acc =
  -- The bang on the accumulator forces each intermediate tally as the loop
  -- advances, so no chain of suspended 'countChar' calls (one per input
  -- character) can build up when the code is compiled without optimisation.
  case U.uncons bytes of
    Nothing -> acc
    Just (c, rest) ->
      let lower = toLower c
       in tabulateText lower rest (countChar prev lower acc)
-- | Update a tally for one character, given the character that preceded it.
--
-- * The character count always goes up by one.
-- * The vowel count goes up when 'isVowel' holds for the current character.
-- * The EL-pair count goes up when the previous character is @e@ and the
--   current one is @l@ (the comparison is case-sensitive here;
--   'tabulateText' lower-cases its input before calling).
-- * The word count goes up when a letter follows a non-letter.
--
-- All three arguments are forced on entry.
countChar :: Char -> Char -> Stat -> Stat
countChar !prev !cur !(Stat nChars nVowels nPairs nWords) =
  Stat
    (nChars + 1)
    (nVowels + countIf (isVowel cur))
    (nPairs + countIf (prev == 'e' && cur == 'l'))
    (nWords + countIf startsWord)
  where
    -- A word begins where a letter is preceded by something that is not one.
    startsWord = isLetter cur && not (isLetter prev)
-- | Echo the path to standard output, read the file lazily and tabulate its
-- contents, starting from 'defaultStat' with a space as the notional
-- preceding character.
--
-- The result is forced with '$!' before being returned, so the scan happens
-- here rather than when the caller first inspects the tally.
tabulateFile :: FilePath -> IO Stat
tabulateFile path =
  putStrLn path
    >> L.readFile path
    >>= \bytes -> pure $! tabulateText ' ' bytes defaultStat
-- | The characters counted as vowels: the five plain lower-case vowels plus
-- the lower-case accented vowels of the Latin-1 range.
--
-- Built with 'Set.fromList' so correctness does not depend on the elements
-- being listed in ascending order.
--
-- U+00F1 (n with tilde) is deliberately absent: it is a consonant, like the
-- other Latin-1 consonants and symbols skipped below (U+00E7, U+00F0,
-- U+00F7, U+00FE).
--
-- NOTE(review): the accented forms of @y@ (U+00FD, U+00FF) are included
-- while plain @y@ is not; confirm whether that asymmetry is intended.
vowels :: Set.Set Char
vowels =
  Set.fromList $
    concat
      [ "aeiou"
      , ['\xe0' .. '\xe6'] -- a with grave .. a with ring, ae ligature
      , ['\xe8' .. '\xef'] -- accented e and i
      , ['\xf2' .. '\xf6'] -- accented o
      , ['\xf8' .. '\xfd'] -- o with stroke, accented u, y with acute
      , ['\xff'] -- y with diaeresis
      ]
-- | True when the character is a member of the 'vowels' set.  The lookup is
-- exact, so an upper-case letter only matches if it is itself in the set.
isVowel :: Char -> Bool
isVowel = (`Set.member` vowels)
-- | Turn a condition into an increment: 1 when it holds, 0 otherwise.
countIf :: Bool -> Int
countIf = fromEnum
|