1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
|
import Prelude hiding (not)
import Text.Parsers.Frisby
import Data.Char (toUpper)
not = doesNotMatch
loption = option []
cmany f = concat `fmap` many f
cmany1 f = concat `fmap` many1 f
parser = mdo
words <- newRule $ optional pause ->> cmany (word <<- optional pause)
word <- newRule $ lojbanWord // nonLojbanWord
lojbanWord <- newRule $ cmene // cmavo // brivla
brivla <- newRule $ gismu // fuhivla // lujvo
cmene <- newRule $ jbocme // zifcme
zifcme <- newRule $ not h ->> cmany (nucleus // glide // h // consonant <<- doesNotMatch pause // digit)
->> consonant
->> matches pause
jbocme <- newRule $ matches zifcme ->> cmany (anySyllable // digit) <<- matches pause
cmavo <- newRule $ not cmene ->> not cvcyLujvo ->> cmavoForm <<- matches postWord
cvcyLujvo <- newRule $ cvcRafsi <++> y <++> loption h <++> cmany initialRafsi <++> brivlaCore
// stressedCvcRafsi <++> y <++> shortFinalRafsi
cmavoForm <- newRule $ not h ->> not cluster ->> onset <++> cmany (nucleus <++> h) <++> (not stressed ->> nucleus // nucleus <<- not cluster)
// cmany1 y
// digit
lujvo <- newRule $ not gismu ->> not fuhivla ->> not cmavo ->> cmany initialRafsi <++> brivlaCore
brivlaCore <- newRule $ fuhivla // gismu // cvvFinalRafsi // stressedInitialRafsi <++> shortFinalRafsi
stressedInitialRafsi <- newRule $ stressedExtendedRafsi // stressedYRafsi // stressedYLessRafsi
initialRafsi <- newRule $ extendedRafsi // yRafsi // not anyExtendedRafsi ->> yLessRafsi
anyExtendedRafsi <- newRule $ fuhivla // extendedRafsi // stressedExtendedRafsi
fuhivla <- newRule $ fuhivlaHead <++> stressedSyllable <++> cmany consonantalSyllable <++> finalSyllable
stressedExtendedRafsi <- newRule $ stressedBrivlaRafsi // stressedFuhivlaRafsi
extendedRafsi <- newRule $ brivlaRafsi // fuhivlaRafsi
stressedBrivlaRafsi <- newRule $ matches unstressedSyllable ->> brivlaHead <++> stressedSyllable <++> h <++> y
brivlaRafsi <- newRule $ matches (syllable <<- cmany consonantalSyllable <<- syllable) ->> brivlaHead <++> h <++> y <++> loption h
stressedFuhivlaRafsi <- newRule $ fuhivlaHead <++> stressedSyllable <++> (matches consonant ->> onset <++> y)
fuhivlaRafsi <- newRule $ not unstressedSyllable ->> (fuhivlaHead <<- stressedSyllable) <++> (matches consonant ->> onset <++> y)
fuhivlaHead <- newRule $ not rafsiString ->> brivlaHead
brivlaHead <- newRule $ not cmavo ->> not slinkuhi ->> not h ->> matches onset ->> cmany unstressedSyllable
slinkuhi <- newRule $ consonant <++> rafsiString
rafsiString <- newRule $ cmany yLessRafsi
<++> (gismu
// cvvFinalRafsi
// stressedYLessRafsi <++> shortFinalRafsi
// yRafsi
// stressedYRafsi
// loption stressedYLessRafsi <++> initialPair <++> y
)
gismu <- newRule $ ((initialPair <++> stressedVowel // consonant <++> stressedVowel <++> consonant) <<- matches finalSyllable)
<++> consonant <++> vowel <<- matches postWord
cvvFinalRafsi <- newRule $ consonant <++> stressedVowel <++> h <++> matches finalSyllable ->> vowel <<- matches postWord
shortFinalRafsi <- newRule $ matches finalSyllable ->> (consonant <++> diphthong // initialPair <++> vowel) <<- matches postWord
stressedYRafsi <- newRule $ (stressedLongRafsi // stressedCvcRafsi) <++> y
stressedYLessRafsi <- newRule $ stressedCvcRafsi <<- not y // stressedCcvRafsi // stressedCvvRafsi
stressedLongRafsi <- newRule $ initialPair <++> stressedVowel <++> consonant // consonant <++> stressedVowel <++> consonant <++> consonant
stressedCvcRafsi <- newRule $ consonant <++> stressedVowel <++> consonant
stressedCcvRafsi <- newRule $ initialPair <++> stressedVowel
stressedCvvRafsi <- newRule $ consonant <++> (unstressedVowel <++> h <++> stressedVowel // stressedDiphthong) <++> loption rHyphen
yRafsi <- newRule $ (longRafsi // cvcRafsi) <++> y <++> loption h
yLessRafsi <- newRule $ not yRafsi ->> (cvcRafsi <<- not y // ccvRafsi // cvvRafsi) <<- not anyExtendedRafsi
longRafsi <- newRule $ initialPair <++> unstressedVowel <++> consonant // consonant <++> unstressedVowel <++> consonant <++> consonant
cvcRafsi <- newRule $ consonant <++> unstressedVowel <++> consonant
ccvRafsi <- newRule $ initialPair <++> unstressedVowel
cvvRafsi <- newRule $ consonant <++> (unstressedVowel <++> h <++> unstressedVowel // unstressedDiphthong) <++> loption rHyphen
rHyphen <- newRule $ r <<- matches consonant // n <<- matches r
finalSyllable <- newRule $ (onset <<- not y <<- not stressed) <++> (nucleus <<- not cmene <<- matches postWord)
stressedSyllable <- newRule $ matches stressed ->> syllable // syllable <<- matches stress
stressedDiphthong <- newRule $ matches stressed ->> diphthong // diphthong <<- matches stress
stressedVowel <- newRule $ matches stressed ->> vowel // vowel <<- matches stressed
unstressedSyllable <- newRule $ not stressed ->> syllable <<- not stress // consonantalSyllable
unstressedDiphthong <- newRule $ not stressed ->> diphthong <<- not stress
unstressedVowel <- newRule $ not stressed ->> vowel <<- not stress
stress <- newRule $ cmany consonant <++> loption y <++> syllable <++> pause
stressed <- newRule $ onset <++> cmany comma <++> choice [a,e,i,o,u]
anySyllable <- newRule $ onset <++> nucleus <++> loption coda // consonantalSyllable
syllable <- newRule $ (onset <<- not y) <++> nucleus <++> loption l
consonantalSyllable <- newRule $ consonant <++> syllabic <<- matches (consonantalSyllable // onset) <++> loption (consonant <<- matches spaces)
coda <- newRule $ not anySyllable ->> consonant <<- anySyllable // loption syllabic <++> loption consonant <<- matches pause
onset <- newRule $ h // loption consonant <++> glide // initial
nucleus <- newRule $ vowel // diphthong // y <<- not nucleus
glide <- newRule $ (i/u) <<- matches nucleus <<- not glide
diphthong <- newRule $ (a <++> i // a <++> u // e <++> i // o <++> i) <<- not nucleus <<- not glide
vowel <- newRule $ (a // e // i // o // u) <<- not nucleus
let letter x = cmany comma ->> oneOf' ([x,toUpper x])
a <- newRule $ letter 'a'
e <- newRule $ letter 'e'
i <- newRule $ letter 'i'
o <- newRule $ letter 'o'
u <- newRule $ letter 'u'
y <- newRule $ letter 'y'
cluster <- newRule $ consonant <++> cmany1 consonant
initialPair <- newRule $ matches initial ->> consonant <++> consonant <<- not consonant
initial <- newRule $ (affricate // loption sibilant <++> loption other <++> loption liquid) <<- not consonant <<- not glide
affricate <- newRule $ t <++> c // t <++> s // d <++> j // d <++> z
liquid <- newRule $ l // r
other <- newRule $ p // t <<- not l // k // f // x // b // d <<- not l // g // v // m // n <<- not liquid
sibilant <- newRule $ c // s <<- not x // (j // z) <<- not n <<- not liquid
consonant <- newRule $ voiced // unvoiced // syllabic
syllabic <- newRule $ l // m // n // r
voiced <- newRule $ b // d // g // j // v // z
unvoiced <- newRule $ c // f // k // p // s // t // x
l <- newRule $ letter 'l' <<- not h <<- not l
m <- newRule $ letter 'm' <<- not h <<- not m <<- not z
n <- newRule $ letter 'n' <<- not h <<- not n <<- not affricate
r <- newRule $ letter 'r' <<- not h <<- not r
b <- newRule $ letter 'b' <<- not h <<- not b <<- not unvoiced
d <- newRule $ letter 'd' <<- not h <<- not d <<- not unvoiced
g <- newRule $ letter 'g' <<- not h <<- not g <<- not unvoiced
v <- newRule $ letter 'v' <<- not h <<- not v <<- not unvoiced
j <- newRule $ letter 'j' <<- not h <<- not j <<- not z <<- not unvoiced
z <- newRule $ letter 'z' <<- not h <<- not z <<- not j <<- not unvoiced
s <- newRule $ letter 's' <<- not h <<- not s <<- not c <<- not voiced
c <- newRule $ letter 'c' <<- not h <<- not c <<- not s <<- not x <<- not voiced
x <- newRule $ letter 'x' <<- not h <<- not x <<- not c <<- not x <<- not voiced
k <- newRule $ letter 'k' <<- not h <<- not k <<- not x <<- not voiced
f <- newRule $ letter 'f' <<- not h <<- not f <<- not voiced
p <- newRule $ letter 'p' <<- not h <<- not p <<- not voiced
t <- newRule $ letter 't' <<- not h <<- not t <<- not voiced
h <- newRule $ cmany comma ->> oneOf' "'h" <++> nucleus
digit <- newRule $ cmany comma ->> oneOf' "0123456789" <<- not h <<- not nucleus
postWord <- newRule $ pause // not nucleus ->> lojbanWord
pause <- newRule $ cmany comma <++> cmany1 spaceChar // eOF
eOF <- newRule $ cmany comma <<- not (char '.')
comma <- newRule $ text ","
nonLojbanWord <- newRule $ not lojbanWord ->> cmany nonSpace
nonSpace <- newRule $ not spaceChar ->> anyChar
spaceChar <- newRule $ oneOf' ".\t\n\r?!\0020"
spaces <- newRule $ not cY ->> initialSpaces
initialSpaces <- newRule $ cmany1 (cmany comma <++> spaceChar // not ybu ->> cY) <++> loption eOF // eOF
ybu <- newRule $ cY <++> cmany spaceChar <++> cBU
cBU <- newRule $ matches cmavo ->> b <++> u <<- matches postWord
cY <- newRule $ matches cmavo ->> many y <<- matches postWord
return words
oneOf' = fmap (:[]) . oneOf
|