{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE DeriveLift #-}
module Text.Collate.Collation
( Collation(..)
, CollationElement(..)
, unfoldCollation
, insertElements
, alterElements
, suppressContractions
, findLast
, findFirst
, matchLongestPrefix
, getCollationElements
, parseCollation
, parseCJKOverrides
)
where
import qualified Data.IntSet as IntSet
import qualified Data.Text as T
import qualified Data.Text.Read as TR
import Data.Text (Text)
import Data.Bits ( Bits((.|.), shiftR, (.&.)) )
import Data.List (foldl')
import Text.Collate.UnicodeData (readCodePoints)
import Data.Maybe
import Data.Foldable (minimumBy, maximumBy)
import Data.Word (Word16)
import Data.Binary (Binary(get, put))
import Language.Haskell.TH.Syntax (Lift(..))
import Instances.TH.Lift ()
import qualified Text.Collate.Trie as Trie
import Text.Collate.CanonicalCombiningClass (canonicalCombiningClass)
import Text.Printf
#if MIN_VERSION_base(4,11,0)
#else
import Data.Semigroup (Semigroup(..))
#endif
data CollationElement =
CollationElement
{ CollationElement -> Bool
collationVariable :: !Bool
, CollationElement -> Word16
collationL1 :: {-# UNPACK #-} !Word16
, CollationElement -> Word16
collationL2 :: {-# UNPACK #-} !Word16
, CollationElement -> Word16
collationL3 :: {-# UNPACK #-} !Word16
, CollationElement -> Word16
collationL4 :: {-# UNPACK #-} !Word16
} deriving (CollationElement -> CollationElement -> Bool
(CollationElement -> CollationElement -> Bool)
-> (CollationElement -> CollationElement -> Bool)
-> Eq CollationElement
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CollationElement -> CollationElement -> Bool
$c/= :: CollationElement -> CollationElement -> Bool
== :: CollationElement -> CollationElement -> Bool
$c== :: CollationElement -> CollationElement -> Bool
Eq, CollationElement -> Q Exp
(CollationElement -> Q Exp) -> Lift CollationElement
forall t. (t -> Q Exp) -> Lift t
lift :: CollationElement -> Q Exp
$clift :: CollationElement -> Q Exp
Lift)
instance Ord CollationElement where
compare :: CollationElement -> CollationElement -> Ordering
compare (CollationElement _ p1 :: Word16
p1 s1 :: Word16
s1 t1 :: Word16
t1 q1 :: Word16
q1) (CollationElement _ p2 :: Word16
p2 s2 :: Word16
s2 t2 :: Word16
t2 q2 :: Word16
q2) =
Word16 -> Word16 -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Word16
p1 Word16
p2 Ordering -> Ordering -> Ordering
forall a. Semigroup a => a -> a -> a
<> Word16 -> Word16 -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Word16
s1 Word16
s2 Ordering -> Ordering -> Ordering
forall a. Semigroup a => a -> a -> a
<> Word16 -> Word16 -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Word16
t1 Word16
t2 Ordering -> Ordering -> Ordering
forall a. Semigroup a => a -> a -> a
<> Word16 -> Word16 -> Ordering
forall a. Ord a => a -> a -> Ordering
compare Word16
q1 Word16
q2
instance Show CollationElement where
show :: CollationElement -> String
show (CollationElement v :: Bool
v l1 :: Word16
l1 l2 :: Word16
l2 l3 :: Word16
l3 l4 :: Word16
l4) =
String -> String -> Word16 -> Word16 -> Word16 -> Word16 -> String
forall r. PrintfType r => String -> r
printf "CollationElement %s 0x%04X 0x%04X 0x%04X 0x%04X" (Bool -> String
forall a. Show a => a -> String
show Bool
v) Word16
l1 Word16
l2 Word16
l3 Word16
l4
instance Binary CollationElement where
put :: CollationElement -> Put
put (CollationElement v :: Bool
v w :: Word16
w x :: Word16
x y :: Word16
y z :: Word16
z) = (Bool, Word16, Word16, Word16, Word16) -> Put
forall t. Binary t => t -> Put
put (Bool
v,Word16
w,Word16
x,Word16
y,Word16
z)
get :: Get CollationElement
get = do
(v :: Bool
v,w :: Word16
w,x :: Word16
x,y :: Word16
y,z :: Word16
z) <- Get (Bool, Word16, Word16, Word16, Word16)
forall t. Binary t => Get t
get
CollationElement -> Get CollationElement
forall (m :: * -> *) a. Monad m => a -> m a
return (CollationElement -> Get CollationElement)
-> CollationElement -> Get CollationElement
forall a b. (a -> b) -> a -> b
$ Bool -> Word16 -> Word16 -> Word16 -> Word16 -> CollationElement
CollationElement Bool
v Word16
w Word16
x Word16
y Word16
z
newtype Collation = Collation { Collation -> Trie [CollationElement]
unCollation :: Trie.Trie [CollationElement] }
deriving (Int -> Collation -> ShowS
[Collation] -> ShowS
Collation -> String
(Int -> Collation -> ShowS)
-> (Collation -> String)
-> ([Collation] -> ShowS)
-> Show Collation
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Collation] -> ShowS
$cshowList :: [Collation] -> ShowS
show :: Collation -> String
$cshow :: Collation -> String
showsPrec :: Int -> Collation -> ShowS
$cshowsPrec :: Int -> Collation -> ShowS
Show, Collation -> Collation -> Bool
(Collation -> Collation -> Bool)
-> (Collation -> Collation -> Bool) -> Eq Collation
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Collation -> Collation -> Bool
$c/= :: Collation -> Collation -> Bool
== :: Collation -> Collation -> Bool
$c== :: Collation -> Collation -> Bool
Eq, Eq Collation
Eq Collation =>
(Collation -> Collation -> Ordering)
-> (Collation -> Collation -> Bool)
-> (Collation -> Collation -> Bool)
-> (Collation -> Collation -> Bool)
-> (Collation -> Collation -> Bool)
-> (Collation -> Collation -> Collation)
-> (Collation -> Collation -> Collation)
-> Ord Collation
Collation -> Collation -> Bool
Collation -> Collation -> Ordering
Collation -> Collation -> Collation
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Collation -> Collation -> Collation
$cmin :: Collation -> Collation -> Collation
max :: Collation -> Collation -> Collation
$cmax :: Collation -> Collation -> Collation
>= :: Collation -> Collation -> Bool
$c>= :: Collation -> Collation -> Bool
> :: Collation -> Collation -> Bool
$c> :: Collation -> Collation -> Bool
<= :: Collation -> Collation -> Bool
$c<= :: Collation -> Collation -> Bool
< :: Collation -> Collation -> Bool
$c< :: Collation -> Collation -> Bool
compare :: Collation -> Collation -> Ordering
$ccompare :: Collation -> Collation -> Ordering
$cp1Ord :: Eq Collation
Ord, Collation -> Q Exp
(Collation -> Q Exp) -> Lift Collation
forall t. (t -> Q Exp) -> Lift t
lift :: Collation -> Q Exp
$clift :: Collation -> Q Exp
Lift, b -> Collation -> Collation
NonEmpty Collation -> Collation
Collation -> Collation -> Collation
(Collation -> Collation -> Collation)
-> (NonEmpty Collation -> Collation)
-> (forall b. Integral b => b -> Collation -> Collation)
-> Semigroup Collation
forall b. Integral b => b -> Collation -> Collation
forall a.
(a -> a -> a)
-> (NonEmpty a -> a)
-> (forall b. Integral b => b -> a -> a)
-> Semigroup a
stimes :: b -> Collation -> Collation
$cstimes :: forall b. Integral b => b -> Collation -> Collation
sconcat :: NonEmpty Collation -> Collation
$csconcat :: NonEmpty Collation -> Collation
<> :: Collation -> Collation -> Collation
$c<> :: Collation -> Collation -> Collation
Semigroup, Semigroup Collation
Collation
Semigroup Collation =>
Collation
-> (Collation -> Collation -> Collation)
-> ([Collation] -> Collation)
-> Monoid Collation
[Collation] -> Collation
Collation -> Collation -> Collation
forall a.
Semigroup a =>
a -> (a -> a -> a) -> ([a] -> a) -> Monoid a
mconcat :: [Collation] -> Collation
$cmconcat :: [Collation] -> Collation
mappend :: Collation -> Collation -> Collation
$cmappend :: Collation -> Collation -> Collation
mempty :: Collation
$cmempty :: Collation
$cp1Monoid :: Semigroup Collation
Monoid)
instance Binary Collation where
put :: Collation -> Put
put (Collation m :: Trie [CollationElement]
m) = Trie [CollationElement] -> Put
forall t. Binary t => t -> Put
put Trie [CollationElement]
m
get :: Get Collation
get = Trie [CollationElement] -> Collation
Collation (Trie [CollationElement] -> Collation)
-> Get (Trie [CollationElement]) -> Get Collation
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Get (Trie [CollationElement])
forall t. Binary t => Get t
get
unfoldCollation :: Collation -> [([Int], [CollationElement])]
unfoldCollation :: Collation -> [([Int], [CollationElement])]
unfoldCollation (Collation trie :: Trie [CollationElement]
trie) = Trie [CollationElement] -> [([Int], [CollationElement])]
forall a. Trie a -> [([Int], a)]
Trie.unfoldTrie Trie [CollationElement]
trie
insertElements :: [Int] -> [CollationElement] -> Collation -> Collation
insertElements :: [Int] -> [CollationElement] -> Collation -> Collation
insertElements codepoints :: [Int]
codepoints els :: [CollationElement]
els (Collation trie :: Trie [CollationElement]
trie) =
Trie [CollationElement] -> Collation
Collation (Trie [CollationElement] -> Collation)
-> Trie [CollationElement] -> Collation
forall a b. (a -> b) -> a -> b
$ [Int]
-> [CollationElement]
-> Trie [CollationElement]
-> Trie [CollationElement]
forall a. [Int] -> a -> Trie a -> Trie a
Trie.insert [Int]
codepoints [CollationElement]
els Trie [CollationElement]
trie
suppressContractions :: [Int] -> Collation -> Collation
suppressContractions :: [Int] -> Collation -> Collation
suppressContractions cps :: [Int]
cps coll :: Collation
coll =
([Int] -> Collation -> Collation)
-> Collation -> [[Int]] -> Collation
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr ((Maybe [CollationElement] -> Maybe [CollationElement])
-> [Int] -> Collation -> Collation
alterElements (Maybe [CollationElement]
-> Maybe [CollationElement] -> Maybe [CollationElement]
forall a b. a -> b -> a
const Maybe [CollationElement]
forall a. Maybe a
Nothing)) Collation
coll
[[Int]
is | is :: [Int]
is@(i :: Int
i:_:_) <- [[Int]]
collationKeys, Int
i Int -> [Int] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Int]
cps]
where
collationKeys :: [[Int]]
collationKeys = (([Int], [CollationElement]) -> [Int])
-> [([Int], [CollationElement])] -> [[Int]]
forall a b. (a -> b) -> [a] -> [b]
map ([Int], [CollationElement]) -> [Int]
forall a b. (a, b) -> a
fst ([([Int], [CollationElement])] -> [[Int]])
-> [([Int], [CollationElement])] -> [[Int]]
forall a b. (a -> b) -> a -> b
$ Collation -> [([Int], [CollationElement])]
unfoldCollation Collation
coll
alterElements :: (Maybe [CollationElement] -> Maybe [CollationElement])
-> [Int] -> Collation -> Collation
alterElements :: (Maybe [CollationElement] -> Maybe [CollationElement])
-> [Int] -> Collation -> Collation
alterElements f :: Maybe [CollationElement] -> Maybe [CollationElement]
f codepoints :: [Int]
codepoints (Collation trie :: Trie [CollationElement]
trie) =
Trie [CollationElement] -> Collation
Collation (Trie [CollationElement] -> Collation)
-> Trie [CollationElement] -> Collation
forall a b. (a -> b) -> a -> b
$ (Maybe [CollationElement] -> Maybe [CollationElement])
-> [Int] -> Trie [CollationElement] -> Trie [CollationElement]
forall a. (Maybe a -> Maybe a) -> [Int] -> Trie a -> Trie a
Trie.alter Maybe [CollationElement] -> Maybe [CollationElement]
f [Int]
codepoints Trie [CollationElement]
trie
{-# SPECIALIZE matchLongestPrefix
:: Collation -> [Int] -> Maybe ([CollationElement], Int, Collation) #-}
matchLongestPrefix :: Foldable t
=> Collation
-> t Int
-> Maybe ([CollationElement], Int, Collation)
matchLongestPrefix :: Collation -> t Int -> Maybe ([CollationElement], Int, Collation)
matchLongestPrefix (Collation trie :: Trie [CollationElement]
trie) codepoints :: t Int
codepoints =
case Trie [CollationElement]
-> t Int
-> Maybe ([CollationElement], Int, Trie [CollationElement])
forall (t :: * -> *) a.
Foldable t =>
Trie a -> t Int -> Maybe (a, Int, Trie a)
Trie.matchLongestPrefix Trie [CollationElement]
trie t Int
codepoints of
Nothing -> Maybe ([CollationElement], Int, Collation)
forall a. Maybe a
Nothing
Just (els :: [CollationElement]
els, consumed :: Int
consumed, trie' :: Trie [CollationElement]
trie') -> ([CollationElement], Int, Collation)
-> Maybe ([CollationElement], Int, Collation)
forall a. a -> Maybe a
Just ([CollationElement]
els, Int
consumed, Trie [CollationElement] -> Collation
Collation Trie [CollationElement]
trie')
lookupNonEmptyChild :: Collation
-> Int
-> Maybe ([CollationElement], Collation)
lookupNonEmptyChild :: Collation -> Int -> Maybe ([CollationElement], Collation)
lookupNonEmptyChild (Collation trie :: Trie [CollationElement]
trie) point :: Int
point =
case Trie [CollationElement]
-> Int -> Maybe ([CollationElement], Trie [CollationElement])
forall a. Trie a -> Int -> Maybe (a, Trie a)
Trie.lookupNonEmptyChild Trie [CollationElement]
trie Int
point of
Nothing -> Maybe ([CollationElement], Collation)
forall a. Maybe a
Nothing
Just (els :: [CollationElement]
els, trie' :: Trie [CollationElement]
trie') -> ([CollationElement], Collation)
-> Maybe ([CollationElement], Collation)
forall a. a -> Maybe a
Just ([CollationElement]
els, Trie [CollationElement] -> Collation
Collation Trie [CollationElement]
trie')
findFirst :: ([CollationElement] -> Bool)
-> Collation
-> Maybe ([Int], [CollationElement])
findFirst :: ([CollationElement] -> Bool)
-> Collation -> Maybe ([Int], [CollationElement])
findFirst f :: [CollationElement] -> Bool
f (Collation trie :: Trie [CollationElement]
trie) =
case (([Int], [CollationElement])
-> ([Int], [CollationElement]) -> Ordering)
-> [([Int], [CollationElement])] -> ([Int], [CollationElement])
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy ([Int], [CollationElement])
-> ([Int], [CollationElement]) -> Ordering
forall a a.
(a, [CollationElement]) -> (a, [CollationElement]) -> Ordering
comp ([([Int], [CollationElement])] -> ([Int], [CollationElement]))
-> [([Int], [CollationElement])] -> ([Int], [CollationElement])
forall a b. (a -> b) -> a -> b
$ Trie [CollationElement] -> [([Int], [CollationElement])]
forall a. Trie a -> [([Int], a)]
Trie.unfoldTrie Trie [CollationElement]
trie of
(is :: [Int]
is,elts :: [CollationElement]
elts) | [CollationElement] -> Bool
f [CollationElement]
elts -> ([Int], [CollationElement]) -> Maybe ([Int], [CollationElement])
forall a. a -> Maybe a
Just ([Int]
is,[CollationElement]
elts)
_ -> Maybe ([Int], [CollationElement])
forall a. Maybe a
Nothing
where
comp :: (a, [CollationElement]) -> (a, [CollationElement]) -> Ordering
comp (_,x :: [CollationElement]
x) (_,y :: [CollationElement]
y) =
Either [CollationElement] [CollationElement]
-> Either [CollationElement] [CollationElement] -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (if [CollationElement] -> Bool
f [CollationElement]
x then [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. a -> Either a b
Left [CollationElement]
x else [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. b -> Either a b
Right [CollationElement]
x)
(if [CollationElement] -> Bool
f [CollationElement]
y then [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. a -> Either a b
Left [CollationElement]
y else [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. b -> Either a b
Right [CollationElement]
y)
findLast :: ([CollationElement] -> Bool)
-> Collation
-> Maybe ([Int], [CollationElement])
findLast :: ([CollationElement] -> Bool)
-> Collation -> Maybe ([Int], [CollationElement])
findLast f :: [CollationElement] -> Bool
f (Collation trie :: Trie [CollationElement]
trie) =
case (([Int], [CollationElement])
-> ([Int], [CollationElement]) -> Ordering)
-> [([Int], [CollationElement])] -> ([Int], [CollationElement])
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
maximumBy ([Int], [CollationElement])
-> ([Int], [CollationElement]) -> Ordering
forall a a.
(a, [CollationElement]) -> (a, [CollationElement]) -> Ordering
comp ([([Int], [CollationElement])] -> ([Int], [CollationElement]))
-> [([Int], [CollationElement])] -> ([Int], [CollationElement])
forall a b. (a -> b) -> a -> b
$ Trie [CollationElement] -> [([Int], [CollationElement])]
forall a. Trie a -> [([Int], a)]
Trie.unfoldTrie Trie [CollationElement]
trie of
(is :: [Int]
is,elts :: [CollationElement]
elts) | [CollationElement] -> Bool
f [CollationElement]
elts -> ([Int], [CollationElement]) -> Maybe ([Int], [CollationElement])
forall a. a -> Maybe a
Just ([Int]
is,[CollationElement]
elts)
_ -> Maybe ([Int], [CollationElement])
forall a. Maybe a
Nothing
where
comp :: (a, [CollationElement]) -> (a, [CollationElement]) -> Ordering
comp (_,x :: [CollationElement]
x) (_,y :: [CollationElement]
y) =
Either [CollationElement] [CollationElement]
-> Either [CollationElement] [CollationElement] -> Ordering
forall a. Ord a => a -> a -> Ordering
compare (if [CollationElement] -> Bool
f [CollationElement]
x then [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. b -> Either a b
Right [CollationElement]
x else [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. a -> Either a b
Left [CollationElement]
x)
(if [CollationElement] -> Bool
f [CollationElement]
y then [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. b -> Either a b
Right [CollationElement]
y else [CollationElement] -> Either [CollationElement] [CollationElement]
forall a b. a -> Either a b
Left [CollationElement]
y)
getCollationElements :: Collation -> [Int] -> [CollationElement]
getCollationElements :: Collation -> [Int] -> [CollationElement]
getCollationElements collation :: Collation
collation = [Int] -> [CollationElement]
go
where
go :: [Int] -> [CollationElement]
go [] = []
go (c :: Int
c:cs :: [Int]
cs) =
case Collation -> [Int] -> Maybe ([CollationElement], Int, Collation)
forall (t :: * -> *).
Foldable t =>
Collation -> t Int -> Maybe ([CollationElement], Int, Collation)
matchLongestPrefix Collation
collation (Int
cInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:[Int]
cs) of
Nothing -> Int -> [CollationElement]
calculateImplicitWeight Int
c [CollationElement] -> [CollationElement] -> [CollationElement]
forall a. [a] -> [a] -> [a]
++ [Int] -> [CollationElement]
go [Int]
cs
Just (elts :: [CollationElement]
elts, consumed :: Int
consumed, subcollation :: Collation
subcollation)
-> [CollationElement]
elts' [CollationElement] -> [CollationElement] -> [CollationElement]
forall a. [a] -> [a] -> [a]
++ [Int] -> [CollationElement]
go ([Int]
unblockedNonStarters' [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
++ [Int]
is')
where
getUnblockedNonStarters :: Int -> [Int] -> ([Int], [Int])
getUnblockedNonStarters _ [] = ([], [])
getUnblockedNonStarters n :: Int
n (x :: Int
x:xs :: [Int]
xs)
= case Int -> Int
canonicalCombiningClass Int
x of
ccc :: Int
ccc
| Int
ccc Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
n,
(xs' :: [Int]
xs', rest :: [Int]
rest) <- Int -> [Int] -> ([Int], [Int])
getUnblockedNonStarters Int
ccc [Int]
xs
-> (Int
x Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: [Int]
xs', [Int]
rest)
| Bool
otherwise -> ([], Int
x Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: [Int]
xs)
(unblockedNonStarters :: [Int]
unblockedNonStarters, is' :: [Int]
is') = Int -> [Int] -> ([Int], [Int])
getUnblockedNonStarters 0
(Int -> [Int] -> [Int]
forall a. Int -> [a] -> [a]
drop Int
consumed (Int
cInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:[Int]
cs))
(elts' :: [CollationElement]
elts', unblockedNonStarters' :: [Int]
unblockedNonStarters') =
[CollationElement]
-> [Int] -> Collation -> ([CollationElement], [Int])
extendMatch [CollationElement]
elts [Int]
unblockedNonStarters Collation
subcollation
popExtender :: [Int] -> Collation -> Maybe ([CollationElement], [Int], Collation)
popExtender = ([Int] -> [Int])
-> [Int]
-> Collation
-> Maybe ([CollationElement], [Int], Collation)
forall c.
([Int] -> c)
-> [Int] -> Collation -> Maybe ([CollationElement], c, Collation)
popExtender' [Int] -> [Int]
forall a. a -> a
id
popExtender' :: ([Int] -> c)
-> [Int] -> Collation -> Maybe ([CollationElement], c, Collation)
popExtender' _ [] _ = Maybe ([CollationElement], c, Collation)
forall a. Maybe a
Nothing
popExtender' acc :: [Int] -> c
acc (x :: Int
x:xs :: [Int]
xs) subc :: Collation
subc
= case Collation -> Int -> Maybe ([CollationElement], Collation)
lookupNonEmptyChild Collation
subc Int
x of
Just (es' :: [CollationElement]
es', subc' :: Collation
subc') -> ([CollationElement], c, Collation)
-> Maybe ([CollationElement], c, Collation)
forall a. a -> Maybe a
Just ([CollationElement]
es', [Int] -> c
acc [Int]
xs, Collation
subc')
Nothing -> ([Int] -> c)
-> [Int] -> Collation -> Maybe ([CollationElement], c, Collation)
popExtender' ([Int] -> c
acc ([Int] -> c) -> ([Int] -> [Int]) -> [Int] -> c
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int
x Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:)) [Int]
xs Collation
subc
extendMatch :: [CollationElement]
-> [Int] -> Collation -> ([CollationElement], [Int])
extendMatch es :: [CollationElement]
es ubs :: [Int]
ubs subc :: Collation
subc = case [Int] -> Collation -> Maybe ([CollationElement], [Int], Collation)
popExtender [Int]
ubs Collation
subc of
Just (es' :: [CollationElement]
es', ubs' :: [Int]
ubs', subc' :: Collation
subc') -> [CollationElement]
-> [Int] -> Collation -> ([CollationElement], [Int])
extendMatch [CollationElement]
es' [Int]
ubs' Collation
subc'
Nothing -> ([CollationElement]
es, [Int]
ubs)
calculateImplicitWeight :: Int -> [CollationElement]
calculateImplicitWeight :: Int -> [CollationElement]
calculateImplicitWeight cp :: Int
cp =
[Bool -> Word16 -> Word16 -> Word16 -> Word16 -> CollationElement
CollationElement Bool
False (Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
aaaa) 0x0020 0x0002 0xFFFF,
Bool -> Word16 -> Word16 -> Word16 -> Word16 -> CollationElement
CollationElement Bool
False (Int -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
bbbb) 0 0 0xFFFF]
where
range :: Int -> Int -> IntSet
range x :: Int
x y :: Int
y = [Int] -> IntSet
IntSet.fromList [Int
x..Int
y]
singleton :: Int -> IntSet
singleton = Int -> IntSet
IntSet.singleton
union :: IntSet -> IntSet -> IntSet
union = IntSet -> IntSet -> IntSet
IntSet.union
unifiedIdeographs :: IntSet
unifiedIdeographs = Int -> Int -> IntSet
range 0x3400 0x4DBF IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0x4E00 0x9FFC IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0xFA0E 0xFA0F IntSet -> IntSet -> IntSet
`union`
Int -> IntSet
singleton 0xFA11 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0xFA13 0xFA14 IntSet -> IntSet -> IntSet
`union`
Int -> IntSet
singleton 0xFA1F IntSet -> IntSet -> IntSet
`union`
Int -> IntSet
singleton 0xFA21 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0xFA23 0xFA24 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0xFA27 0xFA29 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0x20000 0x2A6DD IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0x2A700 0x2B734 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0x2B740 0x2B81D IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0x2B820 0x2CEA1 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0x2CEB0 0x2EBE0 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0x2CEB0 0x2EBE0 IntSet -> IntSet -> IntSet
`union`
Int -> Int -> IntSet
range 0x30000 0x3134A
cjkCompatibilityIdeographs :: IntSet
cjkCompatibilityIdeographs = Int -> Int -> IntSet
range 0xF900 0xFAFF
cjkUnifiedIdeographs :: IntSet
cjkUnifiedIdeographs = Int -> Int -> IntSet
range 0x4E00 0x9FFF
(aaaa :: Int
aaaa, bbbb :: Int
bbbb) =
case Int
cp of
_ | Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0x17000 , Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= 0x18AFF
-> (0xFB00, (Int
cp Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0x17000) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. 0x8000)
| Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0x18D00 , Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= 0x18D8F
-> (0xFB00, (Int
cp Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0x17000) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. 0x8000)
| Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0x1B170 , Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= 0x1B2FF
-> (0xFB01, (Int
cp Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0x1B170) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. 0x8000)
| Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= 0x18B00 , Int
cp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= 0x18CFF
-> (0xFB02, (Int
cp Int -> Int -> Int
forall a. Num a => a -> a -> a
- 0x18B00) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. 0x8000)
| Int
cp Int -> IntSet -> Bool
`IntSet.member` IntSet
unifiedIdeographs Bool -> Bool -> Bool
&&
(Int
cp Int -> IntSet -> Bool
`IntSet.member` IntSet
cjkUnifiedIdeographs Bool -> Bool -> Bool
||
Int
cp Int -> IntSet -> Bool
`IntSet.member` IntSet
cjkCompatibilityIdeographs)
-> (0xFB40 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
cp Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` 15), (Int
cp Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. 0x7FFF) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. 0x8000)
| Int
cp Int -> IntSet -> Bool
`IntSet.member` IntSet
unifiedIdeographs
-> (0xFB80 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
cp Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` 15), (Int
cp Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. 0x7FFF) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. 0x8000)
| Bool
otherwise
-> (0xFBC0 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ (Int
cp Int -> Int -> Int
forall a. Bits a => a -> Int -> a
`shiftR` 15), (Int
cp Int -> Int -> Int
forall a. Bits a => a -> a -> a
.&. 0x7FFFF) Int -> Int -> Int
forall a. Bits a => a -> a -> a
.|. 0x8000)
parseCollation :: Text -> Collation
parseCollation :: Text -> Collation
parseCollation = (Collation -> Text -> Collation)
-> Collation -> [Text] -> Collation
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Collation -> Text -> Collation
processLine Collation
forall a. Monoid a => a
mempty ([Text] -> Collation) -> (Text -> [Text]) -> Text -> Collation
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines
where
processLine :: Collation -> Text -> Collation
processLine trie :: Collation
trie t :: Text
t =
case Text -> ([Int], Text)
readCodePoints Text
t of
([],_) -> Collation
trie
(c :: Int
c:cs :: [Int]
cs, rest :: Text
rest) -> [Int] -> [CollationElement] -> Collation -> Collation
insertElements (Int
cInt -> [Int] -> [Int]
forall a. a -> [a] -> [a]
:[Int]
cs) (Text -> [CollationElement]
go Text
rest) Collation
trie
go :: Text -> [CollationElement]
go t :: Text
t =
case (Char -> Bool) -> Text -> (Text, Text)
T.break (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== ']') (Int -> Text -> Text
T.drop 1 (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> Text -> Text
T.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= '[') Text
t) of
(contents :: Text
contents, rest :: Text
rest)
| Text -> Bool
T.null Text
rest -> []
| Bool
otherwise -> Text -> CollationElement
parseContents Text
contents CollationElement -> [CollationElement] -> [CollationElement]
forall a. a -> [a] -> [a]
: Text -> [CollationElement]
go Text
rest
parseContents :: Text -> CollationElement
parseContents t :: Text
t =
let isVariable :: Bool
isVariable = Bool -> Bool
not (Text -> Bool
T.null Text
t) Bool -> Bool -> Bool
&& Text -> Char
T.head Text
t Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== '*'
isIgnorable :: (a, a, a) -> Bool
isIgnorable (0,0,0) = Bool
True
isIgnorable _ = Bool
False
in case (Text -> Either String (Word16, Text))
-> [Text] -> [Either String (Word16, Text)]
forall a b. (a -> b) -> [a] -> [b]
map Text -> Either String (Word16, Text)
forall a. Integral a => Reader a
TR.hexadecimal ([Text] -> [Either String (Word16, Text)])
-> [Text] -> [Either String (Word16, Text)]
forall a b. (a -> b) -> a -> b
$ (Text -> Bool) -> [Text] -> [Text]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (Text -> Bool) -> Text -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null)
((Char -> Bool) -> Text -> [Text]
T.split Char -> Bool
isSep Text
t) of
[Right (x :: Word16
x,_), Right (y :: Word16
y,_), Right (z :: Word16
z,_)]
-> Bool -> Word16 -> Word16 -> Word16 -> Word16 -> CollationElement
CollationElement Bool
isVariable Word16
x Word16
y Word16
z
(if Bool
isVariable Bool -> Bool -> Bool
|| (Word16, Word16, Word16) -> Bool
forall a a a.
(Eq a, Eq a, Eq a, Num a, Num a, Num a) =>
(a, a, a) -> Bool
isIgnorable (Word16
x,Word16
y,Word16
z)
then 0
else 0xFFFF)
_ -> Bool -> Word16 -> Word16 -> Word16 -> Word16 -> CollationElement
CollationElement Bool
isVariable 0 0 0 0
isSep :: Char -> Bool
isSep '*' = Bool
True
isSep '.' = Bool
True
isSep _ = Bool
False
parseCJKOverrides :: Text -> [Int]
parseCJKOverrides :: Text -> [Int]
parseCJKOverrides = (Text -> Maybe Int) -> [Text] -> [Int]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Text -> Maybe Int
forall a. Integral a => Text -> Maybe a
chunkToCp ([Text] -> [Int]) -> (Text -> [Text]) -> Text -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.words
where
chunkToCp :: Text -> Maybe a
chunkToCp t :: Text
t =
case Reader a
forall a. Integral a => Reader a
TR.hexadecimal Text
t of
Right (x :: a
x,rest :: Text
rest)
| Text -> Bool
T.null Text
rest -> a -> Maybe a
forall a. a -> Maybe a
Just a
x
_ -> Maybe a
forall a. Maybe a
Nothing