You can use a bit different approach.
There is a trick in proof that a finite product of countable sets is countable:
We can map the sequence [a1, ..., an]
to Nat
by product . zipWith (^) primes
: 2 ^ a1 * 3 ^ a2 * 5 ^ a3 * ... * primen ^ an
.
To avoid problems with sequences with zero at the end, we can increase the last index.
As the sequence is ordered, we can exploit the property as user5402 mentioned.
The benefit of using the tree, is that you can increase branching to speed-up traversal. OTOH prime trick could make indexes quite big, but hopefully some tree paths will just be unexplored (remain as thunks).
{-# LANGUAGE BangPatterns #-}
-- Modified from Kmett's answer:
data Tree a = Tree a (Tree a) (Tree a) (Tree a) (Tree a)
instance Functor Tree where
fmap f (Tree x a b c d) = Tree (f x) (fmap f a) (fmap f b) (fmap f c) (fmap f d)
index :: Tree a -> Integer -> a
index (Tree x _ _ _ _) 0 = x
index (Tree _ a b c d) n = case (n - 1) `divMod` 4 of
(q,0) -> index a q
(q,1) -> index b q
(q,2) -> index c q
(q,3) -> index d q
nats :: Tree Integer
nats = go 0 1
where
go !n !s = Tree n (go a s') (go b s') (go c s') (go d s')
where
a = n + s
b = a + s
c = b + s
d = c + s
s' = s * 4
toList :: Tree a -> [a]
toList as = map (index as) [0..]
-- Primes -- https://www.haskell.org/haskellwiki/Prime_numbers
-- Generation and factorisation could be done much better
minus (x:xs) (y:ys) = case (compare x y) of
LT -> x : minus xs (y:ys)
EQ -> minus xs ys
GT -> minus (x:xs) ys
minus xs _ = xs
primes = 2 : sieve [3..] primes
where
sieve xs (p:ps) | q <- p*p , (h,t) <- span (< q) xs =
h ++ sieve (t `minus` [q, q+p..]) ps
addToLast :: [Integer] -> [Integer]
addToLast [] = []
addToLast [x] = [x + 1]
addToLast (x:xs) = x : addToLast xs
subFromLast :: [Integer] -> [Integer]
subFromLast [] = []
subFromLast [x] = [x - 1]
subFromLast (x:xs) = x : subFromLast xs
addSubProp :: [NonNegative Integer] -> Property
addSubProp xs = xs' === subFromLast (addToLast xs')
where xs' = map getNonNegative xs
-- Trick from user5402 answer
toDiffList :: [Integer] -> [Integer]
toDiffList = toDiffList' 0
where toDiffList' _ [] = []
toDiffList' p (x:xs) = x - p : toDiffList' x xs
fromDiffList :: [Integer] -> [Integer]
fromDiffList = fromDiffList' 0
where fromDiffList' _ [] = []
fromDiffList' p (x:xs) = p + x : fromDiffList' (x + p) xs
diffProp :: [Integer] -> Property
diffProp xs = xs === fromDiffList (toDiffList xs)
listToInteger :: [Integer] -> Integer
listToInteger = product . zipWith (^) primes . addToLast
integerToList :: Integer -> [Integer]
integerToList = subFromLast . impl primes 0
where impl _ _ 0 = []
impl _ 0 1 = []
impl _ k 1 = [k]
impl (p:ps) k n = case n `divMod` p of
(n', 0) -> impl (p:ps) (k + 1) n'
(_, _) -> k : impl ps 0 n
listProp :: [NonNegative Integer] -> Property
listProp xs = xs' === integerToList (listToInteger xs')
where xs' = map getNonNegative xs
toIndex :: [Integer] -> Integer
toIndex = listToInteger . toDiffList
fromIndex :: Integer -> [Integer]
fromIndex = fromDiffList . integerToList
-- [1,0] /= [0]
-- Decreasing sequence!
doesntHold :: [NonNegative Integer] -> Property
doesntHold xs = xs' === fromIndex (toIndex xs')
where xs' = map getNonNegative xs
holds :: [NonNegative Integer] -> Property
holds xs = xs' === fromIndex (toIndex xs')
where xs' = sort $ map getNonNegative xs
g :: ([Integer] -> Integer) -> [Integer] -> Integer
g mg = g' . sort
where g' [] = 0
g' (x:xs) = x + sum (map mg $ tails xs)
g_tree :: Tree Integer
g_tree = fmap (g faster_g' . fromIndex) nats
faster_g' :: [Integer] -> Integer
faster_g' = index g_tree . toIndex
faster_g = faster_g' . sort
On my machine fix g [1..22]
feels slow, when faster_g [1..40]
is still blazing fast.
Addition: if we have bounded set (with indexes 0..n-1) , we can encode it as: a0 * n^0 + a1 * n^1 ...
.
We can encode any Integer
as binary list, e.g. 11
is [1, 1, 0, 1]
(least bit first).
Then if we separate integers in the list with 2
, we get sequence of bounded values.
As bonus we can take the sequence of 0, 1, 2 digits and compress it to binary using e.g. Huffman encoding, as 2 is much rarer than 0 or 1. But this might be overkill.
With this trick, indexes stay much smaller and the space probably is better packed.
{-# LANGUAGE BangPatterns #-}
-- From Kment's answer:
import Data.Function (fix)
import Data.List (sort, tails)
import Data.List.Split (splitOn)
import Test.QuickCheck
{-- Tree definition as before --}
-- 0, 1, 2
newtype N3 = N3 { unN3 :: Integer }
deriving (Eq, Show)
instance Arbitrary N3 where
arbitrary = elements $ map N3 [ 0, 1, 2 ]
-- Integer <-> N3
coeffs3 :: [Integer]
coeffs3 = coeffs' 1
where coeffs' n = n : coeffs' (n * 3)
listToInteger :: [N3] -> Integer
listToInteger = sum . zipWith f coeffs3
where f n (N3 m) = n * m
listFromInteger :: Integer -> [N3]
listFromInteger 0 = []
listFromInteger n = case n `divMod` 3 of
(q, m) -> N3 m : listFromInteger q
listProp :: [N3] -> Property
listProp xs = (null xs || last xs /= N3 0) ==> xs === listFromInteger (listToInteger xs)
-- Integer <-> N2
-- 0, 1
newtype N2 = N2 { unN2 :: Integer }
deriving (Eq, Show)
coeffs2 :: [Integer]
coeffs2 = coeffs' 1
where coeffs' n = n : coeffs' (n * 2)
integerToBin :: Integer -> [N2]
integerToBin 0 = []
integerToBin n = case n `divMod` 2 of
(q, m) -> N2 m : integerToBin q
integerFromBin :: [N2] -> Integer
integerFromBin = sum . zipWith f coeffs2
where f n (N2 m) = n * m
binProp :: NonNegative Integer -> Property
binProp (NonNegative n) = n === integerFromBin (integerToBin n)
-- unsafe!
n3ton2 :: N3 -> N2
n3ton2 = N2 . unN3
n2ton3 :: N2 -> N3
n2ton3 = N3 . unN2
-- [Integer] <-> [N3]
integerListToN3List :: [Integer] -> [N3]
integerListToN3List = concatMap (++ [N3 2]) . map (map n2ton3 . integerToBin)
integerListFromN3List :: [N3] -> [Integer]
integerListFromN3List = init . map (integerFromBin . map n3ton2) . splitOn [N3 2]
n3ListProp :: [NonNegative Integer] -> Property
n3ListProp xs = xs' === integerListFromN3List (integerListToN3List xs')
where xs' = map getNonNegative xs
-- Trick from user5402 answer
-- Integer <-> Sorted Integer
toDiffList :: [Integer] -> [Integer]
toDiffList = toDiffList' 0
where toDiffList' _ [] = []
toDiffList' p (x:xs) = x - p : toDiffList' x xs
fromDiffList :: [Integer] -> [Integer]
fromDiffList = fromDiffList' 0
where fromDiffList' _ [] = []
fromDiffList' p (x:xs) = p + x : fromDiffList' (x + p) xs
diffProp :: [Integer] -> Property
diffProp xs = xs === fromDiffList (toDiffList xs)
---
toIndex :: [Integer] -> Integer
toIndex = listToInteger . integerListToN3List . toDiffList
fromIndex :: Integer -> [Integer]
fromIndex = fromDiffList . integerListFromN3List . listFromInteger
-- [1,0] /= [0]
-- Decreasing sequence! doesn't terminate in this case
doesntHold :: [NonNegative Integer] -> Property
doesntHold xs = xs' === fromIndex (toIndex xs')
where xs' = map getNonNegative xs
holds :: [NonNegative Integer] -> Property
holds xs = xs' === fromIndex (toIndex xs')
where xs' = sort $ map getNonNegative xs
g :: ([Integer] -> Integer) -> [Integer] -> Integer
g mg = g' . sort
where g' [] = 0
g' (x:xs) = x + sum (map mg $ tails xs)
g_tree :: Tree Integer
g_tree = fmap (g faster_g' . fromIndex) nats
faster_g' :: [Integer] -> Integer
faster_g' = index g_tree . toIndex
faster_g = faster_g' . sort
Second addition:
I quickly benchmarked graph and binary sequence approach for my g
with:
main :: IO ()
main = do
n <- read . head <$> getArgs
print $ faster_g [100, 110..n]
And the results are:
% time ./IntegerMemo 1000
1225560638892526472150132981770
./IntegerMemo 1000 0.19s user 0.01s system 98% cpu 0.200 total
% time ./IntegerMemo 2000
3122858113354873680008305238045814042010921833620857170165770
./IntegerMemo 2000 1.83s user 0.05s system 99% cpu 1.888 total
% time ./IntegerMemo 2500
4399449191298176980662410776849867104410434903220291205722799441218623242250
./IntegerMemo 2500 3.74s user 0.09s system 99% cpu 3.852 total
% time ./IntegerMemo 3000
5947985907461048240178371687835977247601455563536278700587949163642187584269899171375349770
./IntegerMemo 3000 6.66s user 0.13s system 99% cpu 6.830 total
% time ./IntegerMemoGrap 1000
1225560638892526472150132981770
./IntegerMemoGrap 1000 0.10s user 0.01s system 97% cpu 0.113 total
% time ./IntegerMemoGrap 2000
3122858113354873680008305238045814042010921833620857170165770
./IntegerMemoGrap 2000 0.97s user 0.04s system 98% cpu 1.028 total
% time ./IntegerMemoGrap 2500
4399449191298176980662410776849867104410434903220291205722799441218623242250
./IntegerMemoGrap 2500 2.11s user 0.08s system 99% cpu 2.202 total
% time ./IntegerMemoGrap 3000
5947985907461048240178371687835977247601455563536278700587949163642187584269899171375349770
./IntegerMemoGrap 3000 3.33s user 0.09s system 99% cpu 3.452 total
Looks like that graph version is faster by constant factor of 2. But they seem to have same time complexity :)