PageRenderTime 53ms CodeModel.GetById 25ms RepoModel.GetById 0ms app.codeStats 0ms

/compiler/utils/UniqDFM.hs

http://github.com/ghc/ghc
Haskell | 392 lines | 199 code | 62 blank | 131 comment | 2 complexity | 81f6888b0745bc168bb6455a060d0c48 MD5 | raw file
Possible License(s): MIT, BSD-3-Clause, GPL-3.0
  1. {-
  2. (c) Bartosz Nitka, Facebook, 2015
  3. UniqDFM: Specialised deterministic finite maps, for things with @Uniques@.
  4. Basically, the things need to be in class @Uniquable@, and we use the
  5. @getUnique@ method to grab their @Uniques@.
  6. This is very similar to @UniqFM@, the major difference being that the order of
  7. folding is not dependent on @Unique@ ordering, giving determinism.
  8. Currently the ordering is determined by insertion order.
  9. See Note [Unique Determinism] in Unique for explanation why @Unique@ ordering
  10. is not deterministic.
  11. -}
  12. {-# LANGUAGE DeriveDataTypeable #-}
  13. {-# LANGUAGE DeriveFunctor #-}
  14. {-# LANGUAGE FlexibleContexts #-}
  15. {-# OPTIONS_GHC -Wall #-}
  16. module UniqDFM (
  17. -- * Unique-keyed deterministic mappings
  18. UniqDFM, -- abstract type
  19. -- ** Manipulating those mappings
  20. emptyUDFM,
  21. unitUDFM,
  22. addToUDFM,
  23. addToUDFM_C,
  24. addListToUDFM,
  25. delFromUDFM,
  26. delListFromUDFM,
  27. adjustUDFM,
  28. alterUDFM,
  29. mapUDFM,
  30. plusUDFM,
  31. plusUDFM_C,
  32. lookupUDFM, lookupUDFM_Directly,
  33. elemUDFM,
  34. foldUDFM,
  35. eltsUDFM,
  36. filterUDFM, filterUDFM_Directly,
  37. isNullUDFM,
  38. sizeUDFM,
  39. intersectUDFM, udfmIntersectUFM,
  40. intersectsUDFM,
  41. disjointUDFM, disjointUdfmUfm,
  42. minusUDFM,
  43. listToUDFM,
  44. udfmMinusUFM,
  45. partitionUDFM,
  46. anyUDFM, allUDFM,
  47. pprUDFM,
  48. udfmToList,
  49. udfmToUfm,
  50. nonDetFoldUDFM,
  51. alwaysUnsafeUfmToUdfm,
  52. ) where
  53. import Unique ( Uniquable(..), Unique, getKey )
  54. import Outputable
  55. import qualified Data.IntMap as M
  56. import Data.Data
  57. import Data.List (sortBy)
  58. import Data.Function (on)
  59. import UniqFM (UniqFM, listToUFM_Directly, nonDetUFMToList, ufmToIntMap)
  60. -- Note [Deterministic UniqFM]
  61. -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  62. -- A @UniqDFM@ is just like @UniqFM@ with the following additional
  63. -- property: the function `udfmToList` returns the elements in some
  64. -- deterministic order not depending on the Unique key for those elements.
  65. --
  66. -- If the client of the map performs operations on the map in deterministic
  67. -- order then `udfmToList` returns them in deterministic order.
  68. --
  69. -- There is an implementation cost: each element is given a serial number
  70. -- as it is added, and `udfmToList` sorts it's result by this serial
  71. -- number. So you should only use `UniqDFM` if you need the deterministic
  72. -- property.
  73. --
  74. -- `foldUDFM` also preserves determinism.
  75. --
  76. -- Normal @UniqFM@ when you turn it into a list will use
  77. -- Data.IntMap.toList function that returns the elements in the order of
  78. -- the keys. The keys in @UniqFM@ are always @Uniques@, so you end up with
  79. -- with a list ordered by @Uniques@.
  80. -- The order of @Uniques@ is known to be not stable across rebuilds.
  81. -- See Note [Unique Determinism] in Unique.
  82. --
  83. --
  84. -- There's more than one way to implement this. The implementation here tags
  85. -- every value with the insertion time that can later be used to sort the
  86. -- values when asked to convert to a list.
  87. --
  88. -- An alternative would be to have
  89. --
  90. -- data UniqDFM ele = UDFM (M.IntMap ele) [ele]
  91. --
  92. -- where the list determines the order. This makes deletion tricky as we'd
  93. -- only accumulate elements in that list, but makes merging easier as you
  94. -- can just merge both structures independently.
  95. -- Deletion can probably be done in amortized fashion when the size of the
  96. -- list is twice the size of the set.
  97. -- | A type of values tagged with insertion time
  98. data TaggedVal val =
  99. TaggedVal
  100. val
  101. {-# UNPACK #-} !Int -- ^ insertion time
  102. deriving Data
  103. taggedFst :: TaggedVal val -> val
  104. taggedFst (TaggedVal v _) = v
  105. taggedSnd :: TaggedVal val -> Int
  106. taggedSnd (TaggedVal _ i) = i
  107. instance Eq val => Eq (TaggedVal val) where
  108. (TaggedVal v1 _) == (TaggedVal v2 _) = v1 == v2
  109. instance Functor TaggedVal where
  110. fmap f (TaggedVal val i) = TaggedVal (f val) i
  111. -- | Type of unique deterministic finite maps
  112. data UniqDFM ele =
  113. UDFM
  114. !(M.IntMap (TaggedVal ele)) -- A map where keys are Unique's values and
  115. -- values are tagged with insertion time.
  116. -- The invariant is that all the tags will
  117. -- be distinct within a single map
  118. {-# UNPACK #-} !Int -- Upper bound on the values' insertion
  119. -- time. See Note [Overflow on plusUDFM]
  120. deriving (Data, Functor)
  121. emptyUDFM :: UniqDFM elt
  122. emptyUDFM = UDFM M.empty 0
  123. unitUDFM :: Uniquable key => key -> elt -> UniqDFM elt
  124. unitUDFM k v = UDFM (M.singleton (getKey $ getUnique k) (TaggedVal v 0)) 1
  125. addToUDFM :: Uniquable key => UniqDFM elt -> key -> elt -> UniqDFM elt
  126. addToUDFM (UDFM m i) k v =
  127. UDFM (M.insert (getKey $ getUnique k) (TaggedVal v i) m) (i + 1)
  128. addToUDFM_Directly :: UniqDFM elt -> Unique -> elt -> UniqDFM elt
  129. addToUDFM_Directly (UDFM m i) u v =
  130. UDFM (M.insert (getKey u) (TaggedVal v i) m) (i + 1)
  131. addToUDFM_Directly_C
  132. :: (elt -> elt -> elt) -> UniqDFM elt -> Unique -> elt -> UniqDFM elt
  133. addToUDFM_Directly_C f (UDFM m i) u v =
  134. UDFM (M.insertWith tf (getKey u) (TaggedVal v i) m) (i + 1)
  135. where
  136. tf (TaggedVal a j) (TaggedVal b _) = TaggedVal (f a b) j
  137. addListToUDFM :: Uniquable key => UniqDFM elt -> [(key,elt)] -> UniqDFM elt
  138. addListToUDFM = foldl (\m (k, v) -> addToUDFM m k v)
  139. addToUDFM_C
  140. :: Uniquable key => (elt -> elt -> elt) -- old -> new -> result
  141. -> UniqDFM elt -- old
  142. -> key -> elt -- new
  143. -> UniqDFM elt -- result
  144. addToUDFM_C f (UDFM m i) k v =
  145. UDFM (M.insertWith tf (getKey $ getUnique k) (TaggedVal v i) m) (i + 1)
  146. where
  147. tf (TaggedVal a j) (TaggedVal b _) = TaggedVal (f b a) j
  148. -- Flip the arguments, just like
  149. -- addToUFM_C does.
  150. addListToUDFM_Directly :: UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
  151. addListToUDFM_Directly = foldl (\m (k, v) -> addToUDFM_Directly m k v)
  152. addListToUDFM_Directly_C
  153. :: (elt -> elt -> elt) -> UniqDFM elt -> [(Unique,elt)] -> UniqDFM elt
  154. addListToUDFM_Directly_C f = foldl (\m (k, v) -> addToUDFM_Directly_C f m k v)
  155. delFromUDFM :: Uniquable key => UniqDFM elt -> key -> UniqDFM elt
  156. delFromUDFM (UDFM m i) k = UDFM (M.delete (getKey $ getUnique k) m) i
  157. plusUDFM_C :: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
  158. plusUDFM_C f udfml@(UDFM _ i) udfmr@(UDFM _ j)
  159. -- we will use the upper bound on the tag as a proxy for the set size,
  160. -- to insert the smaller one into the bigger one
  161. | i > j = insertUDFMIntoLeft_C f udfml udfmr
  162. | otherwise = insertUDFMIntoLeft_C f udfmr udfml
  163. -- Note [Overflow on plusUDFM]
  164. -- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
  165. -- There are multiple ways of implementing plusUDFM.
  166. -- The main problem that needs to be solved is overlap on times of
  167. -- insertion between different keys in two maps.
  168. -- Consider:
  169. --
  170. -- A = fromList [(a, (x, 1))]
  171. -- B = fromList [(b, (y, 1))]
  172. --
  173. -- If you merge them naively you end up with:
  174. --
  175. -- C = fromList [(a, (x, 1)), (b, (y, 1))]
  176. --
  177. -- Which loses information about ordering and brings us back into
  178. -- non-deterministic world.
  179. --
  180. -- The solution I considered before would increment the tags on one of the
  181. -- sets by the upper bound of the other set. The problem with this approach
  182. -- is that you'll run out of tags for some merge patterns.
  183. -- Say you start with A with upper bound 1, you merge A with A to get A' and
  184. -- the upper bound becomes 2. You merge A' with A' and the upper bound
  185. -- doubles again. After 64 merges you overflow.
  186. -- This solution would have the same time complexity as plusUFM, namely O(n+m).
  187. --
  188. -- The solution I ended up with has time complexity of
  189. -- O(m log m + m * min (n+m, W)) where m is the smaller set.
  190. -- It simply inserts the elements of the smaller set into the larger
  191. -- set in the order that they were inserted into the smaller set. That's
  192. -- O(m log m) for extracting the elements from the smaller set in the
  193. -- insertion order and O(m * min(n+m, W)) to insert them into the bigger
  194. -- set.
  195. plusUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
  196. plusUDFM udfml@(UDFM _ i) udfmr@(UDFM _ j)
  197. -- we will use the upper bound on the tag as a proxy for the set size,
  198. -- to insert the smaller one into the bigger one
  199. | i > j = insertUDFMIntoLeft udfml udfmr
  200. | otherwise = insertUDFMIntoLeft udfmr udfml
  201. insertUDFMIntoLeft :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
  202. insertUDFMIntoLeft udfml udfmr = addListToUDFM_Directly udfml $ udfmToList udfmr
  203. insertUDFMIntoLeft_C
  204. :: (elt -> elt -> elt) -> UniqDFM elt -> UniqDFM elt -> UniqDFM elt
  205. insertUDFMIntoLeft_C f udfml udfmr =
  206. addListToUDFM_Directly_C f udfml $ udfmToList udfmr
  207. lookupUDFM :: Uniquable key => UniqDFM elt -> key -> Maybe elt
  208. lookupUDFM (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey $ getUnique k) m
  209. lookupUDFM_Directly :: UniqDFM elt -> Unique -> Maybe elt
  210. lookupUDFM_Directly (UDFM m _i) k = taggedFst `fmap` M.lookup (getKey k) m
  211. elemUDFM :: Uniquable key => key -> UniqDFM elt -> Bool
  212. elemUDFM k (UDFM m _i) = M.member (getKey $ getUnique k) m
  213. -- | Performs a deterministic fold over the UniqDFM.
  214. -- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
  215. foldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
  216. foldUDFM k z m = foldr k z (eltsUDFM m)
  217. -- | Performs a nondeterministic fold over the UniqDFM.
  218. -- It's O(n), same as the corresponding function on `UniqFM`.
  219. -- If you use this please provide a justification why it doesn't introduce
  220. -- nondeterminism.
  221. nonDetFoldUDFM :: (elt -> a -> a) -> a -> UniqDFM elt -> a
  222. nonDetFoldUDFM k z (UDFM m _i) = foldr k z $ map taggedFst $ M.elems m
  223. eltsUDFM :: UniqDFM elt -> [elt]
  224. eltsUDFM (UDFM m _i) =
  225. map taggedFst $ sortBy (compare `on` taggedSnd) $ M.elems m
  226. filterUDFM :: (elt -> Bool) -> UniqDFM elt -> UniqDFM elt
  227. filterUDFM p (UDFM m i) = UDFM (M.filter (\(TaggedVal v _) -> p v) m) i
  228. filterUDFM_Directly :: (Unique -> elt -> Bool) -> UniqDFM elt -> UniqDFM elt
  229. filterUDFM_Directly p (UDFM m i) = UDFM (M.filterWithKey p' m) i
  230. where
  231. p' k (TaggedVal v _) = p (getUnique k) v
  232. -- | Converts `UniqDFM` to a list, with elements in deterministic order.
  233. -- It's O(n log n) while the corresponding function on `UniqFM` is O(n).
  234. udfmToList :: UniqDFM elt -> [(Unique, elt)]
  235. udfmToList (UDFM m _i) =
  236. [ (getUnique k, taggedFst v)
  237. | (k, v) <- sortBy (compare `on` (taggedSnd . snd)) $ M.toList m ]
  238. isNullUDFM :: UniqDFM elt -> Bool
  239. isNullUDFM (UDFM m _) = M.null m
  240. sizeUDFM :: UniqDFM elt -> Int
  241. sizeUDFM (UDFM m _i) = M.size m
  242. intersectUDFM :: UniqDFM elt -> UniqDFM elt -> UniqDFM elt
  243. intersectUDFM (UDFM x i) (UDFM y _j) = UDFM (M.intersection x y) i
  244. -- M.intersection is left biased, that means the result will only have
  245. -- a subset of elements from the left set, so `i` is a good upper bound.
  246. udfmIntersectUFM :: UniqDFM elt -> UniqFM elt -> UniqDFM elt
  247. udfmIntersectUFM (UDFM x i) y = UDFM (M.intersection x (ufmToIntMap y)) i
  248. -- M.intersection is left biased, that means the result will only have
  249. -- a subset of elements from the left set, so `i` is a good upper bound.
  250. intersectsUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
  251. intersectsUDFM x y = isNullUDFM (x `intersectUDFM` y)
  252. disjointUDFM :: UniqDFM elt -> UniqDFM elt -> Bool
  253. disjointUDFM (UDFM x _i) (UDFM y _j) = M.null (M.intersection x y)
  254. disjointUdfmUfm :: UniqDFM elt -> UniqFM elt2 -> Bool
  255. disjointUdfmUfm (UDFM x _i) y = M.null (M.intersection x (ufmToIntMap y))
  256. minusUDFM :: UniqDFM elt1 -> UniqDFM elt2 -> UniqDFM elt1
  257. minusUDFM (UDFM x i) (UDFM y _j) = UDFM (M.difference x y) i
  258. -- M.difference returns a subset of a left set, so `i` is a good upper
  259. -- bound.
  260. udfmMinusUFM :: UniqDFM elt1 -> UniqFM elt2 -> UniqDFM elt1
  261. udfmMinusUFM (UDFM x i) y = UDFM (M.difference x (ufmToIntMap y)) i
  262. -- M.difference returns a subset of a left set, so `i` is a good upper
  263. -- bound.
  264. -- | Partition UniqDFM into two UniqDFMs according to the predicate
  265. partitionUDFM :: (elt -> Bool) -> UniqDFM elt -> (UniqDFM elt, UniqDFM elt)
  266. partitionUDFM p (UDFM m i) =
  267. case M.partition (p . taggedFst) m of
  268. (left, right) -> (UDFM left i, UDFM right i)
  269. -- | Delete a list of elements from a UniqDFM
  270. delListFromUDFM :: Uniquable key => UniqDFM elt -> [key] -> UniqDFM elt
  271. delListFromUDFM = foldl delFromUDFM
  272. -- | This allows for lossy conversion from UniqDFM to UniqFM
  273. udfmToUfm :: UniqDFM elt -> UniqFM elt
  274. udfmToUfm (UDFM m _i) =
  275. listToUFM_Directly [(getUnique k, taggedFst tv) | (k, tv) <- M.toList m]
  276. listToUDFM :: Uniquable key => [(key,elt)] -> UniqDFM elt
  277. listToUDFM = foldl (\m (k, v) -> addToUDFM m k v) emptyUDFM
  278. listToUDFM_Directly :: [(Unique, elt)] -> UniqDFM elt
  279. listToUDFM_Directly = foldl (\m (u, v) -> addToUDFM_Directly m u v) emptyUDFM
  280. -- | Apply a function to a particular element
  281. adjustUDFM :: Uniquable key => (elt -> elt) -> UniqDFM elt -> key -> UniqDFM elt
  282. adjustUDFM f (UDFM m i) k = UDFM (M.adjust (fmap f) (getKey $ getUnique k) m) i
  283. -- | The expression (alterUDFM f k map) alters value x at k, or absence
  284. -- thereof. alterUDFM can be used to insert, delete, or update a value in
  285. -- UniqDFM. Use addToUDFM, delFromUDFM or adjustUDFM when possible, they are
  286. -- more efficient.
  287. alterUDFM
  288. :: Uniquable key
  289. => (Maybe elt -> Maybe elt) -- How to adjust
  290. -> UniqDFM elt -- old
  291. -> key -- new
  292. -> UniqDFM elt -- result
  293. alterUDFM f (UDFM m i) k =
  294. UDFM (M.alter alterf (getKey $ getUnique k) m) (i + 1)
  295. where
  296. alterf Nothing = inject $ f Nothing
  297. alterf (Just (TaggedVal v _)) = inject $ f (Just v)
  298. inject Nothing = Nothing
  299. inject (Just v) = Just $ TaggedVal v i
  300. -- | Map a function over every value in a UniqDFM
  301. mapUDFM :: (elt1 -> elt2) -> UniqDFM elt1 -> UniqDFM elt2
  302. mapUDFM f (UDFM m i) = UDFM (M.map (fmap f) m) i
  303. anyUDFM :: (elt -> Bool) -> UniqDFM elt -> Bool
  304. anyUDFM p (UDFM m _i) = M.fold ((||) . p . taggedFst) False m
  305. allUDFM :: (elt -> Bool) -> UniqDFM elt -> Bool
  306. allUDFM p (UDFM m _i) = M.fold ((&&) . p . taggedFst) True m
  307. instance Monoid (UniqDFM a) where
  308. mempty = emptyUDFM
  309. mappend = plusUDFM
  310. -- This should not be used in commited code, provided for convenience to
  311. -- make ad-hoc conversions when developing
  312. alwaysUnsafeUfmToUdfm :: UniqFM elt -> UniqDFM elt
  313. alwaysUnsafeUfmToUdfm = listToUDFM_Directly . nonDetUFMToList
  314. -- Output-ery
  315. instance Outputable a => Outputable (UniqDFM a) where
  316. ppr ufm = pprUniqDFM ppr ufm
  317. pprUniqDFM :: (a -> SDoc) -> UniqDFM a -> SDoc
  318. pprUniqDFM ppr_elt ufm
  319. = brackets $ fsep $ punctuate comma $
  320. [ ppr uq <+> text ":->" <+> ppr_elt elt
  321. | (uq, elt) <- udfmToList ufm ]
  322. pprUDFM :: UniqDFM a -- ^ The things to be pretty printed
  323. -> ([a] -> SDoc) -- ^ The pretty printing function to use on the elements
  324. -> SDoc -- ^ 'SDoc' where the things have been pretty
  325. -- printed
  326. pprUDFM ufm pp = pp (eltsUDFM ufm)