PageRenderTime 26ms CodeModel.GetById 9ms RepoModel.GetById 0ms app.codeStats 0ms

/cabal-install/Distribution/Client/Tar.hs

https://gitlab.com/kranium/cabal
Haskell | 926 lines | 583 code | 131 blank | 212 comment | 16 complexity | bd408fcb476be648bb7df2000915f84d MD5 | raw file
  1. {-# OPTIONS_GHC -fno-warn-unused-imports #-}
  2. -----------------------------------------------------------------------------
  3. -- |
  4. -- Module : Distribution.Client.Tar
  5. -- Copyright : (c) 2007 Bjorn Bringert,
  6. -- 2008 Andrea Vezzosi,
  7. -- 2008-2009 Duncan Coutts
  8. -- License : BSD3
  9. --
  10. -- Maintainer : duncan@community.haskell.org
  11. -- Portability : portable
  12. --
  13. -- Reading, writing and manipulating \"@.tar@\" archive files.
  14. --
  15. -----------------------------------------------------------------------------
  16. module Distribution.Client.Tar (
  17. -- * High level \"all in one\" operations
  18. createTarGzFile,
  19. extractTarGzFile,
  20. -- * Converting between internal and external representation
  21. read,
  22. write,
  23. writeEntries,
  24. -- * Packing and unpacking files to\/from internal representation
  25. pack,
  26. unpack,
  27. -- * Tar entry and associated types
  28. Entry(..),
  29. entryPath,
  30. EntryContent(..),
  31. Ownership(..),
  32. FileSize,
  33. Permissions,
  34. EpochTime,
  35. DevMajor,
  36. DevMinor,
  37. TypeCode,
  38. Format(..),
  39. buildTreeRefTypeCode,
  40. entrySizeInBlocks,
  41. entrySizeInBytes,
  42. -- * Constructing simple entry values
  43. simpleEntry,
  44. fileEntry,
  45. directoryEntry,
  46. -- * TarPath type
  47. TarPath,
  48. toTarPath,
  49. fromTarPath,
  50. -- ** Sequences of tar entries
  51. Entries(..),
  52. foldrEntries,
  53. foldlEntries,
  54. unfoldrEntries,
  55. mapEntries,
  56. filterEntries,
  57. entriesIndex,
  58. ) where
  59. import Data.Char (ord)
  60. import Data.Int (Int64)
  61. import Data.Bits (Bits, shiftL, testBit)
  62. import Data.List (foldl')
  63. import Numeric (readOct, showOct)
  64. import Control.Monad (MonadPlus(mplus), when)
  65. import qualified Data.Map as Map
  66. import qualified Data.ByteString.Lazy as BS
  67. import qualified Data.ByteString.Lazy.Char8 as BS.Char8
  68. import Data.ByteString.Lazy (ByteString)
  69. import qualified Codec.Compression.GZip as GZip
  70. import qualified Distribution.Client.GZipUtils as GZipUtils
  71. import System.FilePath
  72. ( (</>) )
  73. import qualified System.FilePath as FilePath.Native
  74. import qualified System.FilePath.Windows as FilePath.Windows
  75. import qualified System.FilePath.Posix as FilePath.Posix
  76. import System.Directory
  77. ( getDirectoryContents, doesDirectoryExist, getModificationTime
  78. , getPermissions, createDirectoryIfMissing, copyFile )
  79. import qualified System.Directory as Permissions
  80. ( Permissions(executable) )
  81. import Distribution.Compat.FilePerms
  82. ( setFileExecutable )
  83. import System.Posix.Types
  84. ( FileMode )
  85. import Distribution.Compat.Time
  86. import System.IO
  87. ( IOMode(ReadMode), openBinaryFile, hFileSize )
  88. import System.IO.Unsafe (unsafeInterleaveIO)
  89. import Prelude hiding (read)
  90. --
  91. -- * High level operations
  92. --
  93. createTarGzFile :: FilePath -- ^ Full Tarball path
  94. -> FilePath -- ^ Base directory
  95. -> FilePath -- ^ Directory to archive, relative to base dir
  96. -> IO ()
  97. createTarGzFile tar base dir =
  98. BS.writeFile tar . GZip.compress . write =<< pack base [dir]
  99. extractTarGzFile :: FilePath -- ^ Destination directory
  100. -> FilePath -- ^ Expected subdir (to check for tarbombs)
  101. -> FilePath -- ^ Tarball
  102. -> IO ()
  103. extractTarGzFile dir expected tar = do
  104. unpack dir . checkTarbomb expected . read . GZipUtils.maybeDecompress =<< BS.readFile tar
  105. --
  106. -- * Entry type
  107. --
  108. type FileSize = Int64
  109. type DevMajor = Int
  110. type DevMinor = Int
  111. type TypeCode = Char
  112. type Permissions = FileMode
  113. -- | Tar archive entry.
  114. --
  115. data Entry = Entry {
  116. -- | The path of the file or directory within the archive. This is in a
  117. -- tar-specific form. Use 'entryPath' to get a native 'FilePath'.
  118. entryTarPath :: !TarPath,
  119. -- | The real content of the entry. For 'NormalFile' this includes the
  120. -- file data. An entry usually contains a 'NormalFile' or a 'Directory'.
  121. entryContent :: !EntryContent,
  122. -- | File permissions (Unix style file mode).
  123. entryPermissions :: !Permissions,
  124. -- | The user and group to which this file belongs.
  125. entryOwnership :: !Ownership,
  126. -- | The time the file was last modified.
  127. entryTime :: !EpochTime,
  128. -- | The tar format the archive is using.
  129. entryFormat :: !Format
  130. }
  131. -- | Type code for the local build tree reference entry type. We don't use the
  132. -- symbolic link entry type because it allows only 100 ASCII characters for the
  133. -- path.
  134. buildTreeRefTypeCode :: TypeCode
  135. buildTreeRefTypeCode = 'C'
  136. -- | Native 'FilePath' of the file or directory within the archive.
  137. --
  138. entryPath :: Entry -> FilePath
  139. entryPath = fromTarPath . entryTarPath
  140. -- | Return the size of an entry in bytes.
  141. entrySizeInBytes :: Entry -> FileSize
  142. entrySizeInBytes = (*512) . fromIntegral . entrySizeInBlocks
  143. -- | Return the number of blocks in an entry.
  144. entrySizeInBlocks :: Entry -> Int
  145. entrySizeInBlocks entry = 1 + case entryContent entry of
  146. NormalFile _ size -> bytesToBlocks size
  147. OtherEntryType _ _ size -> bytesToBlocks size
  148. _ -> 0
  149. where
  150. bytesToBlocks s = 1 + ((fromIntegral s - 1) `div` 512)
  151. -- | The content of a tar archive entry, which depends on the type of entry.
  152. --
  153. -- Portable archives should contain only 'NormalFile' and 'Directory'.
  154. --
  155. data EntryContent = NormalFile ByteString !FileSize
  156. | Directory
  157. | SymbolicLink !LinkTarget
  158. | HardLink !LinkTarget
  159. | CharacterDevice !DevMajor !DevMinor
  160. | BlockDevice !DevMajor !DevMinor
  161. | NamedPipe
  162. | OtherEntryType !TypeCode ByteString !FileSize
  163. data Ownership = Ownership {
  164. -- | The owner user name. Should be set to @\"\"@ if unknown.
  165. ownerName :: String,
  166. -- | The owner group name. Should be set to @\"\"@ if unknown.
  167. groupName :: String,
  168. -- | Numeric owner user id. Should be set to @0@ if unknown.
  169. ownerId :: !Int,
  170. -- | Numeric owner group id. Should be set to @0@ if unknown.
  171. groupId :: !Int
  172. }
  173. -- | There have been a number of extensions to the tar file format over the
  174. -- years. They all share the basic entry fields and put more meta-data in
  175. -- different extended headers.
  176. --
  177. data Format =
  178. -- | This is the classic Unix V7 tar format. It does not support owner and
  179. -- group names, just numeric Ids. It also does not support device numbers.
  180. V7Format
  181. -- | The \"USTAR\" format is an extension of the classic V7 format. It was
  182. -- later standardised by POSIX. It has some restructions but is the most
  183. -- portable format.
  184. --
  185. | UstarFormat
  186. -- | The GNU tar implementation also extends the classic V7 format, though
  187. -- in a slightly different way from the USTAR format. In general for new
  188. -- archives the standard USTAR/POSIX should be used.
  189. --
  190. | GnuFormat
  191. deriving Eq
  192. -- | @rw-r--r--@ for normal files
  193. ordinaryFilePermissions :: Permissions
  194. ordinaryFilePermissions = 0o0644
  195. -- | @rwxr-xr-x@ for executable files
  196. executableFilePermissions :: Permissions
  197. executableFilePermissions = 0o0755
  198. -- | @rwxr-xr-x@ for directories
  199. directoryPermissions :: Permissions
  200. directoryPermissions = 0o0755
  201. isExecutable :: Permissions -> Bool
  202. isExecutable p = testBit p 0 || testBit p 6 -- user or other exectuable
  203. -- | An 'Entry' with all default values except for the file name and type. It
  204. -- uses the portable USTAR/POSIX format (see 'UstarHeader').
  205. --
  206. -- You can use this as a basis and override specific fields, eg:
  207. --
  208. -- > (emptyEntry name HardLink) { linkTarget = target }
  209. --
  210. simpleEntry :: TarPath -> EntryContent -> Entry
  211. simpleEntry tarpath content = Entry {
  212. entryTarPath = tarpath,
  213. entryContent = content,
  214. entryPermissions = case content of
  215. Directory -> directoryPermissions
  216. _ -> ordinaryFilePermissions,
  217. entryOwnership = Ownership "" "" 0 0,
  218. entryTime = 0,
  219. entryFormat = UstarFormat
  220. }
  221. -- | A tar 'Entry' for a file.
  222. --
  223. -- Entry fields such as file permissions and ownership have default values.
  224. --
  225. -- You can use this as a basis and override specific fields. For example if you
  226. -- need an executable file you could use:
  227. --
  228. -- > (fileEntry name content) { fileMode = executableFileMode }
  229. --
  230. fileEntry :: TarPath -> ByteString -> Entry
  231. fileEntry name fileContent =
  232. simpleEntry name (NormalFile fileContent (BS.length fileContent))
  233. -- | A tar 'Entry' for a directory.
  234. --
  235. -- Entry fields such as file permissions and ownership have default values.
  236. --
  237. directoryEntry :: TarPath -> Entry
  238. directoryEntry name = simpleEntry name Directory
  239. --
  240. -- * Tar paths
  241. --
  242. -- | The classic tar format allowed just 100 characters for the file name. The
  243. -- USTAR format extended this with an extra 155 characters, however it uses a
  244. -- complex method of splitting the name between the two sections.
  245. --
  246. -- Instead of just putting any overflow into the extended area, it uses the
  247. -- extended area as a prefix. The aggravating insane bit however is that the
  248. -- prefix (if any) must only contain a directory prefix. That is the split
  249. -- between the two areas must be on a directory separator boundary. So there is
  250. -- no simple calculation to work out if a file name is too long. Instead we
  251. -- have to try to find a valid split that makes the name fit in the two areas.
  252. --
  253. -- The rationale presumably was to make it a bit more compatible with old tar
  254. -- programs that only understand the classic format. A classic tar would be
  255. -- able to extract the file name and possibly some dir prefix, but not the
  256. -- full dir prefix. So the files would end up in the wrong place, but that's
  257. -- probably better than ending up with the wrong names too.
  258. --
  259. -- So it's understandable but rather annoying.
  260. --
  261. -- * Tar paths use posix format (ie @\'/\'@ directory separators), irrespective
  262. -- of the local path conventions.
  263. --
  264. -- * The directory separator between the prefix and name is /not/ stored.
  265. --
  266. data TarPath = TarPath FilePath -- path name, 100 characters max.
  267. FilePath -- path prefix, 155 characters max.
  268. deriving (Eq, Ord)
  269. -- | Convert a 'TarPath' to a native 'FilePath'.
  270. --
  271. -- The native 'FilePath' will use the native directory separator but it is not
  272. -- otherwise checked for validity or sanity. In particular:
  273. --
  274. -- * The tar path may be invalid as a native path, eg the filename @\"nul\"@ is
  275. -- not valid on Windows.
  276. --
  277. -- * The tar path may be an absolute path or may contain @\"..\"@ components.
  278. -- For security reasons this should not usually be allowed, but it is your
  279. -- responsibility to check for these conditions (eg using 'checkSecurity').
  280. --
  281. fromTarPath :: TarPath -> FilePath
  282. fromTarPath (TarPath name prefix) = adjustDirectory $
  283. FilePath.Native.joinPath $ FilePath.Posix.splitDirectories prefix
  284. ++ FilePath.Posix.splitDirectories name
  285. where
  286. adjustDirectory | FilePath.Posix.hasTrailingPathSeparator name
  287. = FilePath.Native.addTrailingPathSeparator
  288. | otherwise = id
  289. -- | Convert a native 'FilePath' to a 'TarPath'.
  290. --
  291. -- The conversion may fail if the 'FilePath' is too long. See 'TarPath' for a
  292. -- description of the problem with splitting long 'FilePath's.
  293. --
  294. toTarPath :: Bool -- ^ Is the path for a directory? This is needed because for
  295. -- directories a 'TarPath' must always use a trailing @\/@.
  296. -> FilePath -> Either String TarPath
  297. toTarPath isDir = splitLongPath
  298. . addTrailingSep
  299. . FilePath.Posix.joinPath
  300. . FilePath.Native.splitDirectories
  301. where
  302. addTrailingSep | isDir = FilePath.Posix.addTrailingPathSeparator
  303. | otherwise = id
  304. -- | Take a sanitized path, split on directory separators and try to pack it
  305. -- into the 155 + 100 tar file name format.
  306. --
  307. -- The stragey is this: take the name-directory components in reverse order
  308. -- and try to fit as many components into the 100 long name area as possible.
  309. -- If all the remaining components fit in the 155 name area then we win.
  310. --
  311. splitLongPath :: FilePath -> Either String TarPath
  312. splitLongPath path =
  313. case packName nameMax (reverse (FilePath.Posix.splitPath path)) of
  314. Left err -> Left err
  315. Right (name, []) -> Right (TarPath name "")
  316. Right (name, first:rest) -> case packName prefixMax remainder of
  317. Left err -> Left err
  318. Right (_ , (_:_)) -> Left "File name too long (cannot split)"
  319. Right (prefix, []) -> Right (TarPath name prefix)
  320. where
  321. -- drop the '/' between the name and prefix:
  322. remainder = init first : rest
  323. where
  324. nameMax, prefixMax :: Int
  325. nameMax = 100
  326. prefixMax = 155
  327. packName _ [] = Left "File name empty"
  328. packName maxLen (c:cs)
  329. | n > maxLen = Left "File name too long"
  330. | otherwise = Right (packName' maxLen n [c] cs)
  331. where n = length c
  332. packName' maxLen n ok (c:cs)
  333. | n' <= maxLen = packName' maxLen n' (c:ok) cs
  334. where n' = n + length c
  335. packName' _ _ ok cs = (FilePath.Posix.joinPath ok, cs)
  336. -- | The tar format allows just 100 ASCII characters for the 'SymbolicLink' and
  337. -- 'HardLink' entry types.
  338. --
  339. newtype LinkTarget = LinkTarget FilePath
  340. deriving (Eq, Ord)
  341. -- | Convert a tar 'LinkTarget' to a native 'FilePath'.
  342. --
  343. fromLinkTarget :: LinkTarget -> FilePath
  344. fromLinkTarget (LinkTarget path) = adjustDirectory $
  345. FilePath.Native.joinPath $ FilePath.Posix.splitDirectories path
  346. where
  347. adjustDirectory | FilePath.Posix.hasTrailingPathSeparator path
  348. = FilePath.Native.addTrailingPathSeparator
  349. | otherwise = id
  350. --
  351. -- * Entries type
  352. --
  353. -- | A tar archive is a sequence of entries.
  354. data Entries = Next Entry Entries
  355. | Done
  356. | Fail String
  357. unfoldrEntries :: (a -> Either String (Maybe (Entry, a))) -> a -> Entries
  358. unfoldrEntries f = unfold
  359. where
  360. unfold x = case f x of
  361. Left err -> Fail err
  362. Right Nothing -> Done
  363. Right (Just (e, x')) -> Next e (unfold x')
  364. foldrEntries :: (Entry -> a -> a) -> a -> (String -> a) -> Entries -> a
  365. foldrEntries next done fail' = fold
  366. where
  367. fold (Next e es) = next e (fold es)
  368. fold Done = done
  369. fold (Fail err) = fail' err
  370. foldlEntries :: (a -> Entry -> a) -> a -> Entries -> Either String a
  371. foldlEntries f = fold
  372. where
  373. fold a (Next e es) = (fold $! f a e) es
  374. fold a Done = Right a
  375. fold _ (Fail err) = Left err
  376. mapEntries :: (Entry -> Entry) -> Entries -> Entries
  377. mapEntries f = foldrEntries (Next . f) Done Fail
  378. filterEntries :: (Entry -> Bool) -> Entries -> Entries
  379. filterEntries p =
  380. foldrEntries
  381. (\entry rest -> if p entry
  382. then Next entry rest
  383. else rest)
  384. Done Fail
  385. checkEntries :: (Entry -> Maybe String) -> Entries -> Entries
  386. checkEntries checkEntry =
  387. foldrEntries
  388. (\entry rest -> case checkEntry entry of
  389. Nothing -> Next entry rest
  390. Just err -> Fail err)
  391. Done Fail
  392. entriesIndex :: Entries -> Either String (Map.Map TarPath Entry)
  393. entriesIndex = foldlEntries (\m e -> Map.insert (entryTarPath e) e m) Map.empty
  394. --
  395. -- * Checking
  396. --
  397. -- | This function checks a sequence of tar entries for file name security
  398. -- problems. It checks that:
  399. --
  400. -- * file paths are not absolute
  401. --
  402. -- * file paths do not contain any path components that are \"@..@\"
  403. --
  404. -- * file names are valid
  405. --
  406. -- These checks are from the perspective of the current OS. That means we check
  407. -- for \"@C:\blah@\" files on Windows and \"\/blah\" files on unix. For archive
  408. -- entry types 'HardLink' and 'SymbolicLink' the same checks are done for the
  409. -- link target. A failure in any entry terminates the sequence of entries with
  410. -- an error.
  411. --
  412. checkSecurity :: Entries -> Entries
  413. checkSecurity = checkEntries checkEntrySecurity
  414. checkTarbomb :: FilePath -> Entries -> Entries
  415. checkTarbomb expectedTopDir = checkEntries (checkEntryTarbomb expectedTopDir)
  416. checkEntrySecurity :: Entry -> Maybe String
  417. checkEntrySecurity entry = case entryContent entry of
  418. HardLink link -> check (entryPath entry)
  419. `mplus` check (fromLinkTarget link)
  420. SymbolicLink link -> check (entryPath entry)
  421. `mplus` check (fromLinkTarget link)
  422. _ -> check (entryPath entry)
  423. where
  424. check name
  425. | not (FilePath.Native.isRelative name)
  426. = Just $ "Absolute file name in tar archive: " ++ show name
  427. | not (FilePath.Native.isValid name)
  428. = Just $ "Invalid file name in tar archive: " ++ show name
  429. | any (=="..") (FilePath.Native.splitDirectories name)
  430. = Just $ "Invalid file name in tar archive: " ++ show name
  431. | otherwise = Nothing
  432. checkEntryTarbomb :: FilePath -> Entry -> Maybe String
  433. checkEntryTarbomb _ entry | nonFilesystemEntry = Nothing
  434. where
  435. -- Ignore some special entries we will not unpack anyway
  436. nonFilesystemEntry =
  437. case entryContent entry of
  438. OtherEntryType 'g' _ _ -> True --PAX global header
  439. OtherEntryType 'x' _ _ -> True --PAX individual header
  440. _ -> False
  441. checkEntryTarbomb expectedTopDir entry =
  442. case FilePath.Native.splitDirectories (entryPath entry) of
  443. (topDir:_) | topDir == expectedTopDir -> Nothing
  444. _ -> Just $ "File in tar archive is not in the expected directory "
  445. ++ show expectedTopDir
  446. --
  447. -- * Reading
  448. --
  449. read :: ByteString -> Entries
  450. read = unfoldrEntries getEntry
  451. getEntry :: ByteString -> Either String (Maybe (Entry, ByteString))
  452. getEntry bs
  453. | BS.length header < 512 = Left "truncated tar archive"
  454. -- Tar files end with at least two blocks of all '0'. Checking this serves
  455. -- two purposes. It checks the format but also forces the tail of the data
  456. -- which is necessary to close the file if it came from a lazily read file.
  457. | BS.head bs == 0 = case BS.splitAt 1024 bs of
  458. (end, trailing)
  459. | BS.length end /= 1024 -> Left "short tar trailer"
  460. | not (BS.all (== 0) end) -> Left "bad tar trailer"
  461. | not (BS.all (== 0) trailing) -> Left "tar file has trailing junk"
  462. | otherwise -> Right Nothing
  463. | otherwise = partial $ do
  464. case (chksum_, format_) of
  465. (Ok chksum, _ ) | correctChecksum header chksum -> return ()
  466. (Ok _, Ok _) -> fail "tar checksum error"
  467. _ -> fail "data is not in tar format"
  468. -- These fields are partial, have to check them
  469. format <- format_; mode <- mode_;
  470. uid <- uid_; gid <- gid_;
  471. size <- size_; mtime <- mtime_;
  472. devmajor <- devmajor_; devminor <- devminor_;
  473. let content = BS.take size (BS.drop 512 bs)
  474. padding = (512 - size) `mod` 512
  475. bs' = BS.drop (512 + size + padding) bs
  476. entry = Entry {
  477. entryTarPath = TarPath name prefix,
  478. entryContent = case typecode of
  479. '\0' -> NormalFile content size
  480. '0' -> NormalFile content size
  481. '1' -> HardLink (LinkTarget linkname)
  482. '2' -> SymbolicLink (LinkTarget linkname)
  483. '3' -> CharacterDevice devmajor devminor
  484. '4' -> BlockDevice devmajor devminor
  485. '5' -> Directory
  486. '6' -> NamedPipe
  487. '7' -> NormalFile content size
  488. _ -> OtherEntryType typecode content size,
  489. entryPermissions = mode,
  490. entryOwnership = Ownership uname gname uid gid,
  491. entryTime = mtime,
  492. entryFormat = format
  493. }
  494. return (Just (entry, bs'))
  495. where
  496. header = BS.take 512 bs
  497. name = getString 0 100 header
  498. mode_ = getOct 100 8 header
  499. uid_ = getOct 108 8 header
  500. gid_ = getOct 116 8 header
  501. size_ = getOct 124 12 header
  502. mtime_ = getOct 136 12 header
  503. chksum_ = getOct 148 8 header
  504. typecode = getByte 156 header
  505. linkname = getString 157 100 header
  506. magic = getChars 257 8 header
  507. uname = getString 265 32 header
  508. gname = getString 297 32 header
  509. devmajor_ = getOct 329 8 header
  510. devminor_ = getOct 337 8 header
  511. prefix = getString 345 155 header
  512. -- trailing = getBytes 500 12 header
  513. format_ = case magic of
  514. "\0\0\0\0\0\0\0\0" -> return V7Format
  515. "ustar\NUL00" -> return UstarFormat
  516. "ustar \NUL" -> return GnuFormat
  517. _ -> fail "tar entry not in a recognised format"
  518. correctChecksum :: ByteString -> Int -> Bool
  519. correctChecksum header checksum = checksum == checksum'
  520. where
  521. -- sum of all 512 bytes in the header block,
  522. -- treating each byte as an 8-bit unsigned value
  523. checksum' = BS.Char8.foldl' (\x y -> x + ord y) 0 header'
  524. -- treating the 8 bytes of chksum as blank characters.
  525. header' = BS.concat [BS.take 148 header,
  526. BS.Char8.replicate 8 ' ',
  527. BS.drop 156 header]
  528. -- * TAR format primitive input
  529. getOct :: (Integral a, Bits a) => Int64 -> Int64 -> ByteString -> Partial a
  530. getOct off len header
  531. | BS.head bytes == 128 = parseBinInt (BS.unpack (BS.tail bytes))
  532. | null octstr = return 0
  533. | otherwise = case readOct octstr of
  534. [(x,[])] -> return x
  535. _ -> fail "tar header is malformed (bad numeric encoding)"
  536. where
  537. bytes = getBytes off len header
  538. octstr = BS.Char8.unpack
  539. . BS.Char8.takeWhile (\c -> c /= '\NUL' && c /= ' ')
  540. . BS.Char8.dropWhile (== ' ')
  541. $ bytes
  542. -- Some tar programs switch into a binary format when they try to represent
  543. -- field values that will not fit in the required width when using the text
  544. -- octal format. In particular, the UID/GID fields can only hold up to 2^21
  545. -- while in the binary format can hold up to 2^32. The binary format uses
  546. -- '\128' as the header which leaves 7 bytes. Only the last 4 are used.
  547. parseBinInt [0, 0, 0, byte3, byte2, byte1, byte0] =
  548. return $! shiftL (fromIntegral byte3) 24
  549. + shiftL (fromIntegral byte2) 16
  550. + shiftL (fromIntegral byte1) 8
  551. + shiftL (fromIntegral byte0) 0
  552. parseBinInt _ = fail "tar header uses non-standard number encoding"
  553. getBytes :: Int64 -> Int64 -> ByteString -> ByteString
  554. getBytes off len = BS.take len . BS.drop off
  555. getByte :: Int64 -> ByteString -> Char
  556. getByte off bs = BS.Char8.index bs off
  557. getChars :: Int64 -> Int64 -> ByteString -> String
  558. getChars off len = BS.Char8.unpack . getBytes off len
  559. getString :: Int64 -> Int64 -> ByteString -> String
  560. getString off len = BS.Char8.unpack . BS.Char8.takeWhile (/='\0') . getBytes off len
  561. data Partial a = Error String | Ok a
  562. partial :: Partial a -> Either String a
  563. partial (Error msg) = Left msg
  564. partial (Ok x) = Right x
  565. instance Monad Partial where
  566. return = Ok
  567. Error m >>= _ = Error m
  568. Ok x >>= k = k x
  569. fail = Error
  570. --
  571. -- * Writing
  572. --
  573. -- | Create the external representation of a tar archive by serialising a list
  574. -- of tar entries.
  575. --
  576. -- * The conversion is done lazily.
  577. --
  578. write :: [Entry] -> ByteString
  579. write es = BS.concat $ map putEntry es ++ [BS.replicate (512*2) 0]
  580. -- | Same as 'write', but for 'Entries'.
  581. writeEntries :: Entries -> ByteString
  582. writeEntries entries = BS.concat $ foldrEntries (\e res -> (putEntry e):res)
  583. [BS.replicate (512*2) 0] error entries
  584. putEntry :: Entry -> ByteString
  585. putEntry entry = case entryContent entry of
  586. NormalFile content size -> BS.concat [ header, content, padding size ]
  587. OtherEntryType _ content size -> BS.concat [ header, content, padding size ]
  588. _ -> header
  589. where
  590. header = putHeader entry
  591. padding size = BS.replicate paddingSize 0
  592. where paddingSize = fromIntegral (negate size `mod` 512)
  593. putHeader :: Entry -> ByteString
  594. putHeader entry =
  595. BS.concat $ [ BS.take 148 block
  596. , BS.Char8.pack $ putOct 7 checksum
  597. , BS.Char8.singleton ' '
  598. , BS.drop 156 block ]
  599. where
  600. -- putHeaderNoChkSum returns a String, so we convert it to the final
  601. -- representation before calculating the checksum.
  602. block = BS.Char8.pack . putHeaderNoChkSum $ entry
  603. checksum = BS.Char8.foldl' (\x y -> x + ord y) 0 block
  604. putHeaderNoChkSum :: Entry -> String
  605. putHeaderNoChkSum Entry {
  606. entryTarPath = TarPath name prefix,
  607. entryContent = content,
  608. entryPermissions = permissions,
  609. entryOwnership = ownership,
  610. entryTime = modTime,
  611. entryFormat = format
  612. } =
  613. concat
  614. [ putString 100 $ name
  615. , putOct 8 $ permissions
  616. , putOct 8 $ ownerId ownership
  617. , putOct 8 $ groupId ownership
  618. , putOct 12 $ contentSize
  619. , putOct 12 $ modTime
  620. , fill 8 $ ' ' -- dummy checksum
  621. , putChar8 $ typeCode
  622. , putString 100 $ linkTarget
  623. ] ++
  624. case format of
  625. V7Format ->
  626. fill 255 '\NUL'
  627. UstarFormat -> concat
  628. [ putString 8 $ "ustar\NUL00"
  629. , putString 32 $ ownerName ownership
  630. , putString 32 $ groupName ownership
  631. , putOct 8 $ deviceMajor
  632. , putOct 8 $ deviceMinor
  633. , putString 155 $ prefix
  634. , fill 12 $ '\NUL'
  635. ]
  636. GnuFormat -> concat
  637. [ putString 8 $ "ustar \NUL"
  638. , putString 32 $ ownerName ownership
  639. , putString 32 $ groupName ownership
  640. , putGnuDev 8 $ deviceMajor
  641. , putGnuDev 8 $ deviceMinor
  642. , putString 155 $ prefix
  643. , fill 12 $ '\NUL'
  644. ]
  645. where
  646. (typeCode, contentSize, linkTarget,
  647. deviceMajor, deviceMinor) = case content of
  648. NormalFile _ size -> ('0' , size, [], 0, 0)
  649. Directory -> ('5' , 0, [], 0, 0)
  650. SymbolicLink (LinkTarget link) -> ('2' , 0, link, 0, 0)
  651. HardLink (LinkTarget link) -> ('1' , 0, link, 0, 0)
  652. CharacterDevice major minor -> ('3' , 0, [], major, minor)
  653. BlockDevice major minor -> ('4' , 0, [], major, minor)
  654. NamedPipe -> ('6' , 0, [], 0, 0)
  655. OtherEntryType code _ size -> (code, size, [], 0, 0)
  656. putGnuDev w n = case content of
  657. CharacterDevice _ _ -> putOct w n
  658. BlockDevice _ _ -> putOct w n
  659. _ -> replicate w '\NUL'
  660. -- * TAR format primitive output
  661. type FieldWidth = Int
  662. putString :: FieldWidth -> String -> String
  663. putString n s = take n s ++ fill (n - length s) '\NUL'
  664. --TODO: check integer widths, eg for large file sizes
  665. putOct :: (Show a, Integral a) => FieldWidth -> a -> String
  666. putOct n x =
  667. let octStr = take (n-1) $ showOct x ""
  668. in fill (n - length octStr - 1) '0'
  669. ++ octStr
  670. ++ putChar8 '\NUL'
  671. putChar8 :: Char -> String
  672. putChar8 c = [c]
  673. fill :: FieldWidth -> Char -> String
  674. fill n c = replicate n c
  675. --
  676. -- * Unpacking
  677. --
  678. unpack :: FilePath -> Entries -> IO ()
  679. unpack baseDir entries = unpackEntries [] (checkSecurity entries)
  680. >>= emulateLinks
  681. where
  682. -- We're relying here on 'checkSecurity' to make sure we're not scribbling
  683. -- files all over the place.
  684. unpackEntries _ (Fail err) = fail err
  685. unpackEntries links Done = return links
  686. unpackEntries links (Next entry es) = case entryContent entry of
  687. NormalFile file _ -> extractFile entry path file
  688. >> unpackEntries links es
  689. Directory -> extractDir path
  690. >> unpackEntries links es
  691. HardLink link -> (unpackEntries $! saveLink path link links) es
  692. SymbolicLink link -> (unpackEntries $! saveLink path link links) es
  693. _ -> unpackEntries links es --ignore other file types
  694. where
  695. path = entryPath entry
  696. extractFile entry path content = do
  697. -- Note that tar archives do not make sure each directory is created
  698. -- before files they contain, indeed we may have to create several
  699. -- levels of directory.
  700. createDirectoryIfMissing True absDir
  701. BS.writeFile absPath content
  702. when (isExecutable (entryPermissions entry))
  703. (setFileExecutable absPath)
  704. where
  705. absDir = baseDir </> FilePath.Native.takeDirectory path
  706. absPath = baseDir </> path
  707. extractDir path = createDirectoryIfMissing True (baseDir </> path)
  708. saveLink path link links = seq (length path)
  709. $ seq (length link')
  710. $ (path, link'):links
  711. where link' = fromLinkTarget link
  712. emulateLinks = mapM_ $ \(relPath, relLinkTarget) ->
  713. let absPath = baseDir </> relPath
  714. absTarget = FilePath.Native.takeDirectory absPath </> relLinkTarget
  715. in copyFile absTarget absPath
  716. --
  717. -- * Packing
  718. --
  719. pack :: FilePath -- ^ Base directory
  720. -> [FilePath] -- ^ Files and directories to pack, relative to the base dir
  721. -> IO [Entry]
  722. pack baseDir paths0 = preparePaths baseDir paths0 >>= packPaths baseDir
  723. preparePaths :: FilePath -> [FilePath] -> IO [FilePath]
  724. preparePaths baseDir paths =
  725. fmap concat $ interleave
  726. [ do isDir <- doesDirectoryExist (baseDir </> path)
  727. if isDir
  728. then do entries <- getDirectoryContentsRecursive (baseDir </> path)
  729. return (FilePath.Native.addTrailingPathSeparator path
  730. : map (path </>) entries)
  731. else return [path]
  732. | path <- paths ]
  733. packPaths :: FilePath -> [FilePath] -> IO [Entry]
  734. packPaths baseDir paths =
  735. interleave
  736. [ do tarpath <- either fail return (toTarPath isDir relpath)
  737. if isDir then packDirectoryEntry filepath tarpath
  738. else packFileEntry filepath tarpath
  739. | relpath <- paths
  740. , let isDir = FilePath.Native.hasTrailingPathSeparator filepath
  741. filepath = baseDir </> relpath ]
  742. interleave :: [IO a] -> IO [a]
  743. interleave = unsafeInterleaveIO . go
  744. where
  745. go [] = return []
  746. go (x:xs) = do
  747. x' <- x
  748. xs' <- interleave xs
  749. return (x':xs')
  750. packFileEntry :: FilePath -- ^ Full path to find the file on the local disk
  751. -> TarPath -- ^ Path to use for the tar Entry in the archive
  752. -> IO Entry
  753. packFileEntry filepath tarpath = do
  754. mtime <- getModTime filepath
  755. perms <- getPermissions filepath
  756. file <- openBinaryFile filepath ReadMode
  757. size <- hFileSize file
  758. content <- BS.hGetContents file
  759. return (simpleEntry tarpath (NormalFile content (fromIntegral size))) {
  760. entryPermissions = if Permissions.executable perms
  761. then executableFilePermissions
  762. else ordinaryFilePermissions,
  763. entryTime = mtime
  764. }
  765. packDirectoryEntry :: FilePath -- ^ Full path to find the file on the local disk
  766. -> TarPath -- ^ Path to use for the tar Entry in the archive
  767. -> IO Entry
  768. packDirectoryEntry filepath tarpath = do
  769. mtime <- getModTime filepath
  770. return (directoryEntry tarpath) {
  771. entryTime = mtime
  772. }
  773. getDirectoryContentsRecursive :: FilePath -> IO [FilePath]
  774. getDirectoryContentsRecursive dir0 =
  775. fmap tail (recurseDirectories dir0 [""])
  776. recurseDirectories :: FilePath -> [FilePath] -> IO [FilePath]
  777. recurseDirectories _ [] = return []
  778. recurseDirectories base (dir:dirs) = unsafeInterleaveIO $ do
  779. (files, dirs') <- collect [] [] =<< getDirectoryContents (base </> dir)
  780. files' <- recurseDirectories base (dirs' ++ dirs)
  781. return (dir : files ++ files')
  782. where
  783. collect files dirs' [] = return (reverse files, reverse dirs')
  784. collect files dirs' (entry:entries) | ignore entry
  785. = collect files dirs' entries
  786. collect files dirs' (entry:entries) = do
  787. let dirEntry = dir </> entry
  788. dirEntry' = FilePath.Native.addTrailingPathSeparator dirEntry
  789. isDirectory <- doesDirectoryExist (base </> dirEntry)
  790. if isDirectory
  791. then collect files (dirEntry':dirs') entries
  792. else collect (dirEntry:files) dirs' entries
  793. ignore ['.'] = True
  794. ignore ['.', '.'] = True
  795. ignore _ = False