PageRenderTime 48ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/src/string_ext.lua

http://github.com/rrthomas/lua-stdlib
Lua | 299 lines | 235 code | 10 blank | 54 comment | 3 complexity | 4ed6aa62a299f940a72b0a85a040cd75 MD5 | raw file
  1. --- Additions to the string module
  2. -- TODO: Pretty printing (use in getopt); see source for details.
  3. module ("string", package.seeall)
  4. -- Write pretty-printing based on:
  5. --
  6. -- John Hughes's and Simon Peyton Jones's Pretty Printer Combinators
  7. --
  8. -- Based on "The Design of a Pretty-printing Library in Advanced
  9. -- Functional Programming", Johan Jeuring and Erik Meijer (eds), LNCS 925
  10. -- http://www.cs.chalmers.se/~rjmh/Papers/pretty.ps
  11. -- Heavily modified by Simon Peyton Jones, Dec 96
  12. --
  13. -- Haskell types:
  14. -- data Doc list of lines
  15. -- quote :: Char -> Char -> Doc -> Doc Wrap document in ...
  16. -- (<>) :: Doc -> Doc -> Doc Beside
  17. -- (<+>) :: Doc -> Doc -> Doc Beside, separated by space
  18. -- ($$) :: Doc -> Doc -> Doc Above; if there is no overlap it "dovetails" the two
  19. -- nest :: Int -> Doc -> Doc Nested
  20. -- punctuate :: Doc -> [Doc] -> [Doc] punctuate p [d1, ... dn] = [d1 <> p, d2 <> p, ... dn-1 <> p, dn]
  21. -- render :: Int Line length
  22. -- -> Float Ribbons per line
  23. -- -> (TextDetails -> a -> a) What to do with text
  24. -- -> a What to do at the end
  25. -- -> Doc The document
  26. -- -> a Result
  --- Let strings be subscripted by position.
  -- Installed as the <code>__index</code> metamethod of the shared string
  -- metatable, so it applies to every string value.
  -- @param s string
  -- @param i index
  -- @return the one-character string at position i when i is a number;
  -- otherwise whatever the previously installed <code>__index</code>
  -- yields for i (by default, the string library functions)
  local old__index = getmetatable ("").__index
  getmetatable ("").__index = function (s, i)
    if type (i) ~= "number" then
      -- Not a position: delegate to the handler that was installed before
      -- this one, which may be either a function or a plain table.
      if type (old__index) == "function" then
        return old__index (s, i)
      end
      return old__index[i]
    end
    return sub (s, i, i)
  end
  --- Append metamethod for strings.
  -- Stored under the <code>__append</code> key of the shared string
  -- metatable.
  -- @param s string
  -- @param c character (1-character string)
  -- @return s with c concatenated onto its end
  getmetatable ("").__append = function (s, c)
    local appended = s .. c
    return appended
  end
  --- Concat metamethod for strings.
  -- Both operands are passed through <code>tostring</code> before being
  -- joined, so a string can be concatenated with any value.
  -- @param s string
  -- @param o object
  -- @return tostring (s) .. tostring (o)
  getmetatable ("").__concat = function (s, o)
    local left = tostring (s)
    local right = tostring (o)
    return left .. right
  end
  --- Capitalise each word in a string.
  -- A word is a maximal run of alphanumeric characters; only its first
  -- character is upper-cased, the rest is left as it was.
  -- @param s string
  -- @return capitalised string
  function caps (s)
    local function capitalise (first, rest)
      return upper (first) .. rest
    end
    -- Assign to a local so that gsub's replacement count is dropped.
    local result = gsub (s, "(%w)([%w]*)", capitalise)
    return result
  end
  --- Strip a single trailing newline from a string.
  -- At most one newline is removed; a string that does not end in a
  -- newline is returned unchanged.
  -- @param s string to process
  -- @return processed string
  function chomp (s)
    -- Assign to a local so that gsub's replacement count is dropped.
    local stripped = gsub (s, "\n$", "")
    return stripped
  end
  --- Escape a string so that it can be used as a literal pattern.
  -- Every non-alphanumeric character is prefixed with "%".
  -- @param s string to process
  -- @return processed string
  function escapePattern (s)
    -- Assign to a local so that gsub's replacement count is dropped.
    local escaped = gsub (s, "(%W)", "%%%1")
    return escaped
  end
  --- Escape a string to be used as a shell token.
  -- Puts a backslash in front of each space, parenthesis, backslash,
  -- square bracket, double quote and apostrophe.
  -- NOTE(review): other whitespace (tab, newline) and other shell
  -- metacharacters such as $, `, ;, &, | and * are passed through
  -- unescaped -- confirm this is acceptable before using the result on
  -- untrusted input.
  -- @param s string to process
  -- @return processed string
  function escapeShell (s)
    -- Assign to a local so that gsub's replacement count is dropped.
    local escaped = gsub (s, "([ %(%)%\\%[%]\"'])", "\\%1")
    return escaped
  end
  --- Return the English suffix for an ordinal.
  -- Only the last two digits matter: 11, 12 and 13 take "th" even though
  -- they end in 1, 2 and 3. The sign of n is ignored.
  -- @param n number of the day
  -- @return suffix ("st", "nd", "rd" or "th")
  function ordinalSuffix (n)
    -- The % operator replaces math.mod, which is the deprecated Lua 5.0
    -- name and is absent from builds without the compatibility switch.
    n = math.abs (n) % 100
    local d = n % 10
    if d == 1 and n ~= 11 then
      return "st"
    elseif d == 2 and n ~= 12 then
      return "nd"
    elseif d == 3 and n ~= 13 then
      return "rd"
    else
      return "th"
    end
  end
  --- Extend format to work better with one argument.
  -- When no first format argument is supplied (it is nil), the format
  -- string is returned untouched and no formatting is attempted, so a
  -- lone string containing "%" is safe to pass.
  -- @param f format
  -- @param ... arguments to format
  -- @return formatted string
  local _format = format
  function format (f, arg1, ...)
    if arg1 ~= nil then
      -- At least one argument: defer to the original implementation.
      return _format (f, arg1, ...)
    end
    return f
  end
  --- Justify a string to a fixed width.
  -- A string longer than the width is truncated: on the right when w is
  -- positive, on the left when w is negative.
  -- @param s string to justify
  -- @param w width to justify to (-ve means right-justify; +ve means
  -- left-justify)
  -- @param p string to pad with (default: a single space)
  -- @return justified string
  function pad (s, w, p)
    -- Enough padding to fill the whole width even when s is empty.
    local filler = rep (p or " ", math.abs (w))
    if w < 0 then
      -- A negative start index keeps the last |w| characters.
      return sub (filler .. s, w)
    else
      return sub (s .. filler, 1, w)
    end
  end
  --- Wrap a string into a paragraph.
  -- Lines are broken at spaces; the run of spaces at a break point is
  -- replaced by a newline followed by the indent.
  -- @param s string to wrap
  -- @param w width to wrap to (default: 78)
  -- @param ind indent (default: 0)
  -- @param ind1 indent of first line (default: ind)
  -- @return wrapped paragraph
  function wrap (s, w, ind, ind1)
    w = w or 78
    ind = ind or 0
    ind1 = ind1 or ind
    assert (ind1 < w and ind < w,
            "the indents must be less than the line width")
    local margin = rep (" ", ind) -- indent inserted after every break
    local room = w - ind          -- text columns available per line
    s = rep (" ", ind1) .. s
    local total = len (s)
    local lstart = 1
    while total - lstart > room do
      -- Walk back from the furthest allowed column to the nearest space.
      local brk = lstart + room
      while brk > lstart and sub (s, brk, brk) ~= " " do
        brk = brk - 1
      end
      -- Then skip back over the run of spaces that precedes the break.
      local tail = brk
      while tail > lstart and sub (s, tail, tail) == " " do
        tail = tail - 1
      end
      s = sub (s, 1, tail) .. "\n" .. margin .. sub (s, brk + 1, -1)
      -- The text grew by a newline plus the indent and shrank by the
      -- spaces that were removed.
      local delta = ind + 1 - (brk - tail)
      lstart = tail + delta
      total = total + delta
    end
    return s
  end
  --- Write a number using SI suffixes.
  -- The number is rounded to 3 significant figures and scaled so that the
  -- mantissa lies in [1, 1000). When the exponent has no SI prefix, an
  -- "e<exponent>" suffix is used instead, with the exponent a multiple of
  -- 3. The sign of n is preserved. Values that cannot be written in
  -- scientific notation (infinities, NaN) are returned via tostring.
  -- @param n number
  -- @return string
  function numbertosi (n)
    local SIprefix = {
      [-8] = "y", [-7] = "z", [-6] = "a", [-5] = "f",
      [-4] = "p", [-3] = "n", [-2] = "mu", [-1] = "m",
      [0] = "", [1] = "k", [2] = "M", [3] = "G",
      [4] = "T", [5] = "P", [6] = "E", [7] = "Z",
      [8] = "Y"
    }
    -- "% #.2e" always yields <sign or space><d>.<dd>e<sign><digits>.
    local t = format ("% #.2e", n)
    local _, _, sign, m, e = t:find ("^([ %-])(%d%.%d%d)e([+-]%d+)$")
    if m == nil then
      -- Not in scientific notation (e.g. inf or nan): nothing to scale.
      return tostring (n)
    end
    local man, exp = tonumber (m), tonumber (e)
    local siexp = math.floor (exp / 3)
    local shift = exp - siexp * 3
    -- siexp counts groups of three decimal places, so the exponent that
    -- is printed in the fallback suffix must be siexp * 3.
    local s = SIprefix[siexp] or "e" .. tostring (siexp * 3)
    man = man * (10 ^ shift)
    -- Re-attach the sign as text; a zero mantissa stays unsigned.
    local prefix = ""
    if sign == "-" and man ~= 0 then
      prefix = "-"
    end
    return prefix .. tostring (man) .. s
  end
  --- Do find, returning captures as a list.
  -- @param s target string
  -- @param p pattern
  -- @param init start position (default: 1)
  -- @param plain inhibit magic characters (default: nil)
  -- @return start of match, end of match, table of captures (the table
  -- is empty when there are no captures or no match)
  function tfind (s, p, init, plain)
    -- Gather everything after the two positions into a single table.
    local function pack (from, to, ...)
      return from, to, {...}
    end
    -- Call find directly rather than through the pattern value (p.find):
    -- that only worked when p was a string and depended on the string
    -- metatable's __index handler.
    return pack (find (s, p, init, plain))
  end
  --- Do multiple finds on a string.
  -- Matches are collected left to right without overlapping. A pattern
  -- that matches the empty string yields one empty match per position
  -- (including the position just past the end) instead of looping
  -- forever.
  -- @param s target string
  -- @param p pattern
  -- @param init start position (default: 1)
  -- @param plain inhibit magic characters (default: nil)
  -- @return list of {from, to; capt = {captures}}
  function finds (s, p, init, plain)
    init = init or 1
    local l = {}
    -- One past the last character: the furthest position at which an
    -- (empty) match can still start.
    local limit = #s + 1
    local from, to, r
    repeat
      from, to, r = tfind (s, p, init, plain)
      if from ~= nil then
        table.insert (l, {from, to, capt = r})
        if to >= from then
          init = to + 1
        else
          -- Empty match (to == from - 1): step past it, otherwise the
          -- same position would be matched again indefinitely.
          init = from + 1
        end
      end
    until not from or init > limit
    return l
  end
  --- Perform multiple calls to gsub.
  -- The substitutions are applied one after another, each to the result
  -- of the previous one, in the order in which <code>pairs</code> visits
  -- the table; that order is unspecified, so the patterns should not
  -- depend on one another.
  -- @param s string to call gsub on
  -- @param sub table of the form {pattern1=replacement1 ...}
  -- @param n upper limit on replacements (default: infinite)
  -- @return result string
  -- @return number of replacements made
  function gsubs (s, sub, n)
    local total = 0
    for pattern, replacement in pairs (sub) do
      local count
      -- A nil limit means "no limit" to gsub, so one call serves both
      -- the limited and the unlimited case.
      s, count = gsub (s, pattern, replacement, n)
      total = total + count
      if n ~= nil then
        n = n - count
        if n == 0 then
          -- Replacement budget used up.
          break
        end
      end
    end
    return s, total
  end
  --- Split a string at a given separator.
  -- FIXME: Consider Perl and Python versions.
  -- @param s string to split
  -- @param sep separator regex
  -- @return list of strings
  function split (s, sep)
    -- finds returns a list of {from, to, capt = {}} entries. Flattening
    -- it (the captures are discarded) gives from1, to1, from2, to2, ...;
    -- with a 0 put in front and a 0 appended, consecutive pairs
    -- (a, b) delimit one piece each: it runs from a + 1 to b - 1. The
    -- trailing 0 becomes -1, i.e. "up to the end of the string".
    local bounds = list.concat ({0}, list.flatten (finds (s, sep)), {0})
    local pieces = {}
    local k = 1
    while k <= #bounds do
      table.insert (pieces, sub (s, bounds[k] + 1, bounds[k + 1] - 1))
      k = k + 2
    end
    return pieces
  end
  --- Remove leading matter from a string.
  -- @param s string
  -- @param r leading regex (default: "%s+", i.e. whitespace)
  -- @return string without leading r
  function ltrim (s, r)
    -- Anchor the pattern at the start of the string.
    local anchored = "^" .. (r or "%s+")
    -- Assign to a local so that gsub's replacement count is dropped.
    local trimmed = gsub (s, anchored, "")
    return trimmed
  end
  --- Remove trailing matter from a string.
  -- @param s string
  -- @param r trailing regex (default: "%s+", i.e. whitespace)
  -- @return string without trailing r
  function rtrim (s, r)
    -- Anchor the pattern at the end of the string.
    local anchored = (r or "%s+") .. "$"
    -- Assign to a local so that gsub's replacement count is dropped.
    local trimmed = gsub (s, anchored, "")
    return trimmed
  end
  --- Remove leading and trailing matter from a string.
  -- The leading matter is removed first, then the trailing matter.
  -- @param s string
  -- @param r leading/trailing regex (default: "%s+", i.e. whitespace)
  -- @return string without leading/trailing r
  function trim (s, r)
    local headless = ltrim (s, r)
    return rtrim (headless, r)
  end