/xpath.lua

http://github.com/bkersten/lua-xpath · Lua · 358 lines · 314 code · 16 blank · 28 comment · 27 complexity · 0c9185d6dbcb4340e9c34226eb89c5f9 MD5 · raw file

  1. --[[
  2. Author: Ben Kersten
  3. Copyright © 2011 Quest Software
  4. Permission is hereby granted, free of charge, to any person obtaining
  5. a copy of this software and associated documentation files (the
  6. "Software"), to deal in the Software without restriction, including
  7. without limitation the rights to use, copy, modify, merge, publish,
  8. distribute, sublicense, and/or sell copies of the Software, and to
  9. permit persons to whom the Software is furnished to do so, subject
  10. to the following conditions:
  11. The above copyright notice and this permission notice shall be included
  12. in all copies or substantial portions of the Software.
  13. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  14. EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  15. MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  16. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  17. CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  18. TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  19. SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  20. ]]
  21. -- NOTE: no support for full axis names; uses abbreviated syntax
  22. -- TODO: add support for node[last()]
  --- Strip leading and trailing whitespace.
  -- The function is installed on the shared `string` table only when that
  -- table does not already provide a `trim`.
  -- @param s the string to trim
  -- @return `s` without its surrounding whitespace (a single value)
  if not string.trim then
    string.trim = function(s)
      -- The anchored pattern always matches, so the capture is never nil.
      local core = string.match(s, "^%s*(.-)%s*$")
      return core
    end
  end
  --- Split a string on a Lua pattern.
  -- Adapted from http://lua-users.org/wiki/SplitJoin. Installed on the shared
  -- `string` table only when no `split` is already present there.
  --
  -- An empty field at the very start of `str` is dropped, and a trailing
  -- separator does not produce a trailing empty field; empty fields between
  -- two separators are kept.
  -- @param str the string to split
  -- @param pat separator, interpreted as a Lua pattern
  -- @return array of the fields found, in order
  -- Raises an error when `str` is not a string, or when `pat` matches the
  -- empty string (the scan could never advance in that case).
  string.split = string.split or function(str, pat)
    if type(str) ~= 'string' then
      -- Build the traceback only on failure instead of on every call.
      error(debug.traceback("string.split: expected a string, got " .. type(str)), 0)
    end
    local fields = {}
    local field_pat = "(.-)" .. pat
    local next_start = 1
    local s, e, cap = str:find(field_pat, 1)
    while s do
      if e < s then
        -- Zero-length match: next_start would never move forward.
        error("string.split: separator pattern matches the empty string", 2)
      end
      if s ~= 1 or cap ~= "" then
        fields[#fields+1] = cap
      end
      next_start = e + 1
      s, e, cap = str:find(field_pat, next_start)
    end
    if next_start <= #str then
      -- Whatever follows the last separator is the final field.
      fields[#fields+1] = str:sub(next_start)
    end
    return fields
  end
  -- Module table; it receives the public API and is returned at the end of the file.
  46. local xpath = {}
  --- Append the value selected from a matched element to a result list.
  -- @param t result array; values are appended at `#t+1`
  -- @param leaf the matched element; anything that is not a table is ignored
  -- @param selection what to take from `leaf`:
  --   nil      -> the element itself,
  --   "text()" -> `leaf[1]`,
  --   "@name"  -> `leaf.attr[name]`,
  --   "@*"     -> the value of every attribute name listed in the array part
  --               of `leaf.attr`, in that order.
  --   Any other selection appends nothing.
  local function insertToTable(t, leaf, selection)
    if type(leaf) ~= "table" then
      return
    end
    if selection == nil then
      t[#t+1] = leaf
    elseif selection == "text()" then
      t[#t+1] = leaf[1]
    elseif selection:sub(1, 1) == "@" then
      local attr = leaf.attr
      if type(attr) ~= "table" then
        -- No attribute table on this node: nothing to select.
        return
      end
      -- Compare the text after '@'; indexing a string with a number
      -- (selection[2]) always yields nil, so it could never equal '*'.
      local attr_name = selection:sub(2)
      if attr_name == '*' then
        for _, key in ipairs(attr) do
          t[#t+1] = attr[key]
        end
      else
        t[#t+1] = attr[attr_name]
      end
    end
  end
  --- Evaluate a predicate (the text between [ and ]) against one element.
  -- The expression is split on ' and ' with string.split; every resulting
  -- term must hold. A term is one of:
  --   @name          the element has attribute `name`
  --   @name='v'      attribute `name` equals v (single or double quotes)
  --   @*             the element has at least one attribute
  --   child          the element has a child element tagged `child`
  --   child='v'      some such child has `child[1] == v`
  -- Whitespace around the name and around the value is ignored.
  -- @param node element table; `node.attr` is read for attribute terms and
  --   the array part of `node` is scanned for child terms
  -- @param expression predicate text (string)
  -- @return true when every term holds, false otherwise
  -- Raises an error when `expression` is not a string.
  local function eval_predicate(node, expression)
    if type(expression) ~= 'string' then
      -- Build the traceback only on failure instead of on every call.
      error(debug.traceback("eval_predicate: expression must be a string"), 0)
    end
    for _, expr in ipairs(string.split(expression, ' and ')) do
      -- Split the term at the first '=' into a name part and a value part.
      local eq = expr:find("=", 1, true)
      local lhs = expr
      local val = nil
      if eq then
        lhs = expr:sub(1, eq - 1)
        local n
        val, n = string.gsub(expr:sub(eq + 1), [[^%s*'([^']*)'%s*$]], "%1") -- trim and remove ' '
        if n == 0 then
          val, n = string.gsub(val, [[^%s*"([^"]*)"%s*$]], "%1") -- trim and remove " "
        end
        if n == 0 then
          val = val:match("^%s*(.-)%s*$") -- unquoted value: just trim
        end
      end

      -- Look for '@' only in the name part, so an '@' inside the value
      -- (e.g. email='a@b.com') does not turn a child test into an
      -- attribute test.
      local at = lhs:find("@", 1, true)
      local name = lhs
      if at then
        name = lhs:sub(at + 1)
      end
      name = name:match("^%s*(.-)%s*$")

      if at then
        -- attribute
        if name == '*' then
          -- next() also sees attributes stored only under string keys.
          if next(node.attr) == nil then
            return false
          end
        elseif node.attr[name] == nil then
          return false
        elseif val and val ~= node.attr[name] then
          return false
        end
      else
        -- child node
        local found = false
        for _, child in ipairs(node) do
          if type(child) == "table" and child.tag == name then
            -- With a value, compare it against the child's first entry.
            if val == nil or child[1] == val then
              found = true
              break
            end
          end
        end
        if not found then
          return false
        end
      end
    end
    return true
  end
  --- Decide whether an element satisfies one path step.
  -- @param node element table; its `tag` field is compared with `tag_name`
  -- @param tag_name tag required by the step, or '*' to accept any tag
  -- @param expression optional predicate text; when nil only the tag is checked
  -- @return false on a tag mismatch, true when there is no predicate,
  --   otherwise the result of eval_predicate(node, expression)
  local function match(node, tag_name, expression)
    local node_tag = node.tag
    local tag_accepted = (tag_name == node_tag) or (tag_name == '*')
    if not tag_accepted then
      return false
    end
    if expression ~= nil then
      return eval_predicate(node, expression)
    end
    return true
  end
  -- Copy every element of the array `src` onto the end of the array `dst`.
  local function append_all(dst, src)
    for i = 1, #src do
      dst[#dst+1] = src[i]
    end
  end

  -- NOTE: descendant-or-self::node() (written // in a query) is represented
  -- by a segment whose tag is a single space.
  --- Evaluate segments[idx] and the segments after it against the children
  -- of xmlNode, appending every selected value to `nodes` via insertToTable.
  -- @param xmlNode context node; its array part holds the children. Only
  --   children that are tables with non-nil `tag` and `attr` are considered.
  -- @param segments array of parsed steps; each has `tag` and may have
  --   `expression` (predicate text) and `index` (1-based position)
  -- @param idx index into `segments` of the step to evaluate
  -- @param nodes result array, filled in place
  -- @param selection passed through unchanged to insertToTable
  -- @return array of segment indices at which evaluation has to resume on
  --   the PARENT of xmlNode (produced by '..' steps); empty when there are none
  local function parseNodes(xmlNode, segments, idx, nodes, selection)
    if idx > #segments then
      return {}
    end
    local segment = segments[idx]
    if segment.tag == '..' then --or segment.tag == "parent::node()"
      return { idx + 1 } -- return next idx to continue from at parent
    end

    -- positions: segment indices to resume from with xmlNode as context.
    -- parent_positions: segment indices handed back to the caller.
    local positions = {}
    local parent_positions = {}

    if segment.tag == "." then --or segment.tag == "self::node()"
      append_all(positions, parseNodes(xmlNode, segments, idx + 1, nodes, selection))
    end

    if segment.tag == " " then --or segment.tag == "descendant-or-self::node()"
      -- "self": evaluate the rest of the path directly below this node ...
      append_all(parent_positions, parseNodes(xmlNode, segments, idx + 1, nodes, selection))
      -- ... "descendant": and repeat the same step inside every child element.
      for _, node in ipairs(xmlNode) do
        if type(node) == "table" and node.tag ~= nil and node.attr ~= nil then
          append_all(positions, parseNodes(node, segments, idx, nodes, selection))
        end
      end
    end

    -- The position used for [n] is counted per context node (per call), not
    -- on the shared segment table, so a/b[1] yields the first b of every a
    -- rather than only the first b met during the whole traversal.
    local match_count = 0
    for _, node in ipairs(xmlNode) do
      if type(node) == "table" and node.tag ~= nil and node.attr ~= nil then
        if match(node, segment.tag, segment.expression) then
          match_count = match_count + 1
          if segment.index == nil or segment.index == match_count then
            if idx == #segments then
              insertToTable(nodes, node, selection)
            else
              append_all(positions, parseNodes(node, segments, idx + 1, nodes, selection))
            end
          end
        end
      end
    end

    for i = 1, #positions do
      local cur_pos = positions[i]
      if cur_pos > #segments then
        -- The path ended on a '..' (or '.') step: xmlNode itself is selected.
        insertToTable(nodes, xmlNode, selection)
      elseif segments[cur_pos].tag == '..' then
        -- Compare the step's tag; the segment itself is a table and can
        -- never equal the string '..'.
        parent_positions[#parent_positions+1] = cur_pos + 1
      else
        append_all(parent_positions, parseNodes(xmlNode, segments, cur_pos, nodes, selection))
      end
    end
    return parent_positions
  end
  -- Parse one path step ("tag", "tag[pred]", "tag[n]" or "tag[pred][n]")
  -- into the segment table consumed by parseNodes.
  local function parse_step(segment)
    local pred_start = segment:find("[", 1, true) or 0
    local pred_end = segment:find("]", 1, true)
    -- With no '[' this is sub(1, -1), i.e. the whole step.
    local tag = segment:sub(1, pred_start - 1)
    local expression = nil
    local index = nil
    if pred_start > 0 and pred_end then
      expression = segment:sub(pred_start + 1, pred_end - 1)
      if pred_end < #segment then
        -- A second bracket pair after the predicate carries the position.
        local idx_start = segment:find("[", pred_end + 1, true)
        local idx_end = segment:find("]", pred_end + 1, true)
        if idx_start and idx_end then
          index = segment:sub(idx_start + 1, idx_end - 1)
        end
      elseif tonumber(expression) then
        -- A lone numeric predicate is a position, not an expression.
        index = expression
        expression = nil
      end
    end
    return {
      ['tag'] = tag,
      ['expression'] = expression,
      ['index'] = tonumber(index),
      ['cur_index'] = 0,
    }
  end

  --- Run a single (non-union) query and append the results to `nodes`.
  -- The query is trimmed, "//" is rewritten to "/ /" so that the descendant
  -- step becomes a segment of its own, and the text is split on '/'. A final
  -- step that starts with '@' or is exactly "text()" is removed from the path
  -- and passed to parseNodes as the selection.
  -- @param xmlTree context node handed to parseNodes
  -- @param query query text
  -- @param nodes result array, filled in place
  -- @return nothing; queries containing "///" or "//..", blank queries and
  --   queries without any step (e.g. "/") are ignored
  local function select_nodes(xmlTree, query, nodes)
    if query:find("///", 1, true) ~= nil or query:find("//..", 1, true) ~= nil then
      -- invalid queries
      return
    end
    query = string.trim(query)
    if #query == 0 then
      return
    end
    query = string.gsub(query, "//", "/ /")
    local segments = string.split(query, '/')
    if #segments == 0 then
      -- e.g. "/": there is no step to look at.
      return
    end

    local selection = nil
    local last_tag = segments[#segments]
    -- Compare literally: as a Lua pattern "text()" ends in a position
    -- capture and would match any step that merely starts with "text".
    if last_tag:sub(1, 1) == "@" or last_tag == "text()" then
      selection = last_tag
      segments[#segments] = nil
    end

    local query_segments = {}
    for i = 1, #segments do
      query_segments[i] = parse_step(segments[i])
    end
    parseNodes(xmlTree, query_segments, 1, nodes, selection)
  end
  --- Evaluate an XPath-like query, optionally a union of several paths
  -- separated by '|', against an XML tree.
  -- @param xml_root root element table
  -- @param query query text
  -- @return array with the results of every sub-query, in query order
  -- Raises an error when `xml_root` is not a table or `query` is not a string.
  local function selectNodes(xml_root, query)
    -- Build the traceback only on failure instead of on every call.
    if type(xml_root) ~= "table" then
      error(debug.traceback("selectNodes: xml_root must be a table"), 0)
    end
    if type(query) ~= "string" then
      error(debug.traceback("selectNodes: query must be a string"), 0)
    end

    local queries
    if query:find('|', 1, true) ~= nil then
      queries = string.split(query, '|')
    else
      queries = { query }
    end

    local nodes = {}
    for i = 1, #queries do
      local q = queries[i]
      local tree = xml_root
      -- Skip leading blanks when testing for an absolute path, so that the
      -- " /b" part of "a | /b" is treated like "/b".
      if q:find("^%s*/") then
        -- push xml onto xmlTree so that parseNodes doesn't need a special case for root
        tree = { [1] = xml_root }
      end
      select_nodes(tree, q, nodes)
    end
    return nodes
  end
  -- Publish the query entry point on the module table and return that table to the caller.
  292. xpath.selectNodes = selectNodes
  293. return xpath