PageRenderTime 72ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/v2/ansible/parsing/splitter.py

https://gitlab.com/18runt88/ansible
Python | 273 lines | 209 code | 15 blank | 49 comment | 37 complexity | cdbe7a5d16878567fac450d58738d5fa MD5 | raw file
  1. # (c) 2014 James Cammarata, <jcammarata@ansible.com>
  2. #
  3. # This file is part of Ansible
  4. #
  5. # Ansible is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # Ansible is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with Ansible. If not, see <http://www.gnu.org/licenses/>.
  17. # Make coding more python3-ish
  18. from __future__ import (absolute_import, division, print_function)
  19. __metaclass__ = type
  20. import re
  21. import codecs
  # Escape decoding adapted from rspeer's answer here:
  # http://stackoverflow.com/questions/4020539/process-escape-sequences-in-a-string-in-python

  # character class matching a single hexadecimal digit
  _HEXCHAR = '[a-fA-F0-9]'

  # Matches one backslash escape sequence. The {0}, {1} and {2} placeholders
  # are filled with 8, 4 and 2 repetitions of _HEXCHAR respectively; doubled
  # braces are literal braces that survive str.format().
  _ESCAPE_SEQUENCE_RE = re.compile(
      r'''
      ( \\U{0} # 8-digit hex escapes
      | \\u{1} # 4-digit hex escapes
      | \\x{2} # 2-digit hex escapes
      | \\[0-7]{{1,3}} # Octal escapes
      | \\N\{{[^}}]+\}} # Unicode characters by name
      | \\[\\'"abfnrtv] # Single-character escapes
      )'''.format(*[_HEXCHAR * width for width in (8, 4, 2)]),
      re.VERBOSE | re.UNICODE,
  )
  def _decode_escapes(s):
      '''
      Return s with every substring matched by _ESCAPE_SEQUENCE_RE replaced
      by the result of decoding that substring with the 'unicode-escape'
      codec. Text that the pattern does not match is left as it is.
      '''
      return _ESCAPE_SEQUENCE_RE.sub(
          lambda found: codecs.decode(found.group(0), 'unicode-escape'),
          s,
      )
  def parse_kv(args, check_raw=False):
      '''
      Convert a string of key/value items to a dict.

      The string is tokenised with split_args(). Each token that contains an
      unescaped '=' (searched for from the second character on) is split at
      the first such '=' into a key and a value; the stripped key maps to the
      stripped, unquoted value. Every other token is a free-form param.

      Free-form params are joined with single spaces and stored under the
      special '_raw_params' key. When check_raw is True, key=value tokens
      whose key is not one of 'creates', 'removes', 'chdir', 'executable' or
      'warn' are treated as free-form params as well instead of options.

      Returns the resulting dict (empty when args is None after conversion).

      Raises AnsibleError when a key starts with an underscore, or when
      split_args() raises a ValueError mentioning 'no closing quotation';
      any other ValueError from split_args() is re-raised unchanged.
      '''

      ### FIXME: args should already be a unicode string
      from ansible.utils.unicode import to_unicode
      # imported here so that both raise sites below resolve the name; the
      # module itself never imported it, which turned every error path into
      # a NameError
      from ansible.errors import AnsibleError

      args = to_unicode(args, nonstring='passthru')

      options = {}
      if args is None:
          return options

      try:
          vargs = split_args(args)
      except ValueError as ve:
          if 'no closing quotation' in str(ve).lower():
              raise AnsibleError("error parsing argument string, try quoting the entire line.")
          raise

      raw_params = []
      for x in vargs:
          x = _decode_escapes(x)
          if "=" not in x:
              raw_params.append(x)
              continue

          # look for the first '=' that is not preceded by a backslash; the
          # search deliberately starts at index 1, so a leading '=' is never
          # taken as the key/value separator
          pos = 0
          try:
              while True:
                  pos = x.index('=', pos + 1)
                  if pos > 0 and x[pos - 1] != '\\':
                      break
          except ValueError:
              # ran out of string, but we must have some escaped equals,
              # so replace those and append this to the list of raw params
              raw_params.append(x.replace('\\=', '='))
              continue

          k = x[:pos]
          v = x[pos + 1:]

          # only internal variables can start with an underscore, so
          # we don't allow users to set them directly in arguments
          if k.startswith('_'):
              raise AnsibleError("invalid parameter specified: '%s'" % k)

          # FIXME: make the retrieval of this list of shell/command
          # options a function, so the list is centralized
          if check_raw and k not in ('creates', 'removes', 'chdir', 'executable', 'warn'):
              raw_params.append(x)
          else:
              options[k.strip()] = unquote(v.strip())

      # recombine the free-form params, if any were found, and assign
      # them to a special option for use later by the shell/command module
      if raw_params:
          options[u'_raw_params'] = ' '.join(raw_params)

      return options
  def _get_quote_state(token, quote_char):
      '''
      Walk token one character at a time and return the quote character that
      is still open once the token ends, or None when no quote is open.

      quote_char is the quote that was already open before this token (or a
      false value when none was). A quote character directly preceded by a
      backslash is skipped. While a quote is open only the same character
      closes it; the other quote character is ignored.
      '''
      preceding = None
      for ch in token:
          # remember whether this character is escaped before moving on
          escaped = preceding == '\\'
          preceding = ch
          if escaped or ch not in '"\'':
              continue
          if not quote_char:
              # nothing open yet, so this quote opens a quoted section
              quote_char = ch
          elif ch == quote_char:
              # matching quote closes the open section
              quote_char = None
      return quote_char
  def _count_jinja2_blocks(token, cur_depth, open_token, close_token):
      '''
      Return the nesting depth after token has been seen.

      The occurrences of open_token and close_token in token are counted.
      When the counts match, cur_depth is returned untouched; otherwise the
      difference is added to cur_depth and a negative result is raised to 0.
      '''
      delta = token.count(open_token) - token.count(close_token)
      if delta == 0:
          return cur_depth
      return max(cur_depth + delta, 0)
  def split_args(args):
      '''
      Splits args on whitespace, but intelligently reassembles
      those that may have been split over a jinja2 block or quotes.

      When used in a remote module, we won't ever have to be concerned about
      jinja2 blocks, however this function is/will be used in the
      core portions as well before the args are templated.

      example input: a=b c="foo bar"
      example output: ['a=b', 'c="foo bar"']

      Basically this is a variation shlex that has some more intelligence for
      how Ansible needs to use it.

      Returns the list of reassembled params. Raises Exception when the input
      ends while a quote or a jinja2 block is still open.
      '''

      # (opening, closing) delimiters of the jinja2 constructs whose nesting
      # is tracked: {{ }} print blocks, {% %} statement blocks, {# #} comments
      jinja2_delimiters = (("{{", "}}"), ("{%", "%}"), ("{#", "#}"))

      # the list of params parsed out of the arg string
      # this is going to be the result value when we are done
      params = []

      # Initial split on newlines; every line is split on spaces further down
      items = args.strip().split('\n')
      last_itemidx = len(items) - 1

      # These variables are used to keep track of the state of the parsing,
      # since blocks and quotes may be nested within each other.
      quote_char = None
      inside_quotes = False
      # current nesting depth per entry of jinja2_delimiters
      depths = [0] * len(jinja2_delimiters)

      # now we loop over each split chunk, coalescing tokens if the white space
      # split occurred within quotes or a jinja2 block of some kind
      for itemidx, item in enumerate(items):
          # we split on spaces and newlines separately, so that we
          # can tell which character we split on for reassembly
          # inside quotation characters
          tokens = item.strip().split(' ')

          line_continuation = False
          for idx, token in enumerate(tokens):
              # if we hit a line continuation character, but
              # we're not inside quotes, ignore it and continue
              # on to the next token while setting a flag
              if token == '\\' and not inside_quotes:
                  line_continuation = True
                  continue

              # store the previous quoting state for checking later
              was_inside_quotes = inside_quotes
              quote_char = _get_quote_state(token, quote_char)
              inside_quotes = quote_char is not None

              # multiple conditions may append a token to the list of params,
              # so we keep track with this flag to make sure it only happens once
              # append means add to the end of the list, don't append means concatenate
              # it to the end of the last token
              appended = False

              # if we're inside quotes now, but weren't before, append the token
              # to the end of the list, since we'll tack on more to it later
              # otherwise, if we're inside any jinja2 block, inside quotes, or we were
              # inside quotes (but aren't now) concat this token to the last param
              if inside_quotes and not was_inside_quotes:
                  params.append(token)
                  appended = True
              elif any(depths) or inside_quotes or was_inside_quotes:
                  if idx == 0 and was_inside_quotes:
                      params[-1] = "%s%s" % (params[-1], token)
                  elif len(tokens) > 1:
                      spacer = ' ' if idx > 0 else ''
                      params[-1] = "%s%s%s" % (params[-1], spacer, token)
                  else:
                      params[-1] = "%s\n%s" % (params[-1], token)
                  appended = True

              # if the number of paired block tags is not the same, the depth has
              # changed, so we calculate that here and may append the current
              # token to the params (if we haven't previously done so)
              for slot, (open_token, close_token) in enumerate(jinja2_delimiters):
                  prev_depth = depths[slot]
                  depths[slot] = _count_jinja2_blocks(token, prev_depth, open_token, close_token)
                  if depths[slot] != prev_depth and not appended:
                      params.append(token)
                      appended = True

              # finally, if we're at zero depth for all blocks and not inside quotes, and have not
              # yet appended anything to the list of params, we do so now
              if not any(depths) and not inside_quotes and not appended and token != '':
                  params.append(token)

          # if this was not the last line (meaning we split on newlines), add
          # a newline back here to preserve the original structure. The check
          # on params avoids an IndexError when nothing has been collected
          # yet, e.g. a continuation-only first line followed by a blank line.
          if params and itemidx != last_itemidx and not line_continuation:
              params[-1] += '\n'

      # If we're done and things are not at zero depth or we're still inside quotes,
      # raise an error to indicate that the args were unbalanced
      if any(depths) or inside_quotes:
          raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes")

      return params
  def is_quoted(data):
      '''
      Return True when data starts and ends with the same quote character
      (either " or '), False otherwise.

      At least two characters are required: a lone quote character is both
      the first and the last character of the string, but it does not
      enclose anything and is therefore not considered quoted.
      '''
      return len(data) > 1 and data[0] == data[-1] and data[0] in ('"', "'")
  def unquote(data):
      '''
      Return data without its first and last character when is_quoted(data)
      reports it as quoted; otherwise return data unchanged.
      '''
      return data[1:-1] if is_quoted(data) else data