mirror of
https://github.com/ansible-collections/community.general.git
synced 2025-07-23 13:20:23 -07:00
Various unicode and backslash escape cleanups
* Do backslash escape parsing in parse_kv() [was being done in the copy module purely for newlines in the copy module's content param before] * Make parse_kv always return unicode * Add bandaid to transform args to unicode until we can fix things calling parse_kv to always send it unicode. * Make split_args deal with unicode internally. Warning, no bandaid for things calling split_args without giving it unicode (shouldn't matter as dealt with str internally before) * Fix copy and unarchive action plugins to not use setdefaultencoding * Remove escaping from copy (it was broken and made content into latin-1 sometimes). escaping is now in parse_kv. * Expect that content is now a unicode string so transform to bytes just before writing to the file. * Add initial unittests for split_args and parse_kv. 4 failing tests.because split_args is injecting extra newlines.
This commit is contained in:
parent
1cc2135a0d
commit
43c1a97447
4 changed files with 143 additions and 44 deletions
|
@ -19,6 +19,27 @@
|
||||||
from __future__ import (absolute_import, division, print_function)
|
from __future__ import (absolute_import, division, print_function)
|
||||||
__metaclass__ = type
|
__metaclass__ = type
|
||||||
|
|
||||||
|
import re
|
||||||
|
import codecs
|
||||||
|
|
||||||
|
# Decode escapes adapted from rspeer's answer here:
|
||||||
|
# http://stackoverflow.com/questions/4020539/process-escape-sequences-in-a-string-in-python
|
||||||
|
_HEXCHAR = '[a-fA-F0-9]'
|
||||||
|
_ESCAPE_SEQUENCE_RE = re.compile(r'''
|
||||||
|
( \\U{0} # 8-digit hex escapes
|
||||||
|
| \\u{1} # 4-digit hex escapes
|
||||||
|
| \\x{2} # 2-digit hex escapes
|
||||||
|
| \\[0-7]{{1,3}} # Octal escapes
|
||||||
|
| \\N\{{[^}}]+\}} # Unicode characters by name
|
||||||
|
| \\[\\'"abfnrtv] # Single-character escapes
|
||||||
|
)'''.format(_HEXCHAR*8, _HEXCHAR*4, _HEXCHAR*2), re.UNICODE | re.VERBOSE)
|
||||||
|
|
||||||
|
def _decode_escapes(s):
|
||||||
|
def decode_match(match):
|
||||||
|
return codecs.decode(match.group(0), 'unicode-escape')
|
||||||
|
|
||||||
|
return _ESCAPE_SEQUENCE_RE.sub(decode_match, s)
|
||||||
|
|
||||||
def parse_kv(args, check_raw=False):
|
def parse_kv(args, check_raw=False):
|
||||||
'''
|
'''
|
||||||
Convert a string of key/value items to a dict. If any free-form params
|
Convert a string of key/value items to a dict. If any free-form params
|
||||||
|
@ -27,6 +48,10 @@ def parse_kv(args, check_raw=False):
|
||||||
they will simply be ignored.
|
they will simply be ignored.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
### FIXME: args should already be a unicode string
|
||||||
|
from ansible.utils.unicode import to_unicode
|
||||||
|
args = to_unicode(args, nonstring='passthru')
|
||||||
|
|
||||||
options = {}
|
options = {}
|
||||||
if args is not None:
|
if args is not None:
|
||||||
try:
|
try:
|
||||||
|
@ -39,6 +64,7 @@ def parse_kv(args, check_raw=False):
|
||||||
|
|
||||||
raw_params = []
|
raw_params = []
|
||||||
for x in vargs:
|
for x in vargs:
|
||||||
|
x = _decode_escapes(x)
|
||||||
if "=" in x:
|
if "=" in x:
|
||||||
pos = 0
|
pos = 0
|
||||||
try:
|
try:
|
||||||
|
@ -72,7 +98,7 @@ def parse_kv(args, check_raw=False):
|
||||||
# recombine the free-form params, if any were found, and assign
|
# recombine the free-form params, if any were found, and assign
|
||||||
# them to a special option for use later by the shell/command module
|
# them to a special option for use later by the shell/command module
|
||||||
if len(raw_params) > 0:
|
if len(raw_params) > 0:
|
||||||
options['_raw_params'] = ' '.join(raw_params)
|
options[u'_raw_params'] = ' '.join(raw_params)
|
||||||
|
|
||||||
return options
|
return options
|
||||||
|
|
||||||
|
@ -126,17 +152,11 @@ def split_args(args):
|
||||||
'''
|
'''
|
||||||
|
|
||||||
# the list of params parsed out of the arg string
|
# the list of params parsed out of the arg string
|
||||||
# this is going to be the result value when we are donei
|
# this is going to be the result value when we are done
|
||||||
params = []
|
params = []
|
||||||
|
|
||||||
# here we encode the args, so we have a uniform charset to
|
# Initial split on white space
|
||||||
# work with, and split on white space
|
|
||||||
args = args.strip()
|
args = args.strip()
|
||||||
try:
|
|
||||||
args = args.encode('utf-8')
|
|
||||||
do_decode = True
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
do_decode = False
|
|
||||||
items = args.strip().split('\n')
|
items = args.strip().split('\n')
|
||||||
|
|
||||||
# iterate over the tokens, and reassemble any that may have been
|
# iterate over the tokens, and reassemble any that may have been
|
||||||
|
@ -242,10 +262,6 @@ def split_args(args):
|
||||||
if print_depth or block_depth or comment_depth or inside_quotes:
|
if print_depth or block_depth or comment_depth or inside_quotes:
|
||||||
raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes")
|
raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes")
|
||||||
|
|
||||||
# finally, we decode each param back to the unicode it was in the arg string
|
|
||||||
if do_decode:
|
|
||||||
params = [x.decode('utf-8') for x in params]
|
|
||||||
|
|
||||||
return params
|
return params
|
||||||
|
|
||||||
def is_quoted(data):
|
def is_quoted(data):
|
||||||
|
|
|
@ -30,18 +30,7 @@ from ansible import constants as C
|
||||||
from ansible.plugins.action import ActionBase
|
from ansible.plugins.action import ActionBase
|
||||||
from ansible.utils.boolean import boolean
|
from ansible.utils.boolean import boolean
|
||||||
from ansible.utils.hashing import checksum
|
from ansible.utils.hashing import checksum
|
||||||
|
from ansible.utils.unicode import to_bytes
|
||||||
### FIXME: Find a different way to fix 3518 as sys.defaultencoding() breaks
|
|
||||||
# the python interpreter in subtle ways. It appears that this does not fix
|
|
||||||
# 3518 anyway (using binary files via lookup(). Instead, it tries to fix
|
|
||||||
# utf-8 strings in the content parameter. That should be fixable by properly
|
|
||||||
# encoding or decoding the value before we write it to a file.
|
|
||||||
#
|
|
||||||
## fixes https://github.com/ansible/ansible/issues/3518
|
|
||||||
# http://mypy.pythonblogs.com/12_mypy/archive/1253_workaround_for_python_bug_ascii_codec_cant_encode_character_uxa0_in_position_111_ordinal_not_in_range128.html
|
|
||||||
#import sys
|
|
||||||
#reload(sys)
|
|
||||||
#sys.setdefaultencoding("utf8")
|
|
||||||
|
|
||||||
|
|
||||||
class ActionModule(ActionBase):
|
class ActionModule(ActionBase):
|
||||||
|
@ -55,16 +44,6 @@ class ActionModule(ActionBase):
|
||||||
raw = boolean(self._task.args.get('raw', 'no'))
|
raw = boolean(self._task.args.get('raw', 'no'))
|
||||||
force = boolean(self._task.args.get('force', 'yes'))
|
force = boolean(self._task.args.get('force', 'yes'))
|
||||||
|
|
||||||
# content with newlines is going to be escaped to safely load in yaml
|
|
||||||
# now we need to unescape it so that the newlines are evaluated properly
|
|
||||||
# when writing the file to disk
|
|
||||||
if content:
|
|
||||||
if isinstance(content, unicode):
|
|
||||||
try:
|
|
||||||
content = content.decode('unicode-escape')
|
|
||||||
except UnicodeDecodeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# FIXME: first available file needs to be reworked somehow...
|
# FIXME: first available file needs to be reworked somehow...
|
||||||
#if (source is None and content is None and not 'first_available_file' in inject) or dest is None:
|
#if (source is None and content is None and not 'first_available_file' in inject) or dest is None:
|
||||||
# result=dict(failed=True, msg="src (or content) and dest are required")
|
# result=dict(failed=True, msg="src (or content) and dest are required")
|
||||||
|
@ -86,7 +65,7 @@ class ActionModule(ActionBase):
|
||||||
try:
|
try:
|
||||||
# If content comes to us as a dict it should be decoded json.
|
# If content comes to us as a dict it should be decoded json.
|
||||||
# We need to encode it back into a string to write it out.
|
# We need to encode it back into a string to write it out.
|
||||||
if type(content) is dict:
|
if isinstance(content, dict):
|
||||||
content_tempfile = self._create_content_tempfile(json.dumps(content))
|
content_tempfile = self._create_content_tempfile(json.dumps(content))
|
||||||
else:
|
else:
|
||||||
content_tempfile = self._create_content_tempfile(content)
|
content_tempfile = self._create_content_tempfile(content)
|
||||||
|
@ -316,7 +295,8 @@ class ActionModule(ActionBase):
|
||||||
def _create_content_tempfile(self, content):
|
def _create_content_tempfile(self, content):
|
||||||
''' Create a tempfile containing defined content '''
|
''' Create a tempfile containing defined content '''
|
||||||
fd, content_tempfile = tempfile.mkstemp()
|
fd, content_tempfile = tempfile.mkstemp()
|
||||||
f = os.fdopen(fd, 'w')
|
f = os.fdopen(fd, 'wb')
|
||||||
|
content = to_bytes(content)
|
||||||
try:
|
try:
|
||||||
f.write(content)
|
f.write(content)
|
||||||
except Exception, err:
|
except Exception, err:
|
||||||
|
|
|
@ -17,16 +17,10 @@
|
||||||
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import pipes
|
||||||
|
|
||||||
from ansible.plugins.action import ActionBase
|
from ansible.plugins.action import ActionBase
|
||||||
|
|
||||||
## fixes https://github.com/ansible/ansible/issues/3518
|
|
||||||
# http://mypy.pythonblogs.com/12_mypy/archive/1253_workaround_for_python_bug_ascii_codec_cant_encode_character_uxa0_in_position_111_ordinal_not_in_range128.html
|
|
||||||
import sys
|
|
||||||
reload(sys)
|
|
||||||
sys.setdefaultencoding("utf8")
|
|
||||||
import pipes
|
|
||||||
|
|
||||||
|
|
||||||
class ActionModule(ActionBase):
|
class ActionModule(ActionBase):
|
||||||
|
|
||||||
|
|
109
v2/test/parsing/test_splitter.py
Normal file
109
v2/test/parsing/test_splitter.py
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
# coding: utf-8
|
||||||
|
# (c) 2015, Toshio Kuratomi <tkuratomi@ansible.com>
|
||||||
|
#
|
||||||
|
# This file is part of Ansible
|
||||||
|
#
|
||||||
|
# Ansible is free software: you can redistribute it and/or modify
|
||||||
|
# it under the terms of the GNU General Public License as published by
|
||||||
|
# the Free Software Foundation, either version 3 of the License, or
|
||||||
|
# (at your option) any later version.
|
||||||
|
#
|
||||||
|
# Ansible is distributed in the hope that it will be useful,
|
||||||
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
# GNU General Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License
|
||||||
|
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
# Make coding more python3-ish
|
||||||
|
from __future__ import (absolute_import, division, print_function)
|
||||||
|
__metaclass__ = type
|
||||||
|
|
||||||
|
from nose import tools
|
||||||
|
from ansible.compat.tests import unittest
|
||||||
|
|
||||||
|
from ansible.parsing.splitter import split_args, parse_kv
|
||||||
|
|
||||||
|
|
||||||
|
# Tests using nose's test generators cannot use unittest base class.
|
||||||
|
# http://nose.readthedocs.org/en/latest/writing_tests.html#test-generators
|
||||||
|
class TestSplitter_Gen:
|
||||||
|
SPLIT_DATA = (
|
||||||
|
(u'a',
|
||||||
|
[u'a'],
|
||||||
|
{u'_raw_params': u'a'}),
|
||||||
|
(u'a=b',
|
||||||
|
[u'a=b'],
|
||||||
|
{u'a': u'b'}),
|
||||||
|
(u'a="foo bar"',
|
||||||
|
[u'a="foo bar"'],
|
||||||
|
{u'a': u'foo bar'}),
|
||||||
|
(u'"foo bar baz"',
|
||||||
|
[u'"foo bar baz"'],
|
||||||
|
{u'_raw_params': '"foo bar baz"'}),
|
||||||
|
(u'foo bar baz',
|
||||||
|
[u'foo', u'bar', u'baz'],
|
||||||
|
{u'_raw_params': u'foo bar baz'}),
|
||||||
|
(u'a=b c="foo bar"',
|
||||||
|
[u'a=b', u'c="foo bar"'],
|
||||||
|
{u'a': u'b', u'c': u'foo bar'}),
|
||||||
|
(u'a="echo \\"hello world\\"" b=bar',
|
||||||
|
[u'a="echo \\"hello world\\""', u'b=bar'],
|
||||||
|
{u'a': u'echo "hello world"', u'b': u'bar'}),
|
||||||
|
(u'a="multi\nline"',
|
||||||
|
[u'a="multi\nline"'],
|
||||||
|
{u'a': u'multi\nline'}),
|
||||||
|
(u'a="blank\n\nline"',
|
||||||
|
[u'a="blank\n\nline"'],
|
||||||
|
{u'a': u'blank\n\nline'}),
|
||||||
|
(u'a="blank\n\n\nlines"',
|
||||||
|
[u'a="blank\n\n\nlines"'],
|
||||||
|
{u'a': u'blank\n\n\nlines'}),
|
||||||
|
(u'a="a long\nmessage\\\nabout a thing\n"',
|
||||||
|
[u'a="a long\nmessage\\\nabout a thing\n"'],
|
||||||
|
{u'a': u'a long\nmessage\\\nabout a thing\n'}),
|
||||||
|
(u'a="multiline\nmessage1\\\n" b="multiline\nmessage2\\\n"',
|
||||||
|
[u'a="multiline\nmessage1\\\n"', u'b="multiline\nmessage2\\\n"'],
|
||||||
|
{u'a': 'multiline\nmessage1\\\n', u'b': u'multiline\nmessage2\\\n'}),
|
||||||
|
(u'a={{jinja}}',
|
||||||
|
[u'a={{jinja}}'],
|
||||||
|
{u'a': u'{{jinja}}'}),
|
||||||
|
(u'a={{ jinja }}',
|
||||||
|
[u'a={{ jinja }}'],
|
||||||
|
{u'a': u'{{ jinja }}'}),
|
||||||
|
(u'a="{{jinja}}"',
|
||||||
|
[u'a="{{jinja}}"'],
|
||||||
|
{u'a': u'{{jinja}}'}),
|
||||||
|
(u'a={{ jinja }}{{jinja2}}',
|
||||||
|
[u'a={{ jinja }}{{jinja2}}'],
|
||||||
|
{u'a': u'{{ jinja }}{{jinja2}}'}),
|
||||||
|
(u'a="{{ jinja }}{{jinja2}}"',
|
||||||
|
[u'a="{{ jinja }}{{jinja2}}"'],
|
||||||
|
{u'a': u'{{ jinja }}{{jinja2}}'}),
|
||||||
|
(u'a={{jinja}} b={{jinja2}}',
|
||||||
|
[u'a={{jinja}}', u'b={{jinja2}}'],
|
||||||
|
{u'a': u'{{jinja}}', u'b': u'{{jinja2}}'}),
|
||||||
|
(u'a="café eñyei"',
|
||||||
|
[u'a="café eñyei"'],
|
||||||
|
{u'a': u'café eñyei'}),
|
||||||
|
(u'a=café b=eñyei',
|
||||||
|
[u'a=café', u'b=eñyei'],
|
||||||
|
{u'a': u'café', u'b': u'eñyei'}),
|
||||||
|
)
|
||||||
|
|
||||||
|
def check_split_args(self, args, expected):
|
||||||
|
tools.eq_(split_args(args), expected)
|
||||||
|
|
||||||
|
def test_split_args(self):
|
||||||
|
for datapoint in self.SPLIT_DATA:
|
||||||
|
yield self.check_split_args, datapoint[0], datapoint[1]
|
||||||
|
|
||||||
|
def check_parse_kv(self, args, expected):
|
||||||
|
tools.eq_(parse_kv(args), expected)
|
||||||
|
|
||||||
|
def test_parse_kv(self):
|
||||||
|
for datapoint in self.SPLIT_DATA:
|
||||||
|
try:
|
||||||
|
yield self.check_parse_kv, datapoint[0], datapoint[2]
|
||||||
|
except: pass
|
Loading…
Add table
Add a link
Reference in a new issue