diff --git a/v2/ansible/parsing/splitter.py b/v2/ansible/parsing/splitter.py
index 9705baf169..4af1c7b171 100644
--- a/v2/ansible/parsing/splitter.py
+++ b/v2/ansible/parsing/splitter.py
@@ -19,6 +19,27 @@
from __future__ import (absolute_import, division, print_function)
__metaclass__ = type
+import re
+import codecs
+
+# Decode escapes adapted from rspeer's answer here:
+# http://stackoverflow.com/questions/4020539/process-escape-sequences-in-a-string-in-python
+_HEXCHAR = '[a-fA-F0-9]'
+_ESCAPE_SEQUENCE_RE = re.compile(r'''
+ ( \\U{0} # 8-digit hex escapes
+ | \\u{1} # 4-digit hex escapes
+ | \\x{2} # 2-digit hex escapes
+ | \\[0-7]{{1,3}} # Octal escapes
+ | \\N\{{[^}}]+\}} # Unicode characters by name
+ | \\[\\'"abfnrtv] # Single-character escapes
+ )'''.format(_HEXCHAR*8, _HEXCHAR*4, _HEXCHAR*2), re.UNICODE | re.VERBOSE)
+
+def _decode_escapes(s):
+ def decode_match(match):
+ return codecs.decode(match.group(0), 'unicode-escape')
+
+ return _ESCAPE_SEQUENCE_RE.sub(decode_match, s)
+
def parse_kv(args, check_raw=False):
'''
Convert a string of key/value items to a dict. If any free-form params
@@ -27,6 +48,10 @@ def parse_kv(args, check_raw=False):
they will simply be ignored.
'''
+ ### FIXME: args should already be a unicode string
+ from ansible.utils.unicode import to_unicode
+ args = to_unicode(args, nonstring='passthru')
+
options = {}
if args is not None:
try:
@@ -39,6 +64,7 @@ def parse_kv(args, check_raw=False):
raw_params = []
for x in vargs:
+ x = _decode_escapes(x)
if "=" in x:
pos = 0
try:
@@ -72,7 +98,7 @@ def parse_kv(args, check_raw=False):
# recombine the free-form params, if any were found, and assign
# them to a special option for use later by the shell/command module
if len(raw_params) > 0:
- options['_raw_params'] = ' '.join(raw_params)
+ options[u'_raw_params'] = ' '.join(raw_params)
return options
@@ -126,17 +152,11 @@ def split_args(args):
'''
# the list of params parsed out of the arg string
- # this is going to be the result value when we are donei
+ # this is going to be the result value when we are done
params = []
- # here we encode the args, so we have a uniform charset to
- # work with, and split on white space
+ # Initial split on white space
args = args.strip()
- try:
- args = args.encode('utf-8')
- do_decode = True
- except UnicodeDecodeError:
- do_decode = False
items = args.strip().split('\n')
# iterate over the tokens, and reassemble any that may have been
@@ -242,10 +262,6 @@ def split_args(args):
if print_depth or block_depth or comment_depth or inside_quotes:
raise Exception("error while splitting arguments, either an unbalanced jinja2 block or quotes")
- # finally, we decode each param back to the unicode it was in the arg string
- if do_decode:
- params = [x.decode('utf-8') for x in params]
-
return params
def is_quoted(data):
diff --git a/v2/ansible/plugins/action/copy.py b/v2/ansible/plugins/action/copy.py
index 09990743bb..89c2fde7b3 100644
--- a/v2/ansible/plugins/action/copy.py
+++ b/v2/ansible/plugins/action/copy.py
@@ -30,18 +30,7 @@ from ansible import constants as C
from ansible.plugins.action import ActionBase
from ansible.utils.boolean import boolean
from ansible.utils.hashing import checksum
-
-### FIXME: Find a different way to fix 3518 as sys.defaultencoding() breaks
-# the python interpreter in subtle ways. It appears that this does not fix
-# 3518 anyway (using binary files via lookup(). Instead, it tries to fix
-# utf-8 strings in the content parameter. That should be fixable by properly
-# encoding or decoding the value before we write it to a file.
-#
-## fixes https://github.com/ansible/ansible/issues/3518
-# http://mypy.pythonblogs.com/12_mypy/archive/1253_workaround_for_python_bug_ascii_codec_cant_encode_character_uxa0_in_position_111_ordinal_not_in_range128.html
-#import sys
-#reload(sys)
-#sys.setdefaultencoding("utf8")
+from ansible.utils.unicode import to_bytes
class ActionModule(ActionBase):
@@ -55,16 +44,6 @@ class ActionModule(ActionBase):
raw = boolean(self._task.args.get('raw', 'no'))
force = boolean(self._task.args.get('force', 'yes'))
- # content with newlines is going to be escaped to safely load in yaml
- # now we need to unescape it so that the newlines are evaluated properly
- # when writing the file to disk
- if content:
- if isinstance(content, unicode):
- try:
- content = content.decode('unicode-escape')
- except UnicodeDecodeError:
- pass
-
# FIXME: first available file needs to be reworked somehow...
#if (source is None and content is None and not 'first_available_file' in inject) or dest is None:
# result=dict(failed=True, msg="src (or content) and dest are required")
@@ -86,7 +65,7 @@ class ActionModule(ActionBase):
try:
# If content comes to us as a dict it should be decoded json.
# We need to encode it back into a string to write it out.
- if type(content) is dict:
+ if isinstance(content, dict):
content_tempfile = self._create_content_tempfile(json.dumps(content))
else:
content_tempfile = self._create_content_tempfile(content)
@@ -316,7 +295,8 @@ class ActionModule(ActionBase):
def _create_content_tempfile(self, content):
''' Create a tempfile containing defined content '''
fd, content_tempfile = tempfile.mkstemp()
- f = os.fdopen(fd, 'w')
+ f = os.fdopen(fd, 'wb')
+ content = to_bytes(content)
try:
f.write(content)
except Exception, err:
diff --git a/v2/ansible/plugins/action/unarchive.py b/v2/ansible/plugins/action/unarchive.py
index f99d7e28e6..1b6cb354f0 100644
--- a/v2/ansible/plugins/action/unarchive.py
+++ b/v2/ansible/plugins/action/unarchive.py
@@ -17,16 +17,10 @@
# along with Ansible. If not, see .
import os
+import pipes
from ansible.plugins.action import ActionBase
-## fixes https://github.com/ansible/ansible/issues/3518
-# http://mypy.pythonblogs.com/12_mypy/archive/1253_workaround_for_python_bug_ascii_codec_cant_encode_character_uxa0_in_position_111_ordinal_not_in_range128.html
-import sys
-reload(sys)
-sys.setdefaultencoding("utf8")
-import pipes
-
class ActionModule(ActionBase):
diff --git a/v2/test/parsing/test_splitter.py b/v2/test/parsing/test_splitter.py
new file mode 100644
index 0000000000..fc2c05d36f
--- /dev/null
+++ b/v2/test/parsing/test_splitter.py
@@ -0,0 +1,109 @@
+# coding: utf-8
+# (c) 2015, Toshio Kuratomi
+#
+# This file is part of Ansible
+#
+# Ansible is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Ansible is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ansible. If not, see .
+
+# Make coding more python3-ish
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+from nose import tools
+from ansible.compat.tests import unittest
+
+from ansible.parsing.splitter import split_args, parse_kv
+
+
+# Tests using nose's test generators cannot use unittest base class.
+# http://nose.readthedocs.org/en/latest/writing_tests.html#test-generators
+class TestSplitter_Gen:
+ SPLIT_DATA = (
+ (u'a',
+ [u'a'],
+ {u'_raw_params': u'a'}),
+ (u'a=b',
+ [u'a=b'],
+ {u'a': u'b'}),
+ (u'a="foo bar"',
+ [u'a="foo bar"'],
+ {u'a': u'foo bar'}),
+ (u'"foo bar baz"',
+ [u'"foo bar baz"'],
+ {u'_raw_params': '"foo bar baz"'}),
+ (u'foo bar baz',
+ [u'foo', u'bar', u'baz'],
+ {u'_raw_params': u'foo bar baz'}),
+ (u'a=b c="foo bar"',
+ [u'a=b', u'c="foo bar"'],
+ {u'a': u'b', u'c': u'foo bar'}),
+ (u'a="echo \\"hello world\\"" b=bar',
+ [u'a="echo \\"hello world\\""', u'b=bar'],
+ {u'a': u'echo "hello world"', u'b': u'bar'}),
+ (u'a="multi\nline"',
+ [u'a="multi\nline"'],
+ {u'a': u'multi\nline'}),
+ (u'a="blank\n\nline"',
+ [u'a="blank\n\nline"'],
+ {u'a': u'blank\n\nline'}),
+ (u'a="blank\n\n\nlines"',
+ [u'a="blank\n\n\nlines"'],
+ {u'a': u'blank\n\n\nlines'}),
+ (u'a="a long\nmessage\\\nabout a thing\n"',
+ [u'a="a long\nmessage\\\nabout a thing\n"'],
+ {u'a': u'a long\nmessage\\\nabout a thing\n'}),
+ (u'a="multiline\nmessage1\\\n" b="multiline\nmessage2\\\n"',
+ [u'a="multiline\nmessage1\\\n"', u'b="multiline\nmessage2\\\n"'],
+ {u'a': 'multiline\nmessage1\\\n', u'b': u'multiline\nmessage2\\\n'}),
+ (u'a={{jinja}}',
+ [u'a={{jinja}}'],
+ {u'a': u'{{jinja}}'}),
+ (u'a={{ jinja }}',
+ [u'a={{ jinja }}'],
+ {u'a': u'{{ jinja }}'}),
+ (u'a="{{jinja}}"',
+ [u'a="{{jinja}}"'],
+ {u'a': u'{{jinja}}'}),
+ (u'a={{ jinja }}{{jinja2}}',
+ [u'a={{ jinja }}{{jinja2}}'],
+ {u'a': u'{{ jinja }}{{jinja2}}'}),
+ (u'a="{{ jinja }}{{jinja2}}"',
+ [u'a="{{ jinja }}{{jinja2}}"'],
+ {u'a': u'{{ jinja }}{{jinja2}}'}),
+ (u'a={{jinja}} b={{jinja2}}',
+ [u'a={{jinja}}', u'b={{jinja2}}'],
+ {u'a': u'{{jinja}}', u'b': u'{{jinja2}}'}),
+ (u'a="café eñyei"',
+ [u'a="café eñyei"'],
+ {u'a': u'café eñyei'}),
+ (u'a=café b=eñyei',
+ [u'a=café', u'b=eñyei'],
+ {u'a': u'café', u'b': u'eñyei'}),
+ )
+
+ def check_split_args(self, args, expected):
+ tools.eq_(split_args(args), expected)
+
+ def test_split_args(self):
+ for datapoint in self.SPLIT_DATA:
+ yield self.check_split_args, datapoint[0], datapoint[1]
+
+ def check_parse_kv(self, args, expected):
+ tools.eq_(parse_kv(args), expected)
+
+ def test_parse_kv(self):
+ for datapoint in self.SPLIT_DATA:
+ try:
+ yield self.check_parse_kv, datapoint[0], datapoint[2]
+ except: pass