Various python3 updates for module_utils: (#17345)

* Port set_*_if_different functions to python3
* Add surrogate_or_strict and surrogate_or_replace error handlers for
  to_text, to_bytes, to_native
* Set default error handler to surrogate_or_replace
* Make use of the new error handlers in the already ported code
* Move the unittests for module_utils._text as they aren't in basic.py
* Cleanup around SEQUENCETYPE.  On python2.6+ SEQUENCETYPE includes
  strings so make sure code omits those explicitly if necessary
* Allow arg_spec aliases to be other sequence types
This commit is contained in:
Toshio Kuratomi 2016-09-01 04:19:03 -07:00 committed by GitHub
commit 28227546fa
5 changed files with 101 additions and 62 deletions

View file

@ -35,6 +35,13 @@
from ansible.module_utils.six import PY3, text_type, binary_type
import codecs
try:
codecs.lookup_error('surrogateescape')
HAS_SURROGATEESCAPE = True
except LookupError:
HAS_SURROGATEESCAPE = False
def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
"""Make sure that a string is a byte string
@ -47,8 +54,22 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
:kwarg errors: The error handler to use if the text string is not
encodable using the specified encoding. Any valid `codecs error
handler <https://docs.python.org/2/library/codecs.html#codec-base-classes>`_
may be specified. On Python3 this defaults to 'surrogateescape'. On
Python2, this defaults to 'replace'.
may be specified. There are two additional error strategies
specifically aimed at helping people to port code:
:surrogate_or_strict: Will use surrogateescape if it is a valid
handler, otherwise it will use strict
:surrogate_or_replace: Will use surrogateescape if it is a valid
handler, otherwise it will use replace.
Because surrogateescape was added in Python3 this usually means that
Python3 will use surrogateescape and Python2 will use the fallback
error handler. Note that the code checks for surrogateescape when the
module is imported. If you have a backport of surrogateescape for
python2, be sure to register the error handler prior to importing this
module.
The default is `surrogate_or_replace`
:kwarg nonstring: The strategy to use if a nonstring is specified in
``obj``. Default is 'simplerepr'. Valid values are:
@ -71,11 +92,16 @@ def to_bytes(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
if isinstance(obj, binary_type):
return obj
if errors is None:
if PY3:
if errors in (None, 'surrogate_or_replace'):
if HAS_SURROGATEESCAPE:
errors = 'surrogateescape'
else:
errors = 'replace'
elif errors == 'surrogate_or_strict':
if HAS_SURROGATEESCAPE:
errors = 'surrogateescape'
else:
errors = 'strict'
if isinstance(obj, text_type):
return obj.encode(encoding, errors)
@ -126,6 +152,17 @@ def to_text(obj, encoding='utf-8', errors=None, nonstring='simplerepr'):
if isinstance(obj, text_type):
return obj
if errors in (None, 'surrogate_or_replace'):
if HAS_SURROGATEESCAPE:
errors = 'surrogateescape'
else:
errors = 'replace'
elif errors == 'surrogate_or_strict':
if HAS_SURROGATEESCAPE:
errors = 'surrogateescape'
else:
errors = 'strict'
if errors is None:
if PY3:
errors = 'surrogateescape'