Fix to_bytes handling of surrogates and non-encodable chars (#21180)

* Add a surrogate_then_replace error strategy to keep to_bytes from raising a traceback by default
* Port all code that explicitly used surrogate_or_replace to surrogate_then_replace
This commit is contained in:
Toshio Kuratomi 2017-02-09 17:13:40 -08:00 committed by GitHub
parent 149dd9ca86
commit 98541b7c8b
5 changed files with 94 additions and 39 deletions

View file

@@ -403,9 +403,9 @@ def remove_values(value, no_log_strings):
native_str_value = native_str_value.replace(omit_me, '*' * 8)
if value_is_text and isinstance(native_str_value, binary_type):
value = to_text(native_str_value, encoding='utf-8', errors='surrogate_or_replace')
value = to_text(native_str_value, encoding='utf-8', errors='surrogate_then_replace')
elif not value_is_text and isinstance(native_str_value, text_type):
value = to_bytes(native_str_value, encoding='utf-8', errors='surrogate_or_replace')
value = to_bytes(native_str_value, encoding='utf-8', errors='surrogate_then_replace')
else:
value = native_str_value
elif isinstance(value, SEQUENCETYPE):