Make sure to encode to utf-8, not the default encoding

This allows text fields to accept unicode strings under Python 2, where the default encoding is ASCII and a bare `.encode()` fails on non-ASCII text.
This commit is contained in:
Mike Lundy 2016-03-03 14:29:32 -08:00
parent c625981eb6
commit 8c3c57aa32
6 changed files with 24 additions and 7 deletions

View file

@ -679,7 +679,7 @@ cdef _setBytes(_DynamicSetterClasses thisptr, field, value):
thisptr.set(field, temp) thisptr.set(field, temp)
cdef _setBaseString(_DynamicSetterClasses thisptr, field, value): cdef _setBaseString(_DynamicSetterClasses thisptr, field, value):
encoded_value = value.encode() encoded_value = value.encode('utf-8')
cdef capnp.StringPtr temp_string = capnp.StringPtr(<char*>encoded_value, len(encoded_value)) cdef capnp.StringPtr temp_string = capnp.StringPtr(<char*>encoded_value, len(encoded_value))
cdef C_DynamicValue.Reader temp = C_DynamicValue.Reader(temp_string) cdef C_DynamicValue.Reader temp = C_DynamicValue.Reader(temp_string)
thisptr.set(field, temp) thisptr.set(field, temp)
@ -690,7 +690,7 @@ cdef _setBytesField(DynamicStruct_Builder thisptr, _StructSchemaField field, val
thisptr.setByField(field.thisptr, temp) thisptr.setByField(field.thisptr, temp)
cdef _setBaseStringField(DynamicStruct_Builder thisptr, _StructSchemaField field, value): cdef _setBaseStringField(DynamicStruct_Builder thisptr, _StructSchemaField field, value):
encoded_value = value.encode() encoded_value = value.encode('utf-8')
cdef capnp.StringPtr temp_string = capnp.StringPtr(<char*>encoded_value, len(encoded_value)) cdef capnp.StringPtr temp_string = capnp.StringPtr(<char*>encoded_value, len(encoded_value))
cdef C_DynamicValue.Reader temp = C_DynamicValue.Reader(temp_string) cdef C_DynamicValue.Reader temp = C_DynamicValue.Reader(temp_string)
thisptr.setByField(field.thisptr, temp) thisptr.setByField(field.thisptr, temp)

View file

@ -90,7 +90,7 @@ cpdef _set_{{field.name}}(self, value):
if type(value) is bytes: if type(value) is bytes:
temp_string = StringPtr(<char*>value, len(value)) temp_string = StringPtr(<char*>value, len(value))
else: else:
encoded_value = value.encode() encoded_value = value.encode('utf-8')
temp_string = StringPtr(<char*>encoded_value, len(encoded_value)) temp_string = StringPtr(<char*>encoded_value, len(encoded_value))
self.thisptr_child.set{{field.c_name}}(temp_string) self.thisptr_child.set{{field.c_name}}(temp_string)
{% elif 'data' == field['type'] -%} {% elif 'data' == field['type'] -%}
@ -99,7 +99,7 @@ cpdef _set_{{field.name}}(self, value):
if type(value) is bytes: if type(value) is bytes:
temp_string = StringPtr(<char*>value, len(value)) temp_string = StringPtr(<char*>value, len(value))
else: else:
encoded_value = value.encode() encoded_value = value.encode('utf-8')
temp_string = StringPtr(<char*>encoded_value, len(encoded_value)) temp_string = StringPtr(<char*>encoded_value, len(encoded_value))
self.thisptr_child.set{{field.c_name}}(ArrayPtr[byte](<byte *>temp_string.begin(), temp_string.size())) self.thisptr_child.set{{field.c_name}}(ArrayPtr[byte](<byte *>temp_string.begin(), temp_string.size()))
{% else -%} {% else -%}

Binary file not shown.

Binary file not shown.

View file

@ -25,7 +25,7 @@
uInt64Field = 345678901234567890, uInt64Field = 345678901234567890,
float32Field = -1.25e-10, float32Field = -1.25e-10,
float64Field = 345, float64Field = 345,
textField = "baz", textField = "\xe2\x98\x83",
dataField = "qux", dataField = "qux",
structField = ( structField = (
voidField = void, voidField = void,

View file

@ -1,10 +1,19 @@
# -*- coding: utf-8 -*-
import pytest import pytest
import capnp import capnp
import os import os
import math import math
import sys
this_dir = os.path.dirname(__file__) this_dir = os.path.dirname(__file__)
if sys.version_info[0] < 3:
EXPECT_BYTES = True
else:
EXPECT_BYTES = False
@pytest.fixture @pytest.fixture
def addressbook(): def addressbook():
return capnp.load(os.path.join(this_dir, 'addressbook.capnp')) return capnp.load(os.path.join(this_dir, 'addressbook.capnp'))
@ -300,7 +309,7 @@ def init_all_types(builder):
subBuilder.uInt64Field = 345678901234567890 subBuilder.uInt64Field = 345678901234567890
subBuilder.float32Field = -1.25e-10 subBuilder.float32Field = -1.25e-10
subBuilder.float64Field = 345 subBuilder.float64Field = 345
subBuilder.textField = "baz" subBuilder.textField = b"\xe2\x98\x83".decode('utf-8') # This is u"☃", but py3.2 doesn't support u
subBuilder.dataField = b"qux" subBuilder.dataField = b"qux"
subSubBuilder = subBuilder.structField subSubBuilder = subBuilder.structField
subSubBuilder.textField = "nested" subSubBuilder.textField = "nested"
@ -390,7 +399,15 @@ def check_all_types(reader):
assert subReader.uInt64Field == 345678901234567890 assert subReader.uInt64Field == 345678901234567890
assert_almost(subReader.float32Field, -1.25e-10) assert_almost(subReader.float32Field, -1.25e-10)
assert subReader.float64Field == 345 assert subReader.float64Field == 345
assert subReader.textField == "baz"
assert subReader.textField == "☃"
# This assertion highlights the encoding we expect to see here, since
# otherwise this appears a bit magical...
if EXPECT_BYTES:
assert len(subReader.textField) == 3
else:
assert len(subReader.textField) == 1
assert subReader.dataField == b"qux" assert subReader.dataField == b"qux"
subSubReader = subReader.structField subSubReader = subReader.structField