From e65bce67ffdb5da836b722b2f57acc7ec15abe00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20N=C3=B6the?= Date: Wed, 7 Apr 2021 12:48:51 +0200 Subject: [PATCH 1/2] Fix string conversion for h5py >= 3 --- fact/VERSION | 2 +- fact/io.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fact/VERSION b/fact/VERSION index 30f6cf8..894542a 100644 --- a/fact/VERSION +++ b/fact/VERSION @@ -1 +1 @@ -0.26.1 +0.26.2 diff --git a/fact/io.py b/fact/io.py index fadd2e5..e5a35a9 100644 --- a/fact/io.py +++ b/fact/io.py @@ -177,9 +177,9 @@ def read_h5py( dataset = group[col] array = to_native_byteorder(dataset[first:last]) - # pandas cannot handle bytes, convert to str - if array.dtype.kind == 'S': - array = array.astype(str) + # decode unicode strings to str + if array.dtype.kind in {'S', 'O'}: + array = array.astype('U') if parse_dates and dataset.attrs.get('timeformat') is not None: array = pd.to_datetime(array, infer_datetime_format=True) From 4f650cdf8ebac8eef62d86340cb2d48032883b28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maximilian=20N=C3=B6the?= Date: Wed, 7 Apr 2021 13:01:57 +0200 Subject: [PATCH 2/2] Add test for string writing --- tests/test_io.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_io.py b/tests/test_io.py index 11e0fdc..650d123 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -295,7 +295,8 @@ def test_read_data_h5py(): df = pd.DataFrame({ 'x': np.random.normal(size=50).astype('float32'), - 'N': np.random.randint(0, 10, dtype='uint8', size=50) + 'N': np.random.randint(0, 10, dtype='uint8', size=50), + 'name': [f"s{i}" for i in range(50)], }).sort_index(1) with tempfile.NamedTemporaryFile(suffix='.hdf5') as f: