Update time coding tests to assert exact equality #9961

Merged · 2 commits · Jan 22, 2025
3 changes: 3 additions & 0 deletions doc/whats-new.rst
@@ -76,6 +76,9 @@ Documentation

Internal Changes
~~~~~~~~~~~~~~~~
+- Updated time coding tests to assert exact equality rather than equality with
+  a tolerance, since xarray's minimum supported version of cftime is greater
+  than 1.2.1 (:pull:`9961`). By `Spencer Clark <https://github.com/spencerkclark>`_.

.. _whats-new.2025.01.1:

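For context, a minimal sketch (not part of the diff) of the assertion pattern this PR replaces throughout the test suite, using plain numpy arrays and numpy.testing (the test modules import their own assert_array_equal helper):

    import numpy as np
    from numpy.testing import assert_array_equal

    actual = np.array(["2000-01-01T00:00:00"], dtype="datetime64[ns]")
    expected = np.array(["2000-01-01T00:00:00"], dtype="datetime64[ns]")

    # Old pattern: equality within a one-second tolerance, which absorbed
    # round-trip imprecision in cftime versions older than 1.2.1.
    abs_diff = abs(actual - expected)
    assert (abs_diff <= np.timedelta64(1, "s")).all()

    # New pattern: exact equality, safe now that xarray's minimum supported
    # cftime is newer than 1.2.1.
    assert_array_equal(actual, expected)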
14 changes: 4 additions & 10 deletions xarray/tests/test_backends.py
@@ -619,16 +619,13 @@
dtype = actual.t.dtype
expected_decoded_t = expected_decoded_t.astype(dtype)
expected_decoded_t0 = expected_decoded_t0.astype(dtype)
-abs_diff = abs(actual.t.values - expected_decoded_t)
-assert (abs_diff <= np.timedelta64(1, "s")).all()
+assert_array_equal(actual.t.values, expected_decoded_t)
assert (
actual.t.encoding["units"]
== "days since 0001-01-01 00:00:00.000000"
)
assert actual.t.encoding["calendar"] == expected_calendar

-abs_diff = abs(actual.t0.values - expected_decoded_t0)
-assert (abs_diff <= np.timedelta64(1, "s")).all()
+assert_array_equal(actual.t0.values, expected_decoded_t0)
assert actual.t0.encoding["units"] == "days since 0001-01-01"
assert actual.t.encoding["calendar"] == expected_calendar

@@ -4709,11 +4706,8 @@
expected_decoded_t0 = np.array([date_type(1, 1, 1)])

with self.roundtrip(expected) as actual:
-    abs_diff = abs(actual.t.values - expected_decoded_t)
-    assert (abs_diff <= np.timedelta64(1, "s")).all()
-
-    abs_diff = abs(actual.t0.values - expected_decoded_t0)
-    assert (abs_diff <= np.timedelta64(1, "s")).all()
+    assert_array_equal(actual.t.values, expected_decoded_t)
+    assert_array_equal(actual.t0.values, expected_decoded_t0)

def test_write_store(self) -> None:
# Override method in DatasetIOBase - not applicable to dask
@@ -5215,7 +5209,7 @@
yield actual, expected

def test_cmp_local_file(self) -> None:
with self.create_datasets() as (actual, expected):

    assert_equal(actual, expected)

# global attributes should be global attributes on the dataset
@@ -5249,7 +5243,7 @@

def test_compatible_to_netcdf(self) -> None:
# make sure it can be saved as a netcdf
with self.create_datasets() as (actual, expected):

    with create_tmp_file() as tmp_file:
        actual.to_netcdf(tmp_file)
        with open_dataset(tmp_file) as actual2:
@@ -5258,7 +5252,7 @@

@requires_dask
def test_dask(self) -> None:
with self.create_datasets(chunks={"j": 2}) as (actual, expected):

    assert_equal(actual, expected)


139 changes: 55 additions & 84 deletions xarray/tests/test_coding_times.py
@@ -65,36 +65,36 @@
_ALL_CALENDARS = sorted(_NON_STANDARD_CALENDARS_SET.union(_STANDARD_CALENDARS))
_NON_STANDARD_CALENDARS = sorted(_NON_STANDARD_CALENDARS_SET)
_CF_DATETIME_NUM_DATES_UNITS = [
-(np.arange(10), "days since 2000-01-01"),
-(np.arange(10).astype("float64"), "days since 2000-01-01"),
-(np.arange(10).astype("float32"), "days since 2000-01-01"),
-(np.arange(10).reshape(2, 5), "days since 2000-01-01"),
-(12300 + np.arange(5), "hours since 1680-01-01 00:00:00"),
+(np.arange(10), "days since 2000-01-01", "s"),
+(np.arange(10).astype("float64"), "days since 2000-01-01", "s"),
+(np.arange(10).astype("float32"), "days since 2000-01-01", "s"),
+(np.arange(10).reshape(2, 5), "days since 2000-01-01", "s"),
+(12300 + np.arange(5), "hours since 1680-01-01 00:00:00", "s"),
# here we add a couple minor formatting errors to test
# the robustness of the parsing algorithm.
-(12300 + np.arange(5), "hour since 1680-01-01 00:00:00"),
-(12300 + np.arange(5), "Hour since 1680-01-01 00:00:00"),
-(12300 + np.arange(5), " Hour since 1680-01-01 00:00:00 "),
-(10, "days since 2000-01-01"),
-([10], "daYs since 2000-01-01"),
-([[10]], "days since 2000-01-01"),
-([10, 10], "days since 2000-01-01"),
-(np.array(10), "days since 2000-01-01"),
-(0, "days since 1000-01-01"),
-([0], "days since 1000-01-01"),
-([[0]], "days since 1000-01-01"),
-(np.arange(2), "days since 1000-01-01"),
-(np.arange(0, 100000, 20000), "days since 1900-01-01"),
-(np.arange(0, 100000, 20000), "days since 1-01-01"),
-(17093352.0, "hours since 1-1-1 00:00:0.0"),
-([0.5, 1.5], "hours since 1900-01-01T00:00:00"),
-(0, "milliseconds since 2000-01-01T00:00:00"),
-(0, "microseconds since 2000-01-01T00:00:00"),
-(np.int32(788961600), "seconds since 1981-01-01"), # GH2002
-(12300 + np.arange(5), "hour since 1680-01-01 00:00:00.500000"),
-(164375, "days since 1850-01-01 00:00:00"),
-(164374.5, "days since 1850-01-01 00:00:00"),
-([164374.5, 168360.5], "days since 1850-01-01 00:00:00"),
+(12300 + np.arange(5), "hour since 1680-01-01 00:00:00", "s"),
+(12300 + np.arange(5), "Hour since 1680-01-01 00:00:00", "s"),
+(12300 + np.arange(5), " Hour since 1680-01-01 00:00:00 ", "s"),
+(10, "days since 2000-01-01", "s"),
+([10], "daYs since 2000-01-01", "s"),
+([[10]], "days since 2000-01-01", "s"),
+([10, 10], "days since 2000-01-01", "s"),
+(np.array(10), "days since 2000-01-01", "s"),
+(0, "days since 1000-01-01", "s"),
+([0], "days since 1000-01-01", "s"),
+([[0]], "days since 1000-01-01", "s"),
+(np.arange(2), "days since 1000-01-01", "s"),
+(np.arange(0, 100000, 20000), "days since 1900-01-01", "s"),
+(np.arange(0, 100000, 20000), "days since 1-01-01", "s"),
+(17093352.0, "hours since 1-1-1 00:00:0.0", "s"),
+([0.5, 1.5], "hours since 1900-01-01T00:00:00", "s"),
+(0, "milliseconds since 2000-01-01T00:00:00", "s"),
+(0, "microseconds since 2000-01-01T00:00:00", "s"),
+(np.int32(788961600), "seconds since 1981-01-01", "s"), # GH2002
+(12300 + np.arange(5), "hour since 1680-01-01 00:00:00.500000", "us"),
+(164375, "days since 1850-01-01 00:00:00", "s"),
+(164374.5, "days since 1850-01-01 00:00:00", "s"),
+([164374.5, 168360.5], "days since 1850-01-01 00:00:00", "s"),
]
_CF_DATETIME_TESTS = [
num_dates_units + (calendar,)
@@ -122,9 +122,15 @@ def _all_cftime_date_types():
@requires_cftime
@pytest.mark.filterwarnings("ignore:Ambiguous reference date string")
@pytest.mark.filterwarnings("ignore:Times can't be serialized faithfully")
-@pytest.mark.parametrize(["num_dates", "units", "calendar"], _CF_DATETIME_TESTS)
+@pytest.mark.parametrize(
+    ["num_dates", "units", "minimum_resolution", "calendar"], _CF_DATETIME_TESTS
+)
def test_cf_datetime(
-    num_dates, units, calendar, time_unit: PDDatetimeUnitOptions
+    num_dates,
+    units: str,
+    minimum_resolution: PDDatetimeUnitOptions,
+    calendar: str,
+    time_unit: PDDatetimeUnitOptions,
) -> None:
import cftime

@@ -137,25 +137,23 @@ def test_cf_datetime(
actual = decode_cf_datetime(num_dates, units, calendar, time_unit=time_unit)

if actual.dtype.kind != "O":
-    expected = cftime_to_nptime(expected, time_unit=time_unit)
-
-abs_diff = np.asarray(abs(actual - expected)).ravel()
-abs_diff = pd.to_timedelta(abs_diff.tolist()).to_numpy()
+    if np.timedelta64(1, time_unit) > np.timedelta64(1, minimum_resolution):
+        expected_unit = minimum_resolution
+    else:
+        expected_unit = time_unit
+    expected = cftime_to_nptime(expected, time_unit=expected_unit)
Review comment from the author on lines +146 to +150:
I needed to add this logic specifically for this test:

   (12300 + np.arange(5), "hour since 1680-01-01 00:00:00.500000", "us"),

This is because the reference date required the dates to be decoded at microsecond resolution (really only millisecond resolution would be required, but we follow pandas's lead here). Otherwise, for time_unit="s", we would have truncated precision when converting the cftime objects to np.datetime64 values, which prevented asserting exact equality.
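To make the truncation concrete, a small sketch (not from the PR; the timestamp below is illustrative, not the decoded value from the test case):

    import numpy as np

    # A time decoded from a reference date with a fractional second lands on
    # a half-second boundary, so it needs sub-second resolution.
    decoded = np.array(["1681-05-27T12:00:00.500000"], dtype="datetime64[us]")

    # Casting to a coarser unit silently truncates the fractional second,
    # which is what broke exact-equality checks at time_unit="s".
    assert decoded.astype("datetime64[s]")[0] == np.datetime64("1681-05-27T12:00:00")

    # The new test logic compares one-tick steps to order resolutions:
    # one second is larger than one microsecond, so "s" is too coarse here
    # and the expected values are built at the minimum resolution instead.
    assert np.timedelta64(1, "s") > np.timedelta64(1, "us")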


-# once we no longer support versions of netCDF4 older than 1.1.5,
-# we could do this check with near microsecond accuracy:
-# https://github.com/Unidata/netcdf4-python/issues/355
-assert (abs_diff <= np.timedelta64(1, "s")).all()
+assert_array_equal(actual, expected)
encoded1, _, _ = encode_cf_datetime(actual, units, calendar)

-assert_duckarray_allclose(num_dates, encoded1)
+assert_array_equal(num_dates, encoded1)

if hasattr(num_dates, "ndim") and num_dates.ndim == 1 and "1000" not in units:
# verify that wrapping with a pandas.Index works
# note that it *does not* currently work to put
# non-datetime64 compatible dates into a pandas.Index
encoded2, _, _ = encode_cf_datetime(pd.Index(actual), units, calendar)
-assert_duckarray_allclose(num_dates, encoded2)
+assert_array_equal(num_dates, encoded2)


@requires_cftime
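For readers less familiar with these helpers, a self-contained round-trip sketch (not from the diff) using the same functions this test file imports from xarray.coding.times:

    import numpy as np
    from xarray.coding.times import decode_cf_datetime, encode_cf_datetime

    num_dates = np.arange(10)
    units = "days since 2000-01-01"

    # Decode integer offsets to datetime64, then re-encode with the same
    # units and calendar; with integer inputs the round trip is exact, so
    # an exact-equality assertion is appropriate.
    decoded = decode_cf_datetime(num_dates, units, "standard")
    encoded, _, _ = encode_cf_datetime(decoded, units, "standard")
    np.testing.assert_array_equal(num_dates, encoded)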
@@ -206,11 +210,7 @@ def test_decode_cf_datetime_non_iso_strings() -> None:
]
for num_dates, units in cases:
actual = decode_cf_datetime(num_dates, units)
-abs_diff = abs(actual - expected.values)
-# once we no longer support versions of netCDF4 older than 1.1.5,
-# we could do this check with near microsecond accuracy:
-# https://github.com/Unidata/netcdf4-python/issues/355
-assert (abs_diff <= np.timedelta64(1, "s")).all()
+assert_array_equal(actual, expected)


@requires_cftime
@@ -220,7 +220,7 @@ def test_decode_standard_calendar_inside_timestamp_range(
) -> None:
import cftime

-units = "days since 0001-01-01"
+units = "hours since 0001-01-01"
Review comment from the author:
Using encoding units of "days" unnecessarily led the times to be encoded with floats, which prevented asserting exact equality in this test. Testing floating point time decoding was not the point of this test, so I changed to encoding the times with units of "hours" instead.
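As a rough sketch of the distinction (not from the PR; a nearby reference date keeps the arithmetic simple, and encode_cf_datetime is the internal helper these tests already use):

    import pandas as pd
    from xarray.coding.times import encode_cf_datetime

    times = pd.date_range("2001-04-01", periods=3, freq="h")

    # Hourly times are fractional when counted in days, so the encoded
    # values come back as floats ...
    num_days, _, _ = encode_cf_datetime(times, "days since 2001-01-01")
    assert num_days.dtype.kind == "f"

    # ... while counted in hours they are whole numbers, which makes an
    # exact round trip possible.
    num_hours, _, _ = encode_cf_datetime(times, "hours since 2001-01-01")
    assert num_hours.dtype.kind == "i"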

times = pd.date_range(
"2001-04-01-00", end="2001-04-30-23", unit=time_unit, freq="h"
)
@@ -233,11 +233,7 @@ def test_decode_standard_calendar_inside_timestamp_range(
# representable with nanosecond resolution.
actual = decode_cf_datetime(time, units, calendar=calendar, time_unit=time_unit)
assert actual.dtype == np.dtype(f"=M8[{time_unit}]")
-abs_diff = abs(actual - expected)
-# once we no longer support versions of netCDF4 older than 1.1.5,
-# we could do this check with near microsecond accuracy:
-# https://github.com/Unidata/netcdf4-python/issues/355
-assert (abs_diff <= np.timedelta64(1, "s")).all()
+assert_array_equal(actual, expected)


@requires_cftime
@@ -256,11 +252,7 @@ def test_decode_non_standard_calendar_inside_timestamp_range(calendar) -> None:

actual = decode_cf_datetime(non_standard_time, units, calendar=calendar)
assert actual.dtype == expected_dtype
-abs_diff = abs(actual - expected)
-# once we no longer support versions of netCDF4 older than 1.1.5,
-# we could do this check with near microsecond accuracy:
-# https://github.com/Unidata/netcdf4-python/issues/355
-assert (abs_diff <= np.timedelta64(1, "s")).all()
+assert_array_equal(actual, expected)


@requires_cftime
@@ -287,11 +279,7 @@ def test_decode_dates_outside_timestamp_range(
warnings.filterwarnings("ignore", "Unable to decode time axis")
actual = decode_cf_datetime(time, units, calendar=calendar, time_unit=time_unit)
assert all(isinstance(value, expected_date_type) for value in actual)
-abs_diff = abs(actual - expected)
-# once we no longer support versions of netCDF4 older than 1.1.5,
-# we could do this check with near microsecond accuracy:
-# https://github.com/Unidata/netcdf4-python/issues/355
-assert (abs_diff <= np.timedelta64(1, "us")).all()
+assert_array_equal(actual, expected)


@requires_cftime
Expand Down Expand Up @@ -367,14 +355,8 @@ def test_decode_standard_calendar_multidim_time_inside_timestamp_range(
mdim_time, units, calendar=calendar, time_unit=time_unit
)
assert actual.dtype == np.dtype(f"=M8[{time_unit}]")

-abs_diff1 = abs(actual[:, 0] - expected1)
-abs_diff2 = abs(actual[:, 1] - expected2)
-# once we no longer support versions of netCDF4 older than 1.1.5,
-# we could do this check with near microsecond accuracy:
-# https://github.com/Unidata/netcdf4-python/issues/355
-assert (abs_diff1 <= np.timedelta64(1, "s")).all()
-assert (abs_diff2 <= np.timedelta64(1, "s")).all()
+assert_array_equal(actual[:, 0], expected1)
+assert_array_equal(actual[:, 1], expected2)


@requires_cftime
Expand Down Expand Up @@ -409,13 +391,8 @@ def test_decode_nonstandard_calendar_multidim_time_inside_timestamp_range(
actual = decode_cf_datetime(mdim_time, units, calendar=calendar)

assert actual.dtype == expected_dtype
-abs_diff1 = abs(actual[:, 0] - expected1)
-abs_diff2 = abs(actual[:, 1] - expected2)
-# once we no longer support versions of netCDF4 older than 1.1.5,
-# we could do this check with near microsecond accuracy:
-# https://github.com/Unidata/netcdf4-python/issues/355
-assert (abs_diff1 <= np.timedelta64(1, "s")).all()
-assert (abs_diff2 <= np.timedelta64(1, "s")).all()
+assert_array_equal(actual[:, 0], expected1)
+assert_array_equal(actual[:, 1], expected2)


@requires_cftime
Expand Down Expand Up @@ -455,14 +432,8 @@ def test_decode_multidim_time_outside_timestamp_range(
dtype = np.dtype(f"=M8[{time_unit}]")

assert actual.dtype == dtype

-abs_diff1 = abs(actual[:, 0] - expected1)
-abs_diff2 = abs(actual[:, 1] - expected2)
-# once we no longer support versions of netCDF4 older than 1.1.5,
-# we could do this check with near microsecond accuracy:
-# https://github.com/Unidata/netcdf4-python/issues/355
-assert (abs_diff1 <= np.timedelta64(1, "s")).all()
-assert (abs_diff2 <= np.timedelta64(1, "s")).all()
+assert_array_equal(actual[:, 0], expected1)
+assert_array_equal(actual[:, 1], expected2)


@requires_cftime