Skip to content

Commit

Permalink
Handle deprecation of naive datetime functions like utcnow() (#185)
Browse files Browse the repository at this point in the history
Following the deprecation of utcnow() and utcfromtimestamp() in
Python 3.12, we add a `tz_aware=False` argument to the datetime utils.
By default they still return naive datetimes; when set to True, they
return timezone-aware datetimes with the timezone set to UTC.

All instances of deprecated datetime methods have also been replaced.

---------

Co-authored-by: Ilya Kreymer <[email protected]>
  • Loading branch information
tw4l and ikreymer authored Nov 12, 2024
1 parent d9d2497 commit 5cf0d40
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 23 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ name: CI

on:
push:
branches:
- master
pull_request:

jobs:
Expand Down
4 changes: 2 additions & 2 deletions warcio/recordbuilder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import datetime
import six
import tempfile

from datetime import datetime, timezone
from io import BytesIO

from warcio.recordloader import ArcWarcRecord, ArcWarcRecordLoader
Expand Down Expand Up @@ -153,7 +153,7 @@ def _make_warc_id(cls):

@classmethod
def _make_warc_date(cls, use_micros=False):
return datetime_to_iso_date(datetime.datetime.utcnow(), use_micros=use_micros)
return datetime_to_iso_date(datetime.now(timezone.utc).replace(tzinfo=None), use_micros=use_micros)

def ensure_digest(self, record, block=True, payload=True):
if block:
Expand Down
73 changes: 54 additions & 19 deletions warcio/timeutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@

import re
import time
import datetime
import calendar

from datetime import datetime, timezone
from email.utils import parsedate, formatdate

#=================================================================
Expand All @@ -25,7 +25,7 @@
PAD_MICRO = '000000'


def iso_date_to_datetime(string):
def iso_date_to_datetime(string, tz_aware=False):
"""
>>> iso_date_to_datetime('2013-12-26T10:11:12Z')
datetime.datetime(2013, 12, 26, 10, 11, 12)
Expand All @@ -47,6 +47,12 @@ def iso_date_to_datetime(string):
>>> iso_date_to_datetime('2013-12-26T10:11:12.000000Z')
datetime.datetime(2013, 12, 26, 10, 11, 12)
>>> iso_date_to_datetime('2013-12-26T10:11:12Z', tz_aware=True)
datetime.datetime(2013, 12, 26, 10, 11, 12, tzinfo=datetime.timezone.utc)
>>> iso_date_to_datetime('2013-12-26T10:11:12.000000Z', tz_aware=True)
datetime.datetime(2013, 12, 26, 10, 11, 12, tzinfo=datetime.timezone.utc)
"""

nums = DATE_TIMESPLIT.split(string)
Expand All @@ -57,21 +63,32 @@ def iso_date_to_datetime(string):
nums[6] = nums[6][:6]
nums[6] += PAD_MICRO[len(nums[6]):]

the_datetime = datetime.datetime(*(int(num) for num in nums))
tzinfo = None
if tz_aware:
tzinfo = timezone.utc

the_datetime = datetime(*(int(num) for num in nums), tzinfo=tzinfo)
return the_datetime


def http_date_to_datetime(string):
def http_date_to_datetime(string, tz_aware=False):
"""
>>> http_date_to_datetime('Thu, 26 Dec 2013 09:50:10 GMT')
datetime.datetime(2013, 12, 26, 9, 50, 10)
>>> http_date_to_datetime('Thu, 26 Dec 2013 09:50:10 GMT', tz_aware=True)
datetime.datetime(2013, 12, 26, 9, 50, 10, tzinfo=datetime.timezone.utc)
"""
return datetime.datetime(*parsedate(string)[:6])
tzinfo = None
if tz_aware:
tzinfo = timezone.utc

return datetime(*parsedate(string)[:6], tzinfo=tzinfo)


def datetime_to_http_date(the_datetime):
"""
>>> datetime_to_http_date(datetime.datetime(2013, 12, 26, 9, 50, 10))
>>> datetime_to_http_date(datetime(2013, 12, 26, 9, 50, 10))
'Thu, 26 Dec 2013 09:50:10 GMT'
# Verify inverses
Expand All @@ -87,19 +104,19 @@ def datetime_to_http_date(the_datetime):

def datetime_to_iso_date(the_datetime, use_micros=False):
"""
>>> datetime_to_iso_date(datetime.datetime(2013, 12, 26, 10, 11, 12))
>>> datetime_to_iso_date(datetime(2013, 12, 26, 10, 11, 12))
'2013-12-26T10:11:12Z'
>>> datetime_to_iso_date(datetime.datetime(2013, 12, 26, 10, 11, 12, 456789))
>>> datetime_to_iso_date(datetime(2013, 12, 26, 10, 11, 12, 456789))
'2013-12-26T10:11:12Z'
>>> datetime_to_iso_date(datetime.datetime(2013, 12, 26, 10, 11, 12), use_micros=True)
>>> datetime_to_iso_date(datetime(2013, 12, 26, 10, 11, 12), use_micros=True)
'2013-12-26T10:11:12Z'
>>> datetime_to_iso_date(datetime.datetime(2013, 12, 26, 10, 11, 12, 456789), use_micros=True)
>>> datetime_to_iso_date(datetime(2013, 12, 26, 10, 11, 12, 456789), use_micros=True)
'2013-12-26T10:11:12.456789Z'
>>> datetime_to_iso_date(datetime.datetime(2013, 12, 26, 10, 11, 12, 1), use_micros=True)
>>> datetime_to_iso_date(datetime(2013, 12, 26, 10, 11, 12, 1), use_micros=True)
'2013-12-26T10:11:12.000001Z'
"""
Expand All @@ -112,7 +129,7 @@ def datetime_to_iso_date(the_datetime, use_micros=False):

def datetime_to_timestamp(the_datetime):
"""
>>> datetime_to_timestamp(datetime.datetime(2013, 12, 26, 10, 11, 12))
>>> datetime_to_timestamp(datetime(2013, 12, 26, 10, 11, 12))
'20131226101112'
"""

Expand All @@ -124,7 +141,7 @@ def timestamp_now():
>>> len(timestamp_now())
14
"""
return datetime_to_timestamp(datetime.datetime.utcnow())
return datetime_to_timestamp(datetime.now(timezone.utc))


def timestamp20_now():
Expand All @@ -139,7 +156,7 @@ def timestamp20_now():
20
"""
now = datetime.datetime.utcnow()
now = datetime.now(timezone.utc)
return now.strftime('%Y%m%d%H%M%S%f')


Expand Down Expand Up @@ -203,7 +220,7 @@ def pad_timestamp(string, pad_str=PAD_6_UP):
return string


def timestamp_to_datetime(string):
def timestamp_to_datetime(string, tz_aware=False):
"""
# >14-digit -- rest ignored
>>> timestamp_to_datetime('2014122609501011')
Expand Down Expand Up @@ -285,6 +302,18 @@ def timestamp_to_datetime(string):
>>> timestamp_to_datetime('2010abc')
datetime.datetime(2010, 12, 31, 23, 59, 59)
# 14-digit with tzinfo
>>> timestamp_to_datetime('20141226095010', tz_aware=True)
datetime.datetime(2014, 12, 26, 9, 50, 10, tzinfo=datetime.timezone.utc)
# 6-digit padding with tzinfo
>>> timestamp_to_datetime('201410', tz_aware=True)
datetime.datetime(2014, 10, 31, 23, 59, 59, tzinfo=datetime.timezone.utc)
# not a number! with tzinfo
>>> timestamp_to_datetime('2010abc', tz_aware=True)
datetime.datetime(2010, 12, 31, 23, 59, 59, tzinfo=datetime.timezone.utc)
"""

# pad to 6 digits
Expand Down Expand Up @@ -312,12 +341,17 @@ def extract(string, start, end, min_, max_):
minute = extract(string, 10, 12, 0, 59)
second = extract(string, 12, 14, 0, 59)

return datetime.datetime(year=year,
tzinfo = None
if tz_aware:
tzinfo = timezone.utc

return datetime(year=year,
month=month,
day=day,
hour=hour,
minute=minute,
second=second)
second=second,
tzinfo=tzinfo)

#return time.strptime(pad_timestamp(string), TIMESTAMP_14)

Expand All @@ -332,7 +366,8 @@ def timestamp_to_sec(string):
1420070399
"""

return calendar.timegm(timestamp_to_datetime(string).utctimetuple())
dt = timestamp_to_datetime(string, tz_aware=True)
return calendar.timegm(dt.utctimetuple())


def sec_to_timestamp(secs):
Expand All @@ -344,7 +379,7 @@ def sec_to_timestamp(secs):
'20141231235959'
"""

return datetime_to_timestamp(datetime.datetime.utcfromtimestamp(secs))
return datetime_to_timestamp(datetime.fromtimestamp(secs, timezone.utc))


def timestamp_to_http_date(string):
Expand Down

0 comments on commit 5cf0d40

Please sign in to comment.