Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove six dependency #188

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ Background

This library provides a fast, standalone way to read and write `WARC
Format <https://en.wikipedia.org/wiki/Web_ARChive>`__ commonly used in
web archives. Python 3.7+ (minimally only needing
`six <https://pythonhosted.org/six/>`__ as an external dependency)
web archives. Python 3.7+.

warcio supports reading and writing of WARC files compliant with both the `WARC 1.0 <http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf>`__
and `WARC 1.1 <http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1-1_latestdraft.pdf>`__ ISO standards.
Expand Down Expand Up @@ -59,7 +58,7 @@ the format (ARC or WARC), record type, the record headers, http headers

.. code:: python

class ArcWarcRecord(object):
class ArcWarcRecord:
def __init__(self, *args):
(self.format, self.rec_type, self.rec_headers, self.raw_stream,
self.http_headers, self.content_type, self.length) = args
Expand Down
4 changes: 1 addition & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,7 @@
provides=[
'warcio',
],
install_requires=[
'six',
],
install_requires=[],
zip_safe=True,
entry_points="""
[console_scripts]
Expand Down
2 changes: 1 addition & 1 deletion test/test_archiveiterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@


#==============================================================================
class TestArchiveIterator(object):
class TestArchiveIterator:
def _load_archive(self, filename, offset=0, cls=ArchiveIterator,
errs_expected=0, **kwargs):

Expand Down
4 changes: 1 addition & 3 deletions test/test_bufferedreaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,6 @@

from contextlib import closing

import six

import zlib
import pytest

Expand Down Expand Up @@ -177,7 +175,7 @@ def test_err_chunk_cut_off():


def print_str(string):
return string.decode('utf-8') if six.PY3 else string
return string.decode('utf-8')



Expand Down
2 changes: 1 addition & 1 deletion test/test_capture_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@


# ==================================================================
class TestCaptureHttpBin(object):
class TestCaptureHttpBin:
@classmethod
def setup_class(cls):
from httpbin import app as httpbin_app
Expand Down
2 changes: 1 addition & 1 deletion test/test_check_digest_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def pytest_generate_tests(metafunc):
metafunc.parametrize('test_filename', files)


class TestExamplesDigest(object):
class TestExamplesDigest:
def check_helper(self, args, expected_exit_value, capsys):
exit_value = None
try:
Expand Down
2 changes: 1 addition & 1 deletion test/test_limitreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from io import BytesIO

class TestLimitReader(object):
class TestLimitReader:
def test_limit_reader_1(self):
assert b'abcdefghji' == LimitReader(BytesIO(b'abcdefghjiklmnopqrstuvwxyz'), 10).read(26)

Expand Down
2 changes: 1 addition & 1 deletion test/test_statusandheaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@


from warcio.statusandheaders import StatusAndHeadersParser, StatusAndHeaders
from six import StringIO
from io import StringIO
import pytest


Expand Down
8 changes: 4 additions & 4 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
pass


class TestUtils(object):
class TestUtils:
def test_headers_to_str_headers(self):
result = [('foo', 'bar'), ('baz', 'barf')]

Expand Down Expand Up @@ -77,14 +77,14 @@ def test_to_native_str(self):
def test_open_exclusive(self):
temp_dir = tempfile.mkdtemp('warctest')
full_name = os.path.join(temp_dir, 'foo.txt')
with utils.open(full_name, 'xb') as fh:
with open(full_name, 'xb') as fh:
fh.write(b'test\r\nfoo')

with pytest.raises(OSError):
with utils.open(full_name, 'xb') as fh:
with open(full_name, 'xb') as fh:
fh.write(b'test\r\nfoo')

with utils.open(full_name, 'rb') as fh:
with open(full_name, 'rb') as fh:
assert fh.read() == b'test\r\nfoo'

os.remove(full_name)
Expand Down
2 changes: 1 addition & 1 deletion test/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,7 @@ def record_sampler(request):


# ============================================================================
class TestWarcWriter(object):
class TestWarcWriter:
@classmethod
def _validate_record_content_len(cls, stream):
for record in ArchiveIterator(stream, no_record_parse=True):
Expand Down
5 changes: 2 additions & 3 deletions warcio/archiveiterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from warcio.utils import BUFF_SIZE

import sys
import six

# ============================================================================
class UnseekableYetTellable:
Expand All @@ -23,7 +22,7 @@ def read(self, size=-1):
return result

# ============================================================================
class ArchiveIterator(six.Iterator):
class ArchiveIterator:
""" Iterate over records in WARC and ARC files, both gzip chunk
compressed and uncompressed

Expand Down Expand Up @@ -91,7 +90,7 @@ def __iter__(self):
return self.the_iter

def __next__(self):
return six.next(self.the_iter)
return next(self.the_iter)

def close(self):
self.record = None
Expand Down
2 changes: 1 addition & 1 deletion warcio/bufferedreaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def brotli_decompressor():


#=================================================================
class BufferedReader(object):
class BufferedReader:
"""
A wrapping line reader which wraps an existing reader.
Read operations operate on underlying buffer, which is filled to
Expand Down
8 changes: 4 additions & 4 deletions warcio/capture_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

from io import BytesIO

from six.moves import http_client as httplib
import http.client as httplib

from contextlib import contextmanager

from array import array

from warcio.utils import to_native_str, BUFF_SIZE, open
from warcio.utils import to_native_str, BUFF_SIZE
from warcio.warcwriter import WARCWriter, BufferWARCWriter

from tempfile import SpooledTemporaryFile
Expand All @@ -19,7 +19,7 @@


# ============================================================================
class RecordingStream(object):
class RecordingStream:
def __init__(self, fp, recorder):
self.fp = fp
self.recorder = recorder
Expand Down Expand Up @@ -130,7 +130,7 @@ def putrequest(self, *args, **kwargs):


# ============================================================================
class RequestRecorder(object):
class RequestRecorder:
def __init__(self, writer, filter_func=None, record_ip=True):
self.writer = writer
self.filter_func = filter_func
Expand Down
2 changes: 1 addition & 1 deletion warcio/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def _read_entire_stream(stream):
break


class Checker(object):
class Checker:
def __init__(self, cmd):
self.inputs = cmd.inputs
self.verbose = cmd.verbose
Expand Down
2 changes: 1 addition & 1 deletion warcio/digestverifyingreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@


# ============================================================================
class DigestChecker(object):
class DigestChecker:
def __init__(self, kind=None):
self._problem = []
self._passed = None
Expand Down
2 changes: 1 addition & 1 deletion warcio/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@


# ============================================================================
class Extractor(object):
class Extractor:
READ_SIZE = BUFF_SIZE * 4

def __init__(self, filename, offset):
Expand Down
2 changes: 1 addition & 1 deletion warcio/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


# ============================================================================
class Indexer(object):
class Indexer:
field_names = {}

def __init__(self, fields, inputs, output, verify_http=False):
Expand Down
2 changes: 1 addition & 1 deletion warcio/limitreader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ============================================================================
class LimitReader(object):
class LimitReader:
"""
A reader which will not read more than specified limit
"""
Expand Down
2 changes: 1 addition & 1 deletion warcio/recompressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


# ============================================================================
class Recompressor(object):
class Recompressor:
def __init__(self, filename, output, verbose=False):
self.filename = filename
self.output = output
Expand Down
5 changes: 2 additions & 3 deletions warcio/recordbuilder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import six
import tempfile

from datetime import datetime, timezone
Expand All @@ -10,7 +9,7 @@
from warcio.utils import to_native_str, BUFF_SIZE, Digester

#=================================================================
class RecordBuilder(object):
class RecordBuilder:
REVISIT_PROFILE = 'http://netpreserve.org/warc/1.0/revisit/identical-payload-digest'
REVISIT_PROFILE_1_1 = 'http://netpreserve.org/warc/1.1/revisit/identical-payload-digest'

Expand Down Expand Up @@ -44,7 +43,7 @@ def create_warcinfo_record(self, filename, info):
warc_headers.add_header('WARC-Date', self.curr_warc_date())

warcinfo = BytesIO()
for name, value in six.iteritems(info):
for name, value in info.items():
if not value:
continue

Expand Down
8 changes: 3 additions & 5 deletions warcio/recordloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,12 @@

from warcio.timeutils import timestamp_to_iso_date

from six.moves import zip

import logging
logger = logging.getLogger(__name__)


#=================================================================
class ArcWarcRecord(object):
class ArcWarcRecord:
def __init__(self, *args, **kwargs):
(self.format, self.rec_type, self.rec_headers, self.raw_stream,
self.http_headers, self.content_type, self.length) = args
Expand Down Expand Up @@ -45,7 +43,7 @@ def content_stream(self):


#=================================================================
class ArcWarcRecordLoader(object):
class ArcWarcRecordLoader:
WARC_TYPES = ['WARC/1.1', 'WARC/1.0', 'WARC/0.17', 'WARC/0.18']

HTTP_TYPES = ['HTTP/1.0', 'HTTP/1.1']
Expand Down Expand Up @@ -269,7 +267,7 @@ def _ensure_target_uri_format(self, rec_headers):


#=================================================================
class ARCHeadersParser(object):
class ARCHeadersParser:
# ARC 1.0 headers
ARC_HEADERS = ["uri", "ip-address", "archive-date",
"content-type", "length"]
Expand Down
8 changes: 3 additions & 5 deletions warcio/statusandheaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,15 @@
Representation and parsing of HTTP-style status + headers
"""

from six.moves import range
from six import iteritems
from warcio.utils import to_native_str, headers_to_str_headers
import uuid

from six.moves.urllib.parse import quote
from urllib.parse import quote
import re


#=================================================================
class StatusAndHeaders(object):
class StatusAndHeaders:
ENCODE_HEADER_RX = re.compile(r'[=]["\']?([^;"]+)["\']?(?=[;]?)')
"""
Representation of parsed http-style status line and headers
Expand Down Expand Up @@ -224,7 +222,7 @@ def _strip_count(string, total_read):


#=================================================================
class StatusAndHeadersParser(object):
class StatusAndHeadersParser:
"""
Parser which consumes a stream supporting readline() to read
status and headers and return a StatusAndHeaders object
Expand Down
Loading