diff --git a/test/data/example-space-in-url.arc b/test/data/example-space-in-url.arc new file mode 100644 index 00000000..da248a83 --- /dev/null +++ b/test/data/example-space-in-url.arc @@ -0,0 +1,69 @@ +filedesc://live-web-example.arc.gz 127.0.0.1 20140216050221 text/plain 75 +1 0 LiveWeb Capture +URL IP-address Archive-date Content-type Archive-length + +http://example.com/index.cfm?FuseAction=Email&EmailTitle=Examples From The Live Web&IsPopUp=False 93.184.216.119 20140216050221 text/html 1591 +HTTP/1.1 200 OK +Accept-Ranges: bytes +Cache-Control: max-age=604800 +Content-Type: text/html +Date: Sun, 16 Feb 2014 05:02:20 GMT +Etag: "359670651" +Expires: Sun, 23 Feb 2014 05:02:20 GMT +Last-Modified: Fri, 09 Aug 2013 23:54:35 GMT +Server: ECS (sjc/4FCE) +X-Cache: HIT +x-ec-custom-error: 1 +Content-Length: 1270 + + + + + Example Domain + + + + + + + + +
+

Example Domain

+

This domain is established to be used for illustrative examples in documents. You may use this + domain in examples without prior coordination or asking for permission.

+

More information...

+
+ + + diff --git a/test/test_archiveiterator.py b/test/test_archiveiterator.py index 8cba7600..93be6544 100644 --- a/test/test_archiveiterator.py +++ b/test/test_archiveiterator.py @@ -112,6 +112,10 @@ def test_example_arc_gz(self): expected = ['arc_header', 'response'] assert self._load_archive('example.arc.gz') == expected + def test_example_space_in_url_arc(self): + expected = ['arc_header', 'response'] + assert self._load_archive('example-space-in-url.arc') == expected + def test_example_arc(self): expected = ['arc_header', 'response'] assert self._load_archive('example.arc') == expected diff --git a/warcio/recordloader.py b/warcio/recordloader.py index 2467bde3..148480cd 100644 --- a/warcio/recordloader.py +++ b/warcio/recordloader.py @@ -288,7 +288,7 @@ def parse(self, stream, headerline=None): total_read += len(version) total_read += len(spec) - parts = headerline.split(' ') + parts = headerline.rsplit(' ', len(headernames)-1) if len(parts) != len(headernames): msg = 'Wrong # of headers, expected arc headers {0}, Found {1}'