From ac26cf3506593ef3f779c17c8beb5f5d26895ddf Mon Sep 17 00:00:00 2001 From: Michael Smith Date: Thu, 27 Jun 2024 17:05:01 -0700 Subject: [PATCH] Support wildcard http_cookie_names (#509) Adds wildcard ('*') support to the `http_cookie_names` connect property to preserve all cookies returned by the server. Preserves prior behavior for any other value of `http_cookie_names`. --- impala/_thrift_api.py | 29 ++++++++++----- impala/dbapi.py | 12 +++---- impala/tests/test_util.py | 75 ++++++++++++++++++++++++++++++++++++++- impala/util.py | 21 ++++++++++- 4 files changed, 121 insertions(+), 16 deletions(-) diff --git a/impala/_thrift_api.py b/impala/_thrift_api.py index 6b97bb8c1..6f580e243 100644 --- a/impala/_thrift_api.py +++ b/impala/_thrift_api.py @@ -38,7 +38,7 @@ from impala.error import HttpError from impala.util import get_logger_and_init_null -from impala.util import get_all_matching_cookies, get_cookie_expiry +from impala.util import get_all_matching_cookies, get_all_cookies, get_cookie_expiry # Declare namedtuple for Cookie with named fields - cookie and expiry_time Cookie = namedtuple('Cookie', ['cookie', 'expiry_time']) @@ -83,7 +83,8 @@ def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=Non cookie-based authentication or session management. If there's only one name in the cookie name list, a str value can be specified instead of the list. If a cookie with one of these names is returned in an http response by the server or an intermediate - proxy then it will be included in each subsequent request for the same connection. + proxy then it will be included in each subsequent request for the same connection. If + it is set as wildcards, all cookies in an http response will be preserved. """ if port is not None: warnings.warn( @@ -129,8 +130,13 @@ def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=Non self.realhost = self.realport = self.proxy_auth = None if not http_cookie_names: # 'http_cookie_names' was explicitly set as an empty value ([], or '') in connect(). + self.__preserve_all_cookies = False self.__http_cookie_dict = None self.__auth_cookie_names = None + elif str(http_cookie_names).strip() == "*": + self.__preserve_all_cookies = True + self.__http_cookie_dict = dict() + self.__auth_cookie_names = set() else: if isinstance(http_cookie_names, six.string_types): http_cookie_names = [http_cookie_names] @@ -140,7 +146,7 @@ def __init__(self, uri_or_host, port=None, path=None, cafile=None, cert_file=Non # Store the auth cookie names in __auth_cookie_names. # Assume auth cookie names end with ".auth". self.__auth_cookie_names = \ - [ cn for cn in http_cookie_names if cn.endswith(".auth") ] + { cn for cn in http_cookie_names if cn.endswith(".auth") } # Set __are_matching_cookies_found as True if matching cookies are found in response. self.__are_matching_cookies_found = False self.__wbuf = BytesIO() @@ -248,13 +254,20 @@ def getHttpCookieHeaderForRequest(self): # Extract cookies from response and save those cookies for which the cookie names # are in the cookie name list specified in the connect() API. def extractHttpCookiesFromResponse(self): - if self.__http_cookie_dict is not None: + if self.__preserve_all_cookies: + matching_cookies = get_all_cookies(self.path, self.headers) + elif self.__http_cookie_dict is not None: matching_cookies = get_all_matching_cookies( self.__http_cookie_dict.keys(), self.path, self.headers) - if matching_cookies: - self.__are_matching_cookies_found = True - for c in matching_cookies: - self.__http_cookie_dict[c.key] = Cookie(c, get_cookie_expiry(c)) + else: + matching_cookies = None + + if matching_cookies: + self.__are_matching_cookies_found = True + for c in matching_cookies: + self.__http_cookie_dict[c.key] = Cookie(c, get_cookie_expiry(c)) + if c.key.endswith(".auth"): + self.__auth_cookie_names.add(c.key) # Return True if there are any saved cookies which are sent in previous request. def areHttpCookiesSaved(self): diff --git a/impala/dbapi.py b/impala/dbapi.py index 3b9d5da86..8bfa596b9 100644 --- a/impala/dbapi.py +++ b/impala/dbapi.py @@ -91,11 +91,11 @@ def connect(host='localhost', port=21050, database=None, timeout=None, name only, a str value can be specified instead of a list. If a cookie with one of these names is returned in an http response by the server or an intermediate proxy then it will be included in each subsequent request for - the same connection. + the same connection. If set to wildcard ('*'), all cookies in an http response + will be preserved. By default 'http_cookie_names' is set to '*'. Used only when `use_http_transport` is True. - By default 'http_cookie_names' is set to the list of HTTP cookie names used by - Impala and Hive. The names of authentication cookies are expected to end with - ".auth" string, for example, "impala.auth" for Impala authentication cookies. + The names of authentication cookies are expected to end with ".auth" string, for + example, "impala.auth" for Impala authentication cookies. If 'http_cookie_names' is explicitly set to a not None empty value ([], or ''), Impyla won't attempt to do cookie based authentication or session management. Currently cookie retention is supported for GSSAPI/LDAP/SASL/NOSASL/JWT over http. @@ -191,8 +191,8 @@ def connect(host='localhost', port=21050, database=None, timeout=None, warn_deprecate('auth_cookie_names', 'http_cookie_names') http_cookie_names = auth_cookie_names elif http_cookie_names is None: - # Set default value as the list of HTTP cookie names used by Impala, Hive and Knox. - http_cookie_names = ['impala.auth', 'impala.session.id', 'hive.server2.auth', 'KNOXSESSIONID', 'KNOX_BACKEND-HIVE', 'JSESSIONID'] + # Preserve all cookies. + http_cookie_names = '*' service = hs2.connect(host=host, port=port, timeout=timeout, use_ssl=use_ssl, diff --git a/impala/tests/test_util.py b/impala/tests/test_util.py index d13f9140d..665cafe08 100644 --- a/impala/tests/test_util.py +++ b/impala/tests/test_util.py @@ -24,7 +24,8 @@ import unittest import pytest -from impala.util import cookie_matches_path, get_cookie_expiry, get_all_matching_cookies +from impala.util import (cookie_matches_path, get_cookie_expiry, get_all_cookies, + get_all_matching_cookies) class ImpalaUtilTests(unittest.TestCase): @@ -78,6 +79,73 @@ def test_get_cookie_expiry(self): now = datetime.now() assert now + days2k <= get_cookie_expiry({'max-age': '172800000'}) <= now + days2k + sec + def test_get_matching_cookies(self): + cookies = get_all_cookies('/path', {}) + assert not cookies + + headers = make_cookie_headers([ + ('c_cookie', 'c_value'), + ('b_cookie', 'b_value'), + ('a_cookie', 'a_value') + ]) + cookies = csort(get_all_cookies('/path', headers)) + assert len(cookies) == 3 + assert cookies[0].key == 'a_cookie' and cookies[0].value == 'a_value' + assert cookies[1].key == 'b_cookie' and cookies[1].value == 'b_value' + assert cookies[2].key == 'c_cookie' and cookies[2].value == 'c_value' + + headers = make_cookie_headers([ + ('c_cookie', 'c_value;Path=/'), + ('b_cookie', 'b_value;Path=/path'), + ('a_cookie', 'a_value;Path=/') + ]) + cookies = csort(get_all_cookies('/path', headers)) + assert len(cookies) == 3 + assert cookies[0].key == 'a_cookie' and cookies[0].value == 'a_value' + assert cookies[1].key == 'b_cookie' and cookies[1].value == 'b_value' + assert cookies[2].key == 'c_cookie' and cookies[2].value == 'c_value' + + headers = make_cookie_headers([ + ('c_cookie', 'c_value;Path=/'), + ('b_cookie', 'b_value;Path=/path'), + ('a_cookie', 'a_value;Path=/path/path2') + ]) + cookies = csort(get_all_cookies('/path', headers)) + assert len(cookies) == 2 + assert cookies[0].key == 'b_cookie' and cookies[0].value == 'b_value' + assert cookies[1].key == 'c_cookie' and cookies[1].value == 'c_value' + cookies = csort(get_all_cookies('/path', headers)) + assert len(cookies) == 2 + assert cookies[0].key == 'b_cookie' and cookies[0].value == 'b_value' + assert cookies[1].key == 'c_cookie' and cookies[1].value == 'c_value' + + headers = make_cookie_headers([ + ('c_cookie', 'c_value;Path=/'), + ('b_cookie', 'b_value;Path=/path'), + ('a_cookie', 'a_value;Path=/path') + ]) + cookies = csort(get_all_cookies('/path/path1', headers)) + assert len(cookies) == 3 + assert cookies[0].key == 'a_cookie' and cookies[0].value == 'a_value' + assert cookies[1].key == 'b_cookie' and cookies[1].value == 'b_value' + assert cookies[2].key == 'c_cookie' and cookies[2].value == 'c_value' + + headers = make_cookie_headers([ + ('b_cookie', 'b_value;Path=/path1'), + ('a_cookie', 'a_value;Path=/path2') + ]) + cookies = get_all_cookies('/path', headers) + assert not cookies + + headers = make_cookie_headers([ + ('c_cookie', 'c_value;Path=/'), + ('b_cookie', 'b_value;Path=/path1'), + ('a_cookie', 'a_value;Path=/path2') + ]) + cookies = get_all_cookies('/path', headers) + assert len(cookies) == 1 + assert cookies[0].key == 'c_cookie' and cookies[0].value == 'c_value' + def test_get_all_matching_cookies(self): cookies = get_all_matching_cookies(['a', 'b'], '/path', {}) assert not cookies @@ -173,3 +241,8 @@ def make_cookie_headers(cookie_vals): value = pair[1] headers.add_header('Set-Cookie', name + "=" + value) return headers + +def csort(cookies): + """Sort list of SimpleCookie as header order is not guaranteed.""" + cookies.sort(key=lambda c: c.key) + return cookies diff --git a/impala/util.py b/impala/util.py index af0fb4330..cfd13f7f4 100644 --- a/impala/util.py +++ b/impala/util.py @@ -202,7 +202,7 @@ def get_cookie_expiry(c): return None -def get_all_matching_cookies(cookie_names, path, resp_headers): +def get_cookies(resp_headers): if 'Set-Cookie' not in resp_headers: return None @@ -214,9 +214,28 @@ def get_all_matching_cookies(cookie_names, path, resp_headers): cookie_headers = resp_headers.get_all('Set-Cookie') for header in cookie_headers: cookies.load(header) + return cookies except Exception: return None + +def get_all_cookies(path, resp_headers): + cookies = get_cookies(resp_headers) + if not cookies: + return None + + matching_cookies = [] + for c in cookies.values(): + if c and cookie_matches_path(c, path): + matching_cookies.append(c) + return matching_cookies + + +def get_all_matching_cookies(cookie_names, path, resp_headers): + cookies = get_cookies(resp_headers) + if not cookies: + return None + matching_cookies = [] for cn in cookie_names: if cn in cookies: