From ad96297a0608bfd5196598cc3bbcce1f4aa03bc9 Mon Sep 17 00:00:00 2001 From: codl Date: Mon, 15 Apr 2019 03:23:04 +0200 Subject: [PATCH 1/5] more robust handling of pagination Link headers. During a cursory investigation for #163, I found that the code handling Link headers would not handle non-numeric post IDs like pleroma's flakeIDs correctly: IDs starting with a number would be truncated at the first non-digit, and IDs not starting with a number would throw. Thankfully, all flakeIDs generated so far start with 9. Maybe 8 for the earliest ones, I'm not sure. Either way, so far it would only have misbehaved when using the pagination functions or accessing the _pagination_prev and _pagination_next attributes directly. --- mastodon/Mastodon.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py index fc585ba..0550d85 100644 --- a/mastodon/Mastodon.py +++ b/mastodon/Mastodon.py @@ -2272,13 +2272,17 @@ class Mastodon: if url['rel'] == 'next': # Be paranoid and extract max_id specifically next_url = url['url'] - matchgroups = re.search(r"max_id=([0-9]*)", next_url) + matchgroups = re.search(r"max_id=([^&]+)", next_url) if matchgroups: next_params = copy.deepcopy(params) next_params['_pagination_method'] = method next_params['_pagination_endpoint'] = endpoint - next_params['max_id'] = int(matchgroups.group(1)) + max_id = matchgroups.group(1) + if max_id.is_digit(): + next_params['max_id'] = int(max_id) + else: + next_params['max_id'] = max_id if "since_id" in next_params: del next_params['since_id'] response[-1]._pagination_next = next_params @@ -2286,13 +2290,17 @@ class Mastodon: if url['rel'] == 'prev': # Be paranoid and extract since_id specifically prev_url = url['url'] - matchgroups = re.search(r"since_id=([0-9]*)", prev_url) + matchgroups = re.search(r"since_id=([^&]+)", prev_url) if matchgroups: prev_params = copy.deepcopy(params) prev_params['_pagination_method'] = method 
prev_params['_pagination_endpoint'] = endpoint - prev_params['since_id'] = int(matchgroups.group(1)) + since_id = matchgroups.group(1) + if since_id.is_digit(): + prev_params['since_id'] = int(since_id) + else: + prev_params['since_id'] = since_id if "max_id" in prev_params: del prev_params['max_id'] response[0]._pagination_prev = prev_params From 62a47f4e92bd58639beeb682cb7cc53ebba8b803 Mon Sep 17 00:00:00 2001 From: codl Date: Mon, 15 Apr 2019 14:19:45 +0200 Subject: [PATCH 2/5] oops. str.is_digit() -> str.isdigit() --- mastodon/Mastodon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py index 0550d85..123d1a6 100644 --- a/mastodon/Mastodon.py +++ b/mastodon/Mastodon.py @@ -2279,7 +2279,7 @@ class Mastodon: next_params['_pagination_method'] = method next_params['_pagination_endpoint'] = endpoint max_id = matchgroups.group(1) - if max_id.is_digit(): + if max_id.isdigit(): next_params['max_id'] = int(max_id) else: next_params['max_id'] = max_id @@ -2297,7 +2297,7 @@ class Mastodon: prev_params['_pagination_method'] = method prev_params['_pagination_endpoint'] = endpoint since_id = matchgroups.group(1) - if since_id.is_digit(): + if since_id.isdigit(): prev_params['since_id'] = int(since_id) else: prev_params['since_id'] = since_id From eca31ea732d1c2c8f0491a138b2940e828a45973 Mon Sep 17 00:00:00 2001 From: codl Date: Mon, 15 Apr 2019 14:26:43 +0200 Subject: [PATCH 3/5] improve link header parameter extraction regex --- mastodon/Mastodon.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py index 123d1a6..3f594cf 100644 --- a/mastodon/Mastodon.py +++ b/mastodon/Mastodon.py @@ -2272,7 +2272,7 @@ class Mastodon: if url['rel'] == 'next': # Be paranoid and extract max_id specifically next_url = url['url'] - matchgroups = re.search(r"max_id=([^&]+)", next_url) + matchgroups = re.search(r"[?&]max_id=([^&]+)", next_url) if matchgroups: 
next_params = copy.deepcopy(params) @@ -2290,7 +2290,7 @@ class Mastodon: if url['rel'] == 'prev': # Be paranoid and extract since_id specifically prev_url = url['url'] - matchgroups = re.search(r"since_id=([^&]+)", prev_url) + matchgroups = re.search(r"[?&]since_id=([^&]+)", prev_url) if matchgroups: prev_params = copy.deepcopy(params) From a815c10f9bb390aadc88fe0f35ee141ba96cfb45 Mon Sep 17 00:00:00 2001 From: codl Date: Mon, 15 Apr 2019 15:48:45 +0200 Subject: [PATCH 4/5] add test for link headers. this adds requests-mock as a dependency --- Pipfile | 1 + Pipfile.lock | 8 ++++++++ tests/test_pagination.py | 20 ++++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/Pipfile b/Pipfile index 365589a..d0d3ce5 100644 --- a/Pipfile +++ b/Pipfile @@ -13,3 +13,4 @@ pytest-cov = "*" vcrpy = "*" pytest-vcr = "<1" pytest-mock = "*" +requests-mock = "*" diff --git a/Pipfile.lock b/Pipfile.lock index d1939fc..db1193b 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -313,6 +313,14 @@ ], "version": "==2.20.1" }, + "requests-mock": { + "hashes": [ + "sha256:7a5fa99db5e3a2a961b6f20ed40ee6baeff73503cf0a553cc4d679409e6170fb", + "sha256:8ca0628dc66d3f212878932fd741b02aa197ad53fd2228164800a169a4a826af" + ], + "index": "pypi", + "version": "==1.5.2" + }, "six": { "hashes": [ "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", diff --git a/tests/test_pagination.py b/tests/test_pagination.py index 599b2f4..d2c0bd5 100644 --- a/tests/test_pagination.py +++ b/tests/test_pagination.py @@ -1,5 +1,10 @@ import pytest from contextlib import contextmanager +try: + from mock import MagicMock +except ImportError: + from unittest.mock import MagicMock +import requests_mock UNLIKELY_HASHTAG = "fgiztsshwiaqqiztpmmjbtvmescsculuvmgjgopwoeidbcrixp" @@ -44,3 +49,18 @@ def test_fetch_remaining(api): hashtag_remaining = api.fetch_remaining(hashtag) assert hashtag_remaining assert len(hashtag_remaining) >= 30 + +def test_link_headers(api): + rmock = 
requests_mock.Adapter() + api.session.mount(api.api_base_url, rmock) + + _id='abc1234' + + rmock.register_uri('GET', requests_mock.ANY, json=[{"foo": "bar"}], headers={"link":""" + <{base}/api/v1/timelines/tag/{tag}?max_id={_id}>; rel="next", <{base}/api/v1/timelines/tag/{tag}?since_id={_id}>; rel="prev" + """.format(base=api.api_base_url, tag=UNLIKELY_HASHTAG, _id=_id).strip() + }) + + resp = api.timeline_hashtag(UNLIKELY_HASHTAG) + assert resp[0]._pagination_next['max_id'] == _id + assert resp[0]._pagination_prev['since_id'] == _id From de329e8cf6549da18e94dd7bde753da025f04167 Mon Sep 17 00:00:00 2001 From: codl Date: Mon, 15 Apr 2019 15:59:39 +0200 Subject: [PATCH 5/5] oh. forgot to update setup.py as well --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 009063d..72e3c8b 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ from setuptools import setup -test_deps = ['pytest', 'pytest-runner', 'pytest-cov', 'vcrpy', 'pytest-vcr', 'pytest-mock'] +test_deps = ['pytest', 'pytest-runner', 'pytest-cov', 'vcrpy', 'pytest-vcr', 'pytest-mock', 'requests-mock'] extras = { "test": test_deps }