From e4e3a8eb93721bf12e4a5b2bf45dc3c2a473dc6f Mon Sep 17 00:00:00 2001 From: Lorenz Diener Date: Fri, 25 Nov 2016 18:17:39 +0100 Subject: [PATCH 1/2] Ratelimit code --- mastodon/Mastodon.py | 150 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 122 insertions(+), 28 deletions(-) diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py index 7bd6473..8af739d 100644 --- a/mastodon/Mastodon.py +++ b/mastodon/Mastodon.py @@ -7,6 +7,7 @@ import mimetypes import time import random import string +from datetime import datetime class Mastodon: """ @@ -21,6 +22,7 @@ class Mastodon: """ __DEFAULT_BASE_URL = 'https://mastodon.social' + ### # Registering apps ### @@ -32,6 +34,9 @@ class Mastodon: Specify redirect_uris if you want users to be redirected to a certain page after authenticating. Specify to_file to persist your apps info to a file so you can use them in the constructor. Specify api_base_url if you want to register an app on an instance different from the flagship one. + + Presently, app registration is open by default, but this is not guaranteed to be the case for all + future mastodon instances or even the flagship instance in the future. Returns client_id and client_secret. """ @@ -57,13 +62,22 @@ class Mastodon: ### # Authentication, including constructor ### - def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False): + def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False, ratelimit_method = "wait", ratelimit_pacefactor = 0.9): """ Creates a new API wrapper instance based on the given client_secret and client_id. If you give a client_id and it is not a file, you must also give a secret. You can also directly specify an access_token, directly or as a file. - + + Mastodon.py can try to respect rate limits in several ways, controlled by ratelimit_method. + "throw" makes functions throw a MastodonRatelimitError when the rate + limit is hit. "wait" mode will, once the limit is hit, wait and retry the request as soon + as the rate limit resets, until it succeeds. "pace" works like throw, but tries to wait in + between calls so that the limit is generally not hit (How hard it tries to not hit the rate + limit can be controlled by ratelimit_pacefactor). The default setting is "wait". Note that + even in "wait" and "pace" mode, requests can still fail due to network or other problems! Also + note that "pace" and "wait" are NOT thread safe. + Specify api_base_url if you wish to talk to an instance other than the flagship one. If a file is given as client_id, read client ID and secret from that file """ @@ -72,6 +86,13 @@ class Mastodon: self.client_secret = client_secret self.access_token = access_token self.debug_requests = debug_requests + self.ratelimit_method = ratelimit_method + + self.ratelimit_limit = 150 + self.ratelimit_reset = time.time() + self.ratelimit_remaining = 150 + self.ratelimit_lastcall = time.time() + self.ratelimit_pacefactor = 0.9 if os.path.isfile(self.client_id): with open(self.client_id, 'r') as secret_file: @@ -79,7 +100,7 @@ class Mastodon: self.client_secret = secret_file.readline().rstrip() else: if self.client_secret == None: - raise ValueError('Specified client id directly, but did not supply secret') + raise MastodonIllegalArgumentError('Specified client id directly, but did not supply secret') if self.access_token != None and os.path.isfile(self.access_token): with open(self.access_token, 'r') as token_file: @@ -87,8 +108,10 @@ class Mastodon: def log_in(self, username, password, scopes = ['read', 'write', 'follow'], to_file = None): """ - Logs in and sets access_token to what was returned. - Can persist access token to file. + Logs in and sets access_token to what was returned. Note that your + username is the e-mail you use to log in into mastodon. + + Can persist access token to file, to be used in the constructor. Will throw an exception if username / password are wrong, scopes are not valid or granted scopes differ from requested. @@ -105,13 +128,13 @@ class Mastodon: response = self.__api_request('POST', '/oauth/token', params) self.access_token = response['access_token'] except: - raise ValueError('Invalid user name, password or scopes.') + raise MastodonIllegalArgumentError('Invalid user name, password or scopes.') requested_scopes = " ".join(sorted(scopes)) received_scopes = " ".join(sorted(response["scope"].split(" "))) if requested_scopes != received_scopes: - raise ValueError('Granted scopes "' + received_scopes + '" differ from requested scopes "' + requested_scopes + '".') + raise MastodonAPIError('Granted scopes "' + received_scopes + '" differ from requested scopes "' + requested_scopes + '".') if to_file != None: with open(to_file, 'w') as token_file: @@ -352,8 +375,8 @@ class Mastodon: the ID that can then be used in status_post() to attach the media to a toot. - Throws a ValueError if the mime type of the passed data or file can - not be determined properly. + Throws a MastodonIllegalArgumentError if the mime type of the + passed data or file can not be determined properly. """ if os.path.isfile(media_file): @@ -361,7 +384,7 @@ class Mastodon: media_file = open(media_file, 'rb') if mime_type == None: - raise ValueError('Could not determine mime type or data passed directly without mime type.') + raise MastodonIllegalArgumentError('Could not determine mime type or data passed directly without mime type.') random_suffix = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10)) file_name = "mastodonpyupload_" + str(time.time()) + "_" + str(random_suffix) + mimetypes.guess_extension(mime_type) @@ -375,11 +398,32 @@ class Mastodon: def __api_request(self, method, endpoint, params = {}, files = {}): """ Internal API request helper. + + TODO FIXME: time.time() does not match server time neccesarily. Using the time from the request + would be correct. + + TODO FIXME: Date parsing can fail. Should probably use a proper "date parsing" module rather than + rely on the server to return the right thing. """ response = None headers = None - + # "pace" mode ratelimiting: Assume constant rate of requests, sleep a little less long than it + # would take to not hit the rate limit at that request rate. + if self.ratelimit_method == "pace": + if self.ratelimit_remaining == 0: + to_next = self.ratelimit_reset - time.time() + if to_next > 0: + time.sleep(to_next) + else: + time_waited = time.time() - self.ratelimit_lastcall + time_wait = float(self.ratelimit_reset - time.time()) / float(self.ratelimit_remaining) + remaining_wait = time_wait - time_waited + + if remaining_wait > 0: + time.sleep(remaining_wait * self.ratelimit_pacefactor) + + # Generate request headers if self.access_token != None: headers = {'Authorization': 'Bearer ' + self.access_token} @@ -389,26 +433,60 @@ class Mastodon: print('Headers: ' + str(headers)) print('Files: ' + str(files)) - if method == 'GET': - response = requests.get(self.api_base_url + endpoint, data = params, headers = headers, files = files) - - if method == 'POST': - response = requests.post(self.api_base_url + endpoint, data = params, headers = headers, files = files) + # Make request + request_complete = False + while not request_complete: + request_complete = True - if method == 'DELETE': - response = requests.delete(self.api_base_url + endpoint, data = params, headers = headers, files = files) + response_object = None + try: + if method == 'GET': + response_object = requests.get(self.api_base_url + endpoint, data = params, headers = headers, files = files) + + if method == 'POST': + response_object = requests.post(self.api_base_url + endpoint, data = params, headers = headers, files = files) + + if method == 'DELETE': + response_object = requests.delete(self.api_base_url + endpoint, data = params, headers = headers, files = files) + except: + raise MastodonNetworkError("Could not complete request.") - if response.status_code == 404: - raise IOError('Endpoint not found.') - - if response.status_code == 500: - raise IOError('General API problem.') - - try: - response = response.json() - except: - raise ValueError("Could not parse response as JSON, respose code was " + str(response.status_code)) + if response_object == None: + raise MastodonIllegalArgumentError("Illegal request.") + + # Handle response + if self.debug_requests == True: + print('Mastodon: Response received with code ' + str(response_object.status_code) + '.') + print('Respose headers: ' + str(response_object.headers)) + print('Response text content: ' + str(response_object.text)) + + if response_object.status_code == 404: + raise MastodonAPIError('Endpoint not found.') + + if response_object.status_code == 500: + raise MastodonAPIError('General API problem.') + + try: + response = response_object.json() + except: + raise MastodonAPIError("Could not parse response as JSON, respose code was " + str(response_object.status_code)) + # Handle rate limiting + self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining']) + self.ratelimit_limit = int(response_object.headers['X-RateLimit-Limit']) + self.ratelimit_reset = (datetime.strptime(response_object.headers['X-RateLimit-Reset'], "%Y-%m-%dT%H:%M:%S.%fZ") - datetime(1970, 1, 1)).total_seconds() + self.ratelimit_lastcall = time.time() + + if "error" in response and response["error"] == "Throttled": + if self.ratelimit_method == "throw": + raise MastodonRatelimitError("Hit rate limit.") + + if self.ratelimit_method == "wait" or self.ratelimit_method == "pace": + to_next = self.ratelimit_reset - time.time() + if to_next > 0: + time.sleep(to_next) + request_complete = False + return response def __generate_params(self, params, exclude = []): @@ -430,3 +508,19 @@ class Mastodon: del params[key] return params + +## +# Exceptions +## +class MastodonIllegalArgumentError(ValueError): + pass + +class MastodonNetworkError(IOError): + pass + +class MastodonAPIError(Exception): + pass + +class MastodonRatelimitError(Exception): + pass + From 61775d90831704d012b9f3d6c5453ca738bc0724 Mon Sep 17 00:00:00 2001 From: Lorenz Diener Date: Fri, 25 Nov 2016 23:14:00 +0100 Subject: [PATCH 2/2] Rate limiting now works. --- docs/index.rst | 27 +++++++++++++++ mastodon/Mastodon.py | 82 +++++++++++++++++++++++++++++++------------- 2 files changed, 85 insertions(+), 24 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index e7c9366..02676a4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -39,6 +39,33 @@ as a single python module. By default, it talks to the `Mastodon flagship instance`_, but it can be set to talk to any node running Mastodon. +A note about rate limits +------------------------ +Mastodons API rate limits per IP. Mastodon.py has three modes for dealing +with rate limiting that you can pass to the constructor, "throw", "wait" +and "pace", "wait" being the default. + +In "throw" mode, Mastodon.py makes no attempt to stick to rate limits. When +a request hits the rate limit, it simply throws a MastodonRateLimitError. This is +for applications that need to handle all rate limiting themselves (i.e. interactive apps), +or applications wanting to use Mastodon.py in a multi-threaded context ("wait" and "pace" +modes are not thread safe). + +In "wait" mode, once a request hits the rate limit, Mastodon.py will wait until +the rate limit resets and then try again, until the request succeeds or an error +is encountered. This mode is for applications that would rather just not worry about rate limits +much, don't poll the api all that often, and are okay with a call sometimes just taking +a while. + +In "pace" mode, Mastodon.py will delay each new request after the first one such that, +if requests were to continue at the same rate, only a certain fraction (set in the +constructor as ratelimit_pacefactor) of the rate limit will be used up. The fraction can +be (and by default, is) greater than one. If the rate limit is hit, "pace" behaves like +"wait". This mode is probably the most advanced one and allows you to just poll in +a loop without ever sleeping at all yourself. It is for applications that would rather +just pretend there is no such thing as a rate limit and are fine with sometimes not +being very interactive. + A note about IDs ---------------- Mastodons API uses IDs in several places: User IDs, Toot IDs, ... diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py index bc1c52b..bb16d95 100644 --- a/mastodon/Mastodon.py +++ b/mastodon/Mastodon.py @@ -7,7 +7,10 @@ import mimetypes import time import random import string -from datetime import datetime +import pytz +import datetime +import dateutil +import dateutil.parser class Mastodon: """ @@ -62,12 +65,12 @@ class Mastodon: ### # Authentication, including constructor ### - def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False, ratelimit_method = "wait", ratelimit_pacefactor = 0.9): + def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False, ratelimit_method = "wait", ratelimit_pacefactor = 1.1): """ Create a new API wrapper instance based on the given client_secret and client_id. If you give a client_id and it is not a file, you must also give a secret. - You can also directly specify an access_token, directly or as a file. + You can also specify an access_token, directly or as a file (as written by log_in). Mastodon.py can try to respect rate limits in several ways, controlled by ratelimit_method. "throw" makes functions throw a MastodonRatelimitError when the rate @@ -92,7 +95,7 @@ class Mastodon: self.ratelimit_reset = time.time() self.ratelimit_remaining = 150 self.ratelimit_lastcall = time.time() - self.ratelimit_pacefactor = 0.9 + self.ratelimit_pacefactor = ratelimit_pacefactor if os.path.isfile(self.client_id): with open(self.client_id, 'r') as secret_file: @@ -426,15 +429,26 @@ class Mastodon: ### # Internal helpers, dragons probably ### + def __datetime_to_epoch(self, date_time): + """ + Converts a python datetime to unix epoch, accounting for + time zones and such. + + Assumes UTC if timezone is not given. + """ + date_time_utc = None + if date_time.tzinfo == None: + date_time_utc = date_time.replace(tzinfo = pytz.utc) + else: + date_time_utc = date_time.astimezone(pytz.utc) + + epoch_utc = datetime.datetime.utcfromtimestamp(0).replace(tzinfo = pytz.utc) + + return (date_time_utc - epoch_utc).total_seconds() + def __api_request(self, method, endpoint, params = {}, files = {}, do_ratelimiting = True): """ Internal API request helper. - - TODO FIXME: time.time() does not match server time neccesarily. Using the time from the request - would be correct. - - TODO FIXME: Date parsing can fail. Should probably use a proper "date parsing" module rather than - rely on the server to return the right thing. """ response = None headers = None @@ -445,6 +459,8 @@ class Mastodon: if self.ratelimit_remaining == 0: to_next = self.ratelimit_reset - time.time() if to_next > 0: + # As a precaution, never sleep longer than 5 minutes + to_next = min(to_next, 5 * 60) time.sleep(to_next) else: time_waited = time.time() - self.ratelimit_lastcall @@ -452,7 +468,9 @@ class Mastodon: remaining_wait = time_wait - time_waited if remaining_wait > 0: - time.sleep(remaining_wait * self.ratelimit_pacefactor) + to_next = remaining_wait / self.ratelimit_pacefactor + to_next = min(to_next, 5 * 60) + time.sleep(to_next) # Generate request headers if self.access_token != None: @@ -503,21 +521,34 @@ class Mastodon: raise MastodonAPIError("Could not parse response as JSON, respose code was " + str(response_object.status_code)) # Handle rate limiting - if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting: - self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining']) - self.ratelimit_limit = int(response_object.headers['X-RateLimit-Limit']) - self.ratelimit_reset = (datetime.strptime(response_object.headers['X-RateLimit-Reset'], "%Y-%m-%dT%H:%M:%S.%fZ") - datetime(1970, 1, 1)).total_seconds() - self.ratelimit_lastcall = time.time() + try: + if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting: + self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining']) + self.ratelimit_limit = int(response_object.headers['X-RateLimit-Limit']) - if "error" in response and response["error"] == "Throttled": - if self.ratelimit_method == "throw": - raise MastodonRatelimitError("Hit rate limit.") + ratelimit_reset_datetime = dateutil.parser.parse(response_object.headers['X-RateLimit-Reset']) + self.ratelimit_reset = self.__datetime_to_epoch(ratelimit_reset_datetime) - if self.ratelimit_method == "wait" or self.ratelimit_method == "pace": - to_next = self.ratelimit_reset - time.time() - if to_next > 0: - time.sleep(to_next) - request_complete = False + # Adjust server time to local clock + server_time_datetime = dateutil.parser.parse(response_object.headers['Date']) + server_time = self.__datetime_to_epoch(server_time_datetime) + server_time_diff = time.time() - server_time + self.ratelimit_reset += server_time_diff + self.ratelimit_lastcall = time.time() + + if "error" in response and response["error"] == "Throttled": + if self.ratelimit_method == "throw": + raise MastodonRatelimitError("Hit rate limit.") + + if self.ratelimit_method == "wait" or self.ratelimit_method == "pace": + to_next = self.ratelimit_reset - time.time() + if to_next > 0: + # As a precaution, never sleep longer than 5 minutes + to_next = min(to_next, 5 * 60) + time.sleep(to_next) + request_complete = False + except: + raise MastodonRatelimitError("Rate limit time calculations failed.") return response @@ -547,6 +578,9 @@ class Mastodon: class MastodonIllegalArgumentError(ValueError): pass +class MastodonFileNotFoundError(IOError): + pass + class MastodonNetworkError(IOError): pass