From e4e3a8eb93721bf12e4a5b2bf45dc3c2a473dc6f Mon Sep 17 00:00:00 2001
From: Lorenz Diener <lorenz.diener@uni-bremen.de>
Date: Fri, 25 Nov 2016 18:17:39 +0100
Subject: [PATCH 1/2] Ratelimit code

---
 mastodon/Mastodon.py | 150 +++++++++++++++++++++++++++++++++++--------
 1 file changed, 122 insertions(+), 28 deletions(-)

diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py
index 7bd6473..8af739d 100644
--- a/mastodon/Mastodon.py
+++ b/mastodon/Mastodon.py
@@ -7,6 +7,7 @@ import mimetypes
 import time
 import random
 import string
+from datetime import datetime
 
 class Mastodon:
     """ 
@@ -21,6 +22,7 @@ class Mastodon:
     """
     __DEFAULT_BASE_URL = 'https://mastodon.social'
     
+    
     ###
     # Registering apps
     ###
@@ -32,6 +34,9 @@ class Mastodon:
         Specify redirect_uris if you want users to be redirected to a certain page after authenticating.
         Specify to_file to persist your apps info to a file so you can use them in the constructor.
         Specify api_base_url if you want to register an app on an instance different from the flagship one.
+        
+        Presently, app registration is open by default, but this is not guaranteed to be the case for all
+        future mastodon instances or even the flagship instance in the future.
            
         Returns client_id and client_secret.
         """
@@ -57,13 +62,22 @@ class Mastodon:
     ###
     # Authentication, including constructor
     ###
-    def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False):
+    def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False, ratelimit_method = "wait", ratelimit_pacefactor = 0.9):
         """
         Creates a new API wrapper instance based on the given client_secret and client_id. If you
         give a client_id and it is not a file, you must also give a secret.
            
         You can also directly specify an access_token, directly or as a file.
-            
+        
+        Mastodon.py can try to respect rate limits in several ways, controlled by ratelimit_method.
+        "throw" makes functions throw a MastodonRatelimitError when the rate
+        limit is hit. "wait" mode will, once the limit is hit, wait and retry the request as soon
+        as the rate limit resets, until it succeeds. "pace" works like "wait", but also tries to wait in
+        between calls so that the limit is generally not hit (How hard it tries to not hit the rate 
+        limit can be controlled by ratelimit_pacefactor). The default setting is "wait". Note that
+        even in "wait" and "pace" mode, requests can still fail due to network or other problems! Also
+        note that "pace" and "wait" are NOT thread safe.
+        
         Specify api_base_url if you wish to talk to an instance other than the flagship one.
         If a file is given as client_id, read client ID and secret from that file
         """
@@ -72,6 +86,13 @@ class Mastodon:
         self.client_secret = client_secret
         self.access_token = access_token
         self.debug_requests = debug_requests
+        self.ratelimit_method = ratelimit_method
+        
+        self.ratelimit_limit = 150
+        self.ratelimit_reset = time.time()
+        self.ratelimit_remaining = 150
+        self.ratelimit_lastcall = time.time()
+        self.ratelimit_pacefactor = 0.9
         
         if os.path.isfile(self.client_id):
             with open(self.client_id, 'r') as secret_file:
@@ -79,7 +100,7 @@ class Mastodon:
                 self.client_secret = secret_file.readline().rstrip()
         else:
             if self.client_secret == None:
-                raise ValueError('Specified client id directly, but did not supply secret')
+                raise MastodonIllegalArgumentError('Specified client id directly, but did not supply secret')
                 
         if self.access_token != None and os.path.isfile(self.access_token):
             with open(self.access_token, 'r') as token_file:
@@ -87,8 +108,10 @@ class Mastodon:
                 
     def log_in(self, username, password, scopes = ['read', 'write', 'follow'], to_file = None):
         """
-        Logs in and sets access_token to what was returned.
-        Can persist access token to file.
+        Logs in and sets access_token to what was returned. Note that your
+        username is the e-mail address you use to log in to Mastodon.
+        
+        Can persist access token to file, to be used in the constructor.
         
         Will throw an exception if username / password are wrong, scopes are not
         valid or granted scopes differ from requested.
@@ -105,13 +128,13 @@ class Mastodon:
             response = self.__api_request('POST', '/oauth/token', params)      
             self.access_token = response['access_token']
         except:
-            raise ValueError('Invalid user name, password or scopes.')
+            raise MastodonIllegalArgumentError('Invalid user name, password or scopes.')
         
         requested_scopes = " ".join(sorted(scopes))
         received_scopes = " ".join(sorted(response["scope"].split(" ")))
         
         if requested_scopes != received_scopes:
-            raise ValueError('Granted scopes "' + received_scopes + '" differ from requested scopes "' + requested_scopes + '".')
+            raise MastodonAPIError('Granted scopes "' + received_scopes + '" differ from requested scopes "' + requested_scopes + '".')
         
         if to_file != None:
             with open(to_file, 'w') as token_file:
@@ -352,8 +375,8 @@ class Mastodon:
         the ID that can then be used in status_post() to attach the media to
         a toot.
         
-        Throws a ValueError if the mime type of the passed data or file can
-        not be determined properly.
+        Throws a MastodonIllegalArgumentError if the mime type of the 
+        passed data or file can not be determined properly.
         """
         
         if os.path.isfile(media_file):
@@ -361,7 +384,7 @@ class Mastodon:
             media_file = open(media_file, 'rb')
             
         if mime_type == None:
-            raise ValueError('Could not determine mime type or data passed directly without mime type.')
+            raise MastodonIllegalArgumentError('Could not determine mime type or data passed directly without mime type.')
         
         random_suffix = ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(10))
         file_name = "mastodonpyupload_" + str(time.time()) + "_" + str(random_suffix) + mimetypes.guess_extension(mime_type)
@@ -375,11 +398,32 @@ class Mastodon:
     def __api_request(self, method, endpoint, params = {}, files = {}):
         """
         Internal API request helper.
+        
+        TODO FIXME: time.time() does not match server time neccesarily. Using the time from the request
+        would be correct.
+        
+        TODO FIXME: Date parsing can fail. Should probably use a proper "date parsing" module rather than
+        rely on the server to return the right thing.
         """
         response = None
         headers = None
         
-
+        # "pace" mode ratelimiting: Assume constant rate of requests, sleep a little less long than it
+        # would take to not hit the rate limit at that request rate.
+        if self.ratelimit_method == "pace":
+            if self.ratelimit_remaining == 0:
+                to_next = self.ratelimit_reset - time.time()
+                if to_next > 0:
+                    time.sleep(to_next)
+            else:
+                time_waited = time.time() - self.ratelimit_lastcall
+                time_wait = float(self.ratelimit_reset - time.time()) / float(self.ratelimit_remaining)
+                remaining_wait = time_wait - time_waited
+            
+            if remaining_wait > 0:
+                time.sleep(remaining_wait * self.ratelimit_pacefactor)
+                
+        # Generate request headers
         if self.access_token != None:
             headers = {'Authorization': 'Bearer ' + self.access_token}
         
@@ -389,26 +433,60 @@ class Mastodon:
             print('Headers: ' + str(headers))
             print('Files: ' + str(files))
 
-        if method == 'GET':
-            response = requests.get(self.api_base_url + endpoint, data = params, headers = headers, files = files)
-        
-        if method == 'POST':
-            response = requests.post(self.api_base_url + endpoint, data = params, headers = headers, files = files)
+        # Make request
+        request_complete = False
+        while not request_complete:
+            request_complete = True
             
-        if method == 'DELETE':
-            response = requests.delete(self.api_base_url + endpoint, data = params, headers = headers, files = files)
+            response_object = None
+            try:
+                if method == 'GET':
+                    response_object = requests.get(self.api_base_url + endpoint, data = params, headers = headers, files = files)
+                
+                if method == 'POST':
+                    response_object = requests.post(self.api_base_url + endpoint, data = params, headers = headers, files = files)
+                    
+                if method == 'DELETE':
+                    response_object = requests.delete(self.api_base_url + endpoint, data = params, headers = headers, files = files)
+            except:
+                raise MastodonNetworkError("Could not complete request.")
         
-        if response.status_code == 404:
-            raise IOError('Endpoint not found.')
-        
-        if response.status_code == 500:
-            raise IOError('General API problem.')
-        
-        try:
-            response = response.json()
-        except:
-            raise ValueError("Could not parse response as JSON, respose code was " + str(response.status_code))
+            if response_object == None:
+                raise MastodonIllegalArgumentError("Illegal request.")
+            
+            # Handle response
+            if self.debug_requests == True:
+                print('Mastodon: Response received with code ' + str(response_object.status_code) + '.')
+                print('Respose headers: ' + str(response_object.headers))
+                print('Response text content: ' + str(response_object.text))
+            
+            if response_object.status_code == 404:
+                raise MastodonAPIError('Endpoint not found.')
+            
+            if response_object.status_code == 500:
+                raise MastodonAPIError('General API problem.')
+            
+            try:
+                response = response_object.json()
+            except:
+                raise MastodonAPIError("Could not parse response as JSON, respose code was " + str(response_object.status_code))
         
+            # Handle rate limiting
+            self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining'])
+            self.ratelimit_limit = int(response_object.headers['X-RateLimit-Limit'])
+            self.ratelimit_reset = (datetime.strptime(response_object.headers['X-RateLimit-Reset'], "%Y-%m-%dT%H:%M:%S.%fZ") - datetime(1970, 1, 1)).total_seconds()
+            self.ratelimit_lastcall = time.time()
+            
+            if "error" in response and response["error"] == "Throttled":
+                if self.ratelimit_method == "throw":
+                    raise MastodonRatelimitError("Hit rate limit.")
+                
+                if self.ratelimit_method == "wait" or self.ratelimit_method == "pace":
+                    to_next = self.ratelimit_reset - time.time()
+                    if to_next > 0:
+                        time.sleep(to_next)
+                    request_complete = False
+                    
         return response
     
     def __generate_params(self, params, exclude = []):
@@ -430,3 +508,19 @@ class Mastodon:
                 del params[key]
                 
         return params
+
+##
+# Exceptions
+##
+class MastodonIllegalArgumentError(ValueError):
+    pass
+
+class MastodonNetworkError(IOError):
+    pass
+
+class MastodonAPIError(Exception):
+    pass
+
+class MastodonRatelimitError(Exception):
+    pass
+

From 61775d90831704d012b9f3d6c5453ca738bc0724 Mon Sep 17 00:00:00 2001
From: Lorenz Diener <lorenzd@gmail.com>
Date: Fri, 25 Nov 2016 23:14:00 +0100
Subject: [PATCH 2/2] Rate limiting now works.

---
 docs/index.rst       | 27 +++++++++++++++
 mastodon/Mastodon.py | 82 +++++++++++++++++++++++++++++++-------------
 2 files changed, 85 insertions(+), 24 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index e7c9366..02676a4 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -39,6 +39,33 @@ as a single python module. By default, it talks to the
 `Mastodon flagship instance`_, but it can be set to talk to any 
 node running Mastodon.
 
+A note about rate limits
+------------------------
+Mastodon's API is rate limited per IP. Mastodon.py has three modes for dealing
+with rate limiting that you can pass to the constructor, "throw", "wait"
+and "pace", "wait" being the default.
+
+In "throw" mode, Mastodon.py makes no attempt to stick to rate limits. When
+a request hits the rate limit, it simply throws a MastodonRatelimitError. This is
+for applications that need to handle all rate limiting themselves (e.g. interactive apps),
+or applications wanting to use Mastodon.py in a multi-threaded context ("wait" and "pace" 
+modes are not thread safe).
+
+In "wait" mode, once a request hits the rate limit, Mastodon.py will wait until
+the rate limit resets and then try again, until the request succeeds or an error
+is encountered. This mode is for applications that would rather just not worry about rate limits
+much, don't poll the API all that often, and are okay with a call sometimes just taking
+a while.
+
+In "pace" mode, Mastodon.py will delay each new request after the first one such that, 
+if requests were to continue at the same rate, only a certain fraction (set in the
+constructor as ratelimit_pacefactor) of the rate limit will be used up. The fraction can
+be (and by default, is) greater than one. If the rate limit is hit, "pace" behaves like
+"wait". This mode is probably the most advanced one and allows you to just poll in
+a loop without ever sleeping at all yourself. It is for applications that would rather
+just pretend there is no such thing as a rate limit and are fine with sometimes not
+being very interactive.
+
 A note about IDs
 ----------------
 Mastodons API uses IDs in several places: User IDs, Toot IDs, ...
diff --git a/mastodon/Mastodon.py b/mastodon/Mastodon.py
index bc1c52b..bb16d95 100644
--- a/mastodon/Mastodon.py
+++ b/mastodon/Mastodon.py
@@ -7,7 +7,10 @@ import mimetypes
 import time
 import random
 import string
-from datetime import datetime
+import pytz
+import datetime
+import dateutil
+import dateutil.parser
 
 class Mastodon:
     """ 
@@ -62,12 +65,12 @@ class Mastodon:
     ###
     # Authentication, including constructor
     ###
-    def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False, ratelimit_method = "wait", ratelimit_pacefactor = 0.9):
+    def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False, ratelimit_method = "wait", ratelimit_pacefactor = 1.1):
         """
         Create a new API wrapper instance based on the given client_secret and client_id. If you
         give a client_id and it is not a file, you must also give a secret.
            
-        You can also directly specify an access_token, directly or as a file.
+        You can also specify an access_token, directly or as a file (as written by log_in).
         
         Mastodon.py can try to respect rate limits in several ways, controlled by ratelimit_method.
         "throw" makes functions throw a MastodonRatelimitError when the rate
@@ -92,7 +95,7 @@ class Mastodon:
         self.ratelimit_reset = time.time()
         self.ratelimit_remaining = 150
         self.ratelimit_lastcall = time.time()
-        self.ratelimit_pacefactor = 0.9
+        self.ratelimit_pacefactor = ratelimit_pacefactor
         
         if os.path.isfile(self.client_id):
             with open(self.client_id, 'r') as secret_file:
@@ -426,15 +429,26 @@ class Mastodon:
     ###
     # Internal helpers, dragons probably
     ###
+    def __datetime_to_epoch(self, date_time):
+        """
+        Converts a python datetime to unix epoch, accounting for
+        time zones and such.
+        
+        Assumes UTC if timezone is not given.
+        """
+        date_time_utc = None
+        if date_time.tzinfo == None:
+            date_time_utc = date_time.replace(tzinfo = pytz.utc)
+        else:
+            date_time_utc = date_time.astimezone(pytz.utc)
+        
+        epoch_utc = datetime.datetime.utcfromtimestamp(0).replace(tzinfo = pytz.utc)
+        
+        return (date_time_utc - epoch_utc).total_seconds()
+    
     def __api_request(self, method, endpoint, params = {}, files = {}, do_ratelimiting = True):
         """
         Internal API request helper.
-        
-        TODO FIXME: time.time() does not match server time neccesarily. Using the time from the request
-        would be correct.
-        
-        TODO FIXME: Date parsing can fail. Should probably use a proper "date parsing" module rather than
-        rely on the server to return the right thing.
         """
         response = None
         headers = None
@@ -445,6 +459,8 @@ class Mastodon:
             if self.ratelimit_remaining == 0:
                 to_next = self.ratelimit_reset - time.time()
                 if to_next > 0:
+                    # As a precaution, never sleep longer than 5 minutes
+                    to_next = min(to_next, 5 * 60)
                     time.sleep(to_next)
             else:
                 time_waited = time.time() - self.ratelimit_lastcall
@@ -452,7 +468,9 @@ class Mastodon:
                 remaining_wait = time_wait - time_waited
             
             if remaining_wait > 0:
-                time.sleep(remaining_wait * self.ratelimit_pacefactor)
+                to_next = remaining_wait / self.ratelimit_pacefactor
+                to_next = min(to_next, 5 * 60)
+                time.sleep(to_next)
                 
         # Generate request headers
         if self.access_token != None:
@@ -503,21 +521,34 @@ class Mastodon:
                 raise MastodonAPIError("Could not parse response as JSON, respose code was " + str(response_object.status_code))
         
             # Handle rate limiting
-            if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting:
-                self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining'])
-                self.ratelimit_limit = int(response_object.headers['X-RateLimit-Limit'])
-                self.ratelimit_reset = (datetime.strptime(response_object.headers['X-RateLimit-Reset'], "%Y-%m-%dT%H:%M:%S.%fZ") - datetime(1970, 1, 1)).total_seconds()
-                self.ratelimit_lastcall = time.time()
+            try:
+                if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting:
+                    self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining'])
+                    self.ratelimit_limit = int(response_object.headers['X-RateLimit-Limit'])
 
-                if "error" in response and response["error"] == "Throttled":
-                    if self.ratelimit_method == "throw":
-                        raise MastodonRatelimitError("Hit rate limit.")
+                    ratelimit_reset_datetime = dateutil.parser.parse(response_object.headers['X-RateLimit-Reset'])
+                    self.ratelimit_reset = self.__datetime_to_epoch(ratelimit_reset_datetime)
 
-                    if self.ratelimit_method == "wait" or self.ratelimit_method == "pace":
-                        to_next = self.ratelimit_reset - time.time()
-                        if to_next > 0:
-                            time.sleep(to_next)
-                        request_complete = False
+                    # Adjust server time to local clock
+                    server_time_datetime = dateutil.parser.parse(response_object.headers['Date'])
+                    server_time = self.__datetime_to_epoch(server_time_datetime)
+                    server_time_diff = time.time() - server_time
+                    self.ratelimit_reset += server_time_diff
+                    self.ratelimit_lastcall = time.time()
+
+                    if "error" in response and response["error"] == "Throttled":
+                        if self.ratelimit_method == "throw":
+                            raise MastodonRatelimitError("Hit rate limit.")
+
+                        if self.ratelimit_method == "wait" or self.ratelimit_method == "pace":
+                            to_next = self.ratelimit_reset - time.time()
+                            if to_next > 0:
+                                # As a precaution, never sleep longer than 5 minutes
+                                to_next = min(to_next, 5 * 60) 
+                                time.sleep(to_next)
+                            request_complete = False
+            except:
+                raise MastodonRatelimitError("Rate limit time calculations failed.")
                     
         return response
     
@@ -547,6 +578,9 @@ class Mastodon:
 class MastodonIllegalArgumentError(ValueError):
     pass
 
+class MastodonFileNotFoundError(IOError):
+    pass
+
 class MastodonNetworkError(IOError):
     pass