Rate limiting now works.

2016-11-25 23:14:00 +01:00 · 2016-11-25 23:14:00 +01:00 · 61775d9083
commit 61775d9083
--- a/docs/index.rst
+++ b/docs/index.rst
@ -39,6 +39,33 @@ as a single python module. By default, it talks to the
 `Mastodon flagship instance`_, but it can be set to talk to any 
 node running Mastodon.

+A note about rate limits
+------------------------
+Mastodon's API rate limits per IP. Mastodon.py has three modes for dealing
+with rate limiting that you can pass to the constructor, "throw", "wait"
+and "pace", "wait" being the default.
+
+In "throw" mode, Mastodon.py makes no attempt to stick to rate limits. When
+a request hits the rate limit, it simply throws a MastodonRatelimitError. This is
+for applications that need to handle all rate limiting themselves (e.g. interactive apps),
+or applications wanting to use Mastodon.py in a multi-threaded context ("wait" and "pace"
+modes are not thread safe).
+
+In "wait" mode, once a request hits the rate limit, Mastodon.py will wait until
+the rate limit resets and then try again, until the request succeeds or an error
+is encountered. This mode is for applications that would rather just not worry about rate limits
+much, don't poll the API all that often, and are okay with a call sometimes just taking
+a while.
+
+In "pace" mode, Mastodon.py will delay each new request after the first one such that, 
+if requests were to continue at the same rate, only a certain fraction (set in the
+constructor as ratelimit_pacefactor) of the rate limit will be used up. The fraction can
+be (and by default, is) greater than one. If the rate limit is hit, "pace" behaves like
+"wait". This mode is probably the most advanced one and allows you to just poll in
+a loop without ever sleeping at all yourself. It is for applications that would rather
+just pretend there is no such thing as a rate limit and are fine with sometimes not
+being very interactive.
+
 A note about IDs
 ----------------
 Mastodons API uses IDs in several places: User IDs, Toot IDs, ...
--- a/mastodon/Mastodon.py
+++ b/mastodon/Mastodon.py
@ -7,7 +7,10 @@ import mimetypes
 import time
 import random
 import string
-from datetime import datetime
+import pytz
+import datetime
+import dateutil
+import dateutil.parser

 class Mastodon:
    """ 
@ -62,12 +65,12 @@ class Mastodon:
    ###
    # Authentication, including constructor
    ###
-    def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False, ratelimit_method = "wait", ratelimit_pacefactor = 0.9):
+    def __init__(self, client_id, client_secret = None, access_token = None, api_base_url = __DEFAULT_BASE_URL, debug_requests = False, ratelimit_method = "wait", ratelimit_pacefactor = 1.1):
        """
        Create a new API wrapper instance based on the given client_secret and client_id. If you
        give a client_id and it is not a file, you must also give a secret.
           
-        You can also directly specify an access_token, directly or as a file.
+        You can also specify an access_token, directly or as a file (as written by log_in).
        
        Mastodon.py can try to respect rate limits in several ways, controlled by ratelimit_method.
        "throw" makes functions throw a MastodonRatelimitError when the rate
@ -92,7 +95,7 @@ class Mastodon:
        self.ratelimit_reset = time.time()
        self.ratelimit_remaining = 150
        self.ratelimit_lastcall = time.time()
-        self.ratelimit_pacefactor = 0.9
+        self.ratelimit_pacefactor = ratelimit_pacefactor
        
        if os.path.isfile(self.client_id):
            with open(self.client_id, 'r') as secret_file:
@ -426,15 +429,26 @@ class Mastodon:
    ###
    # Internal helpers, dragons probably
    ###
+    def __datetime_to_epoch(self, date_time):
+        """
+        Converts a python datetime to unix epoch, accounting for
+        time zones and such.
+        
+        Assumes UTC if timezone is not given.
+        """
+        date_time_utc = None
+        if date_time.tzinfo == None:
+            date_time_utc = date_time.replace(tzinfo = pytz.utc)
+        else:
+            date_time_utc = date_time.astimezone(pytz.utc)
+        
+        epoch_utc = datetime.datetime.utcfromtimestamp(0).replace(tzinfo = pytz.utc)
+        
+        return (date_time_utc - epoch_utc).total_seconds()
+    
    def __api_request(self, method, endpoint, params = {}, files = {}, do_ratelimiting = True):
        """
        Internal API request helper.
-        
-        TODO FIXME: time.time() does not match server time neccesarily. Using the time from the request
-        would be correct.
-        
-        TODO FIXME: Date parsing can fail. Should probably use a proper "date parsing" module rather than
-        rely on the server to return the right thing.
        """
        response = None
        headers = None
@ -445,6 +459,8 @@ class Mastodon:
            if self.ratelimit_remaining == 0:
                to_next = self.ratelimit_reset - time.time()
                if to_next > 0:
+                    # As a precaution, never sleep longer than 5 minutes
+                    to_next = min(to_next, 5 * 60)
                    time.sleep(to_next)
            else:
                time_waited = time.time() - self.ratelimit_lastcall
@ -452,7 +468,9 @@ class Mastodon:
                remaining_wait = time_wait - time_waited
            
            if remaining_wait > 0:
-                time.sleep(remaining_wait * self.ratelimit_pacefactor)
+                to_next = remaining_wait / self.ratelimit_pacefactor
+                to_next = min(to_next, 5 * 60)
+                time.sleep(to_next)
                
        # Generate request headers
        if self.access_token != None:
@ -503,21 +521,34 @@ class Mastodon:
                raise MastodonAPIError("Could not parse response as JSON, respose code was " + str(response_object.status_code))
        
            # Handle rate limiting
-            if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting:
-                self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining'])
-                self.ratelimit_limit = int(response_object.headers['X-RateLimit-Limit'])
-                self.ratelimit_reset = (datetime.strptime(response_object.headers['X-RateLimit-Reset'], "%Y-%m-%dT%H:%M:%S.%fZ") - datetime(1970, 1, 1)).total_seconds()
-                self.ratelimit_lastcall = time.time()
+            try:
+                if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting:
+                    self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining'])
+                    self.ratelimit_limit = int(response_object.headers['X-RateLimit-Limit'])

-                if "error" in response and response["error"] == "Throttled":
-                    if self.ratelimit_method == "throw":
-                        raise MastodonRatelimitError("Hit rate limit.")
+                    ratelimit_reset_datetime = dateutil.parser.parse(response_object.headers['X-RateLimit-Reset'])
+                    self.ratelimit_reset = self.__datetime_to_epoch(ratelimit_reset_datetime)

-                    if self.ratelimit_method == "wait" or self.ratelimit_method == "pace":
-                        to_next = self.ratelimit_reset - time.time()
-                        if to_next > 0:
-                            time.sleep(to_next)
-                        request_complete = False
+                    # Adjust server time to local clock
+                    server_time_datetime = dateutil.parser.parse(response_object.headers['Date'])
+                    server_time = self.__datetime_to_epoch(server_time_datetime)
+                    server_time_diff = time.time() - server_time
+                    self.ratelimit_reset += server_time_diff
+                    self.ratelimit_lastcall = time.time()
+
+                    if "error" in response and response["error"] == "Throttled":
+                        if self.ratelimit_method == "throw":
+                            raise MastodonRatelimitError("Hit rate limit.")
+
+                        if self.ratelimit_method == "wait" or self.ratelimit_method == "pace":
+                            to_next = self.ratelimit_reset - time.time()
+                            if to_next > 0:
+                                # As a precaution, never sleep longer than 5 minutes
+                                to_next = min(to_next, 5 * 60) 
+                                time.sleep(to_next)
+                            request_complete = False
+            except:
+                raise MastodonRatelimitError("Rate limit time calculations failed.")
                    
        return response
    
@ -547,6 +578,9 @@ class Mastodon:
 class MastodonIllegalArgumentError(ValueError):
    pass

+class MastodonFileNotFoundError(IOError):
+    pass
+
 class MastodonNetworkError(IOError):
    pass