Lorenz Diener 2017-06-16 01:23:19 +02:00
parent 8e202fbdc0
commit d4b37995fe
2 changed files with 136 additions and 5 deletions

View file

@@ -73,6 +73,28 @@ a loop without ever sleeping at all yourself. It is for applications that would
just pretend there is no such thing as a rate limit and are fine with sometimes not
being very interactive.
A note about pagination
-----------------------
Many of Mastodon's API endpoints are paginated. This means that when you request
data from them, you might not get all of it at once - instead, you might only get the
first few results.
All endpoints that are paginated have three parameters: since_id, max_id and limit.
since_id allows you to specify the smallest id you want in the returned data. max_id,
similarly, allows you to specify the largest. By specifying either one (generally,
only one, not both) of them you can go through pages forwards and backwards.
limit allows you to specify how many results you would like returned. Note that an
instance may choose to return fewer results than you requested.
The responses returned by paginated endpoints contain a "Link" header that specifies
which parameters to use to get the next and previous pages. Mastodon.py parses these
and stores them (if present) in the first (for the previous page) and last (for the
next page) item of the returned list as _pagination_prev and _pagination_next.
There are convenience functions available for fetching the previous and next page of
a paginated request as well as for fetching all pages starting from a first page.
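For example, here is a minimal sketch of paging through the home timeline (this
assumes a logged-in API instance named mastodon; timeline_home is one of the
paginated endpoints):

.. code-block:: python

    # Fetch the first page - at most 20 statuses
    page = mastodon.timeline_home(limit=20)

    # Follow '_pagination_next' to get the next (older) page,
    # or None if no further data is available
    older_page = mastodon.fetch_next(page)

    # Fetch a first page and everything after it in one go.
    # Careful, this can cause a lot of requests.
    all_statuses = mastodon.fetch_remaining(mastodon.timeline_home(limit=40))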
A note about IDs
----------------
Mastodon's API uses IDs in several places: User IDs, Toot IDs, ...
@@ -257,8 +279,9 @@ you can simply pass them to the constructor of the class, too!
Note that while it is perfectly reasonable to log back in whenever
your app starts, registering a new application on every
startup is not, so don't do that - instead, register an application
once, and then persist your client id and secret. Convenience
methods for this are provided.
once, and then persist your client id and secret. A convenient way to do this
is provided by the functions dealing with registering the app, logging in, and
the Mastodon class's constructor.
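A minimal sketch of that flow (the app name and file names here are just
placeholders):

.. code-block:: python

    from mastodon import Mastodon

    # Run this only once, then keep the credential file around
    Mastodon.create_app(
        'myapp',
        api_base_url='https://mastodon.social',
        to_file='myapp_clientcred.txt'
    )

    # On every startup, reuse the persisted client credentials
    mastodon = Mastodon(
        client_id='myapp_clientcred.txt',
        api_base_url='https://mastodon.social'
    )
    mastodon.log_in(
        'my_account@example.com',
        'my_password',
        to_file='myapp_usercred.txt'
    )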
To talk to an instance different from the flagship instance, specify
the api_base_url (usually, just the URL of the instance, i.e.
@@ -405,12 +428,20 @@ Writing data: Reports
Writing data: Domain blocks
---------------------------
These methods allow you to block and unblock all statuses from a domain
These functions allow you to block and unblock all statuses from a domain
for the logged-in user.
.. automethod:: Mastodon.domain_block
.. automethod:: Mastodon.domain_unblock
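For example (a small sketch, assuming a logged-in API instance named mastodon
and an arbitrary placeholder domain):

.. code-block:: python

    # Hide all statuses from a domain for the logged-in user, then undo it
    mastodon.domain_block('example.org')
    mastodon.domain_unblock('example.org')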
Pagination
----------
These functions allow for convenient retrieval of paginated data.
.. automethod:: Mastodon.fetch_next
.. automethod:: Mastodon.fetch_previous
.. automethod:: Mastodon.fetch_remaining
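A small sketch of paging in both directions (again assuming a logged-in API
instance named mastodon):

.. code-block:: python

    page = mastodon.timeline_public(limit=10)

    # Statuses newer than those in page (follows '_pagination_prev'),
    # or None if nothing newer is available
    newer = mastodon.fetch_previous(page)

    # Statuses older than those in page (follows '_pagination_next')
    older = mastodon.fetch_next(page)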
Streaming
---------
These functions allow access to the streaming API.

View file

@@ -14,10 +14,12 @@ import requests
from requests.models import urlencode
import dateutil
import dateutil.parser
import re
import copy
class Mastodon:
"""
Super basic but thorough and easy to use mastodon.social
Super basic but thorough and easy to use Mastodon
api wrapper in python.
If anything is unclear, check the official API docs at
@@ -743,6 +745,76 @@ class Mastodon:
params = self.__generate_params(locals())
return self.__api_request('DELETE', '/api/v1/domain_blocks', params)
###
# Pagination
###
def fetch_next(self, previous_page):
"""
Fetches the next page of results of a paginated request. Pass in the
previous page in its entirety, or the pagination information dict
returned as a part of that page's last status ('_pagination_next').
Returns the next page or None if no further data is available.
"""
if isinstance(previous_page, list):
if '_pagination_next' in previous_page[-1]:
params = previous_page[-1]['_pagination_next']
else:
return None
else:
params = previous_page
method = params['_pagination_method']
del params['_pagination_method']
endpoint = params['_pagination_endpoint']
del params['_pagination_endpoint']
return self.__api_request(method, endpoint, params)
def fetch_previous(self, next_page):
"""
Fetches the previous page of results of a paginated request. Pass in the
page you want to page back from in its entirety, or the pagination
information dict returned as a part of that page's first status ('_pagination_prev').
Returns the previous page or None if no further data is available.
"""
if isinstance(next_page, list):
# The previous-page information is stored on the first item of a page
if '_pagination_prev' in next_page[0]:
params = next_page[0]['_pagination_prev']
else:
return None
else:
params = next_page
method = params['_pagination_method']
del params['_pagination_method']
endpoint = params['_pagination_endpoint']
del params['_pagination_endpoint']
return self.__api_request(method, endpoint, params)
def fetch_remaining(self, first_page):
"""
Fetches all the remaining pages of a paginated request starting from a
first page and returns the entire set of results (including the first page
that was passed in) as a big list.
Be careful, as this might generate a lot of requests, depending on what you are
fetching, and might cause you to run into rate limits very quickly.
"""
first_page = copy.deepcopy(first_page)
all_pages = []
current_page = first_page
while current_page is not None:
all_pages.extend(current_page)
current_page = self.fetch_next(current_page)
return all_pages
###
# Streaming
###
@@ -786,7 +858,7 @@
incoming events.
"""
return self.__stream('/api/v1/streaming/hashtag', listener, params={'tag': tag})
###
# Internal helpers, dragons probably
###
@@ -884,6 +956,34 @@
except:
raise MastodonAPIError("Could not parse response as JSON, response code was %s, bad json content was '%s'" % (response_object.status_code, response_object.content))
# Parse link headers
if isinstance(response, list) and 'Link' in response_object.headers:
tmp_urls = requests.utils.parse_header_links(response_object.headers['Link'].rstrip('>').replace('>,<', ',<'))
for url in tmp_urls:
if url['rel'] == 'next':
# Be paranoid and extract max_id specifically
next_url = url['url']
matchgroups = re.search(r"max_id=([0-9]*)", next_url)
if matchgroups:
next_params = copy.deepcopy(params)
next_params['_pagination_method'] = method
next_params['_pagination_endpoint'] = endpoint
next_params['max_id'] = int(matchgroups.group(1))
response[-1]['_pagination_next'] = next_params
if url['rel'] == 'prev':
# Be paranoid and extract since_id specifically
prev_url = url['url']
matchgroups = re.search(r"since_id=([0-9]*)", prev_url)
if matchgroups:
prev_params = copy.deepcopy(params)
prev_params['_pagination_method'] = method
prev_params['_pagination_endpoint'] = endpoint
prev_params['since_id'] = int(matchgroups.group(1))
response[0]['_pagination_prev'] = prev_params
# Handle rate limiting
if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting:
self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining'])