This commit is contained in:
Lorenz Diener 2017-06-16 01:23:19 +02:00
pare 8e202fbdc0
commit d4b37995fe
S'han modificat 2 arxius amb 136 adicions i 5 eliminacions

Veure arxiu

@ -73,6 +73,28 @@ a loop without ever sleeping at all yourself. It is for applications that would
just pretend there is no such thing as a rate limit and are fine with sometimes not just pretend there is no such thing as a rate limit and are fine with sometimes not
being very interactive. being very interactive.
A note about pagination
-----------------------
Many of Mastodon's API endpoints are paginated. What this means is that if you request
data from them, you might not get all the data at once - instead, you might only get the
first few results.
All endpoints that are paginated have three parameters: since_id, max_id and limit.
since_id allows you to specify the smallest id you want in the returned data. max_id,
similarly, allows you to specify the largest. By specifying either one (generally,
only one, not both) of them you can go through pages forwards and backwards.
limit allows you to specify how many results you would like returned. Note that an
instance may choose to return fewer results than you requested.
The responses returned by paginated endpoints contain a "link" header that specifies
which parameters to use to get the next and previous pages. Mastodon.py parses these
and stores them (if present) in the first (for the previous page) and last (for the
next page) item of the returned list as _pagination_prev and _pagination_next.
There are convenience functions available for fetching the previous and next page of
a paginated request as well as for fetching all pages starting from a first page.
A note about IDs A note about IDs
---------------- ----------------
Mastodons API uses IDs in several places: User IDs, Toot IDs, ... Mastodons API uses IDs in several places: User IDs, Toot IDs, ...
@ -257,8 +279,9 @@ you can simply pass them to the constructor of the class, too!
Note that while it is perfectly reasonable to log back in whenever Note that while it is perfectly reasonable to log back in whenever
your app starts, registering a new application on every your app starts, registering a new application on every
startup is not, so don't do that - instead, register an application startup is not, so don't do that - instead, register an application
once, and then persist your client id and secret. Convenience once, and then persist your client id and secret. A convenient method
methods for this are provided. for this is provided by the functions dealing with registering the app,
logging in and the Mastodon class's constructor.
To talk to an instance different from the flagship instance, specify To talk to an instance different from the flagship instance, specify
the api_base_url (usually, just the URL of the instance, i.e. the api_base_url (usually, just the URL of the instance, i.e.
@ -405,12 +428,20 @@ Writing data: Reports
Writing data: Domain blocks Writing data: Domain blocks
--------------------------- ---------------------------
These methods allow you to block and unblock all statuses from a domain These functions allow you to block and unblock all statuses from a domain
for the logged-in user. for the logged-in user.
.. automethod:: Mastodon.domain_block .. automethod:: Mastodon.domain_block
.. automethod:: Mastodon.domain_unblock .. automethod:: Mastodon.domain_unblock
Pagination
----------
These functions allow for convenient retrieval of paginated data.
.. automethod:: Mastodon.fetch_next
.. automethod:: Mastodon.fetch_previous
.. automethod:: Mastodon.fetch_remaining
Streaming Streaming
--------- ---------
These functions allow access to the streaming API. These functions allow access to the streaming API.

Veure arxiu

@ -14,10 +14,12 @@ import requests
from requests.models import urlencode from requests.models import urlencode
import dateutil import dateutil
import dateutil.parser import dateutil.parser
import re
import copy
class Mastodon: class Mastodon:
""" """
Super basic but thorough and easy to use mastodon.social Super basic but thorough and easy to use Mastodon
api wrapper in python. api wrapper in python.
If anything is unclear, check the official API docs at If anything is unclear, check the official API docs at
@ -743,6 +745,76 @@ class Mastodon:
params = self.__generate_params(locals()) params = self.__generate_params(locals())
return self.__api_request('DELETE', '/api/v1/domain_blocks', params) return self.__api_request('DELETE', '/api/v1/domain_blocks', params)
###
# Pagination
###
def fetch_next(self, previous_page):
    """
    Fetches the next page of results of a paginated request. Pass in the
    previous page in its entirety, or the pagination information dict
    returned as a part of that page's last status ('_pagination_next').

    The pagination information is copied before it is used, so the page
    (or dict) that was passed in is left unmodified and can safely be
    passed in again.

    Returns the next page or None if no further data is available.
    """
    if isinstance(previous_page, list):
        # An empty page has no last item, so it cannot carry pagination info.
        if not previous_page or '_pagination_next' not in previous_page[-1]:
            return None
        params = previous_page[-1]['_pagination_next']
    else:
        params = previous_page

    # Work on a copy: stripping the bookkeeping keys from the caller's dict
    # would make a second call with the same page fail with a KeyError.
    params = copy.deepcopy(params)
    method = params.pop('_pagination_method')
    endpoint = params.pop('_pagination_endpoint')
    return self.__api_request(method, endpoint, params)
def fetch_previous(self, next_page):
    """
    Fetches the previous page of results of a paginated request. Pass in the
    next page in its entirety, or the pagination information dict
    returned as a part of that page's first status ('_pagination_prev').

    The pagination information is copied before it is used, so the page
    (or dict) that was passed in is left unmodified and can safely be
    passed in again.

    Returns the previous page or None if no further data is available.
    """
    if isinstance(next_page, list):
        # The "previous page" information is attached to the FIRST item of a
        # page (the "next page" information lives on the last one). An empty
        # page has no first item and so cannot carry pagination info.
        if not next_page or '_pagination_prev' not in next_page[0]:
            return None
        params = next_page[0]['_pagination_prev']
    else:
        params = next_page

    # Work on a copy: stripping the bookkeeping keys from the caller's dict
    # would make a second call with the same page fail with a KeyError.
    params = copy.deepcopy(params)
    method = params.pop('_pagination_method')
    endpoint = params.pop('_pagination_endpoint')
    return self.__api_request(method, endpoint, params)
def fetch_remaining(self, first_page):
    """
    Fetches all the remaining pages of a paginated request starting from a
    first page and returns the entire set of results (including the first page
    that was passed in) as a big list.

    The first page is deep-copied before use, so the list passed in is not
    modified. Fetching stops as soon as fetch_next reports that there is no
    further page (None) or a page comes back empty.

    Be careful, as this might generate a lot of requests, depending on what you are
    fetching, and might cause you to run into rate limits very quickly.
    """
    all_pages = []
    current_page = copy.deepcopy(first_page)
    # Truthiness covers both end conditions: None (no further page) and an
    # empty page, which has no last item to read pagination info from.
    while current_page:
        all_pages.extend(current_page)
        current_page = self.fetch_next(current_page)
    return all_pages
### ###
# Streaming # Streaming
### ###
@ -884,6 +956,34 @@ class Mastodon:
except: except:
raise MastodonAPIError("Could not parse response as JSON, response code was %s, bad json content was '%s'" % (response_object.status_code, response_object.content)) raise MastodonAPIError("Could not parse response as JSON, response code was %s, bad json content was '%s'" % (response_object.status_code, response_object.content))
# Parse link headers
if isinstance(response, list) and 'Link' in response_object.headers:
tmp_urls = requests.utils.parse_header_links(response_object.headers['Link'].rstrip('>').replace('>,<', ',<'))
for url in tmp_urls:
if url['rel'] == 'next':
# Be paranoid and extract max_id specifically
next_url = url['url']
matchgroups = re.search(r"max_id=([0-9]*)", next_url)
if matchgroups:
next_params = copy.deepcopy(params)
next_params['_pagination_method'] = method
next_params['_pagination_endpoint'] = endpoint
next_params['max_id'] = int(matchgroups.group(1))
response[-1]['_pagination_next'] = next_params
if url['rel'] == 'prev':
# Be paranoid and extract since_id specifically
prev_url = url['url']
matchgroups = re.search(r"since_id=([0-9]*)", prev_url)
if matchgroups:
prev_params = copy.deepcopy(params)
prev_params['_pagination_method'] = method
prev_params['_pagination_endpoint'] = endpoint
prev_params['max_id'] = int(matchgroups.group(1))
response[0]['_pagination_prev'] = prev_params
# Handle rate limiting # Handle rate limiting
if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting: if 'X-RateLimit-Remaining' in response_object.headers and do_ratelimiting:
self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining']) self.ratelimit_remaining = int(response_object.headers['X-RateLimit-Remaining'])