From d7bdfaf8f6340281109c7e44fe8b108ee6f73b42 Mon Sep 17 00:00:00 2001 From: VG Date: Tue, 28 Nov 2017 21:33:11 +0100 Subject: first test at having a cli for dumping anime information from anidb in yaml format --- anidb/__init__.py | 71 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 23 deletions(-) (limited to 'anidb') diff --git a/anidb/__init__.py b/anidb/__init__.py index 29e9b78..c9eef78 100644 --- a/anidb/__init__.py +++ b/anidb/__init__.py @@ -1,7 +1,10 @@ +import os +import time +from datetime import datetime + import requests from bs4 import BeautifulSoup -from datetime import datetime BASE_URL = "http://api.anidb.net:9001/httpapi" SEARCH_URL = "http://anisearch.outrance.pl/" @@ -20,6 +23,16 @@ class AniDB: self._cache = {} def _request(self, datapage, params={}, cache=True): + + cache_filename = f'cache_{params["aid"]}.xml' + + if os.path.exists(cache_filename): + print('D: cache_filename exists, return its content:', cache_filename) + with open(cache_filename, 'r', encoding='utf8') as fpi: + return fpi.read() + + time.sleep(3) # avoid being banned with subsequent requests + params.update({ 'client': self.client_id, 'clientver': self.client_ver, @@ -28,7 +41,14 @@ class AniDB: }) r = requests.get(BASE_URL, params=params) r.raise_for_status() - return r + + #print('D: r.text:', r.text) + + print('D: creating cache file:', cache_filename) + with open(cache_filename, 'w', encoding='utf8') as fpo: + fpo.write(r.text) + + return r.text # Anime http://wiki.anidb.net/w/HTTP_API_Definition#Access @@ -45,22 +65,26 @@ class AniDB: results = [] animetitles = BeautifulSoup(r.text, 'xml').animetitles for anime in animetitles.find_all('anime'): - results.append(Anime({ - 'id': int(anime['id']), - 'title': str(anime.find('title', attrs={'type': "main"}).string) - }, partial=True, updater=lambda: self.get(anime['id']))) + print('D: got anime: ', anime) + aid = int(anime['aid']) + print('D: got aid: ', aid) + #results.append(Anime({ + # 'aid': aid, + # 'title': str(anime.find('title', attrs={'type': "main"}).string) + #}, partial=True, updater=lambda: self.get(aid))) + results.append(self.get(aid)) + print('D: result appended') return results - def get(self, id): + def get(self, aid): """ Allows retrieval of non-file or episode related information for a specific anime by AID (AniDB anime id). http://wiki.anidb.net/w/HTTP_API_Definition#Anime """ - id = int(id) # why? - - r = self._request("anime", {'aid': id}) - soup = BeautifulSoup(r.text, 'xml') + r = self._request("anime", {'aid': aid}) + #print('D: got in AniDB.get(), r:', r) + soup = BeautifulSoup(r, 'xml') if soup.error is not None: raise Exception(soup.error.string) @@ -68,25 +92,26 @@ class AniDB: titles = anime.titles a = Anime({ - 'id': id, + 'aid': aid, 'type': str(anime.type.string), 'episodecount': int(anime.episodecount.string), - 'startdate': datetime(*list(map(int, anime.startdate.string.split("-")))), - 'enddate': datetime(*list(map(int, anime.enddate.string.split("-")))), + #'startdate': datetime(*list(map(int, anime.startdate.string.split("-")))), + #'enddate': datetime(*list(map(int, anime.enddate.string.split("-")))), 'titles': [( str(title.string), title['type'] if 'type' in title else "unknown" ) for title in anime.find_all('title')], 'title': str(titles.find('title', attrs={'type': "main"}).string), 'relatedanime': [], - 'url': str(anime.url.string), + 'url': str(anime.url.string if anime.url else None), 'creators': [], - 'description': str(anime.description.string), - 'ratings': SmartDict({ - 'permanent': float(anime.ratings.permanent.string), - 'temporary': float(anime.ratings.temporary.string), - 'review': float(anime.ratings.review.string) - }), + 'description': str(anime.description.string if anime.description else ''), + #'ratings': SmartDict({ + # 'permanent': float(anime.ratings.permanent.string), + # 'temporary': float(anime.ratings.temporary.string), + # 'review': float(anime.ratings.review.string if anime.ratings.review + # else 0) + #}), 'picture': "http://img7.anidb.net/pics/anime/" + str(anime.picture.string), 'categories': [], 'tags': [], @@ -94,7 +119,7 @@ class AniDB: 'episodes': [], }) - self._cache[id] = a + self._cache[aid] = a return a @@ -136,4 +161,4 @@ class Anime: raise AttributeError("no attribute called '%s'" % name) def __repr__(self): - return u'' % (self.id, self.title) \ No newline at end of file + return u'' % (self.aid, self.title) -- cgit v1.2.3