Skip to content
This repository has been archived by the owner on Jan 29, 2023. It is now read-only.

Commit

Permalink
add InfoScanner referred at #10
Browse files Browse the repository at this point in the history
remove encode path in service.common.generate_video_link
  • Loading branch information
lordfriend committed Jul 9, 2016
1 parent 75d5d9d commit 9b1aef7
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 1 deletion.
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,15 @@
#0.6.0-alpha

##Features Add

- InfoScanner scans for missing episode information (name, name_cn, duration) and automatically fills it in from bangumi.tv. Note that name_cn is not always filled.

##Bug fix

- rollback session when db connection lost

NOTE: this release requires users to update their config.yml.

#0.4.0-alpha

##Features Add
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ $ python tools.py --user-add admin 1234 # admin is username 1234 is password
$ python tools.py --user-promote admin 3 # admin is username 3 is the level, currently means super user
```

### set your server locale

To avoid some Unicode issues, it is recommended to set the locale of your server.

### Docker
you can also set up the development environment with [Docker](https://www.docker.com/)

Expand Down
4 changes: 4 additions & 0 deletions Scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@
from sqlalchemy.sql import func
import traceback

from taskrunner.InfoScanner import info_scanner


class Scheduler:

def __init__(self):
Expand Down Expand Up @@ -173,6 +176,7 @@ def scan_bangumi(self):
def on_connected(result):
# logger.info(result)
scheduler.start()
info_scanner.start()

def on_connect_fail(result):
logger.error(result)
Expand Down
3 changes: 3 additions & 0 deletions config/config-sample.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ deluge:

task:
interval: 15 # unit minute
info_scanner:
scan_time: # The info scanner is scheduled to run once a day at this time. The time is not exact; default is 23:00
scan_time_format: # default is '%H:%M' see https://docs.python.org/2/library/datetime.html#strftime-strptime-behavior

download:
location: / # change this location
Expand Down
2 changes: 1 addition & 1 deletion service/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def generate_cover_link(self, bangumi):
return cover_url

def generate_video_link(self, bangumi_id, path):
video_link = '/video/{0}/{1}'.format(bangumi_id, path.encode('utf-8'))
video_link = '/video/{0}/{1}'.format(bangumi_id, path)
if self.video_domain is not None:
video_link = self.video_domain + video_link
return video_link
Expand Down
115 changes: 115 additions & 0 deletions taskrunner/InfoScanner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from utils.SessionManager import SessionManager
from domain.Bangumi import Bangumi
from domain.Episode import Episode
from sqlalchemy.sql.expression import or_
from sqlalchemy import exc
from twisted.internet import threads
from twisted.internet.task import LoopingCall
from datetime import datetime
import time
import yaml
import requests
import logging
import traceback

logger = logging.getLogger(__name__)

class InfoScanner:
    """Once-a-day background job that fills in missing episode information.

    Episodes whose ``name`` or ``duration`` is empty are looked up through
    the bgm.tv subject API, and their blank ``name``, ``name_cn`` and
    ``duration`` fields are filled from the matching remote episode entry.
    The clock is polled once a minute; a scan starts when the current hour
    equals the configured hour and no scan has been started yet that day.
    """

    # Fallbacks used when config.yml omits (or leaves empty) the
    # task.info_scanner settings.
    # NOTE(review): config-sample.yml documents the default scan time as
    # 23:00 while this code has always used 16:00 - confirm which is meant.
    DEFAULT_SCAN_TIME = '16:00'
    DEFAULT_SCAN_TIME_FORMAT = '%H:%M'

    def __init__(self):
        """Read ./config/config.yml and prepare the (idle) scanner state."""
        # Context manager so the file handle is closed even if parsing fails.
        with open('./config/config.yml', 'r') as fr:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects (and newer PyYAML releases require a
            # Loader argument). config.yml is assumed to be trusted;
            # consider switching to yaml.safe_load.
            config = yaml.load(fr)

        # Always defined, even when the info_scanner section is absent, so
        # check_time() never hits a missing attribute.
        self.scan_time = self._parse_scan_time(config['task'])

        self.scanner_running = False
        self.last_scan_date = None

    @staticmethod
    def _parse_scan_time(task_config):
        """Return the configured scan time as a datetime.

        :param task_config: the ``task`` mapping from config.yml; may lack
            ``info_scanner`` or have it (or its keys) set to None.
        :return: ``datetime.strptime(scan_time, scan_time_format)`` with the
            class defaults substituted for missing/None values.
        :raises ValueError: if the configured time does not match the format.
        """
        # NOTE(review): an unquoted HH:MM value may be parsed by YAML 1.1 as
        # a number rather than a string - presumably the value should be
        # quoted in config.yml; verify.
        scanner_config = (task_config or {}).get('info_scanner') or {}

        scan_time = scanner_config.get('scan_time')
        if scan_time is None:
            scan_time = InfoScanner.DEFAULT_SCAN_TIME

        scan_time_format = scanner_config.get('scan_time_format')
        if scan_time_format is None:
            scan_time_format = InfoScanner.DEFAULT_SCAN_TIME_FORMAT

        return datetime.strptime(scan_time, scan_time_format)

    def start(self):
        """Start polling the clock: check_time() is called every 60 seconds."""
        lc = LoopingCall(self.check_time)
        lc.start(60)

    def check_time(self):
        """Start a scan if it is the configured hour and none ran today.

        Only the hour of ``scan_time`` is compared, so the scan starts on the
        first poll inside that hour.
        """
        if self.scanner_running:
            return
        current_time = datetime.now()
        if self.last_scan_date == current_time.date():
            return
        if self.scan_time.hour != current_time.hour:
            return

        self.scanner_running = True
        self.last_scan_date = current_time.date()
        deferred = self.scan_episode()
        if deferred is None:
            # Nothing to wait for (e.g. an overriding scan_episode that
            # returns nothing): release the guard immediately.
            self.scanner_running = False
        else:
            # The scan runs in a worker thread; keep the guard set until it
            # has really finished, whether it succeeded or failed.
            deferred.addBoth(self._on_scan_finished)

    def _on_scan_finished(self, result):
        """Clear the running flag once the scan's Deferred has fired."""
        self.scanner_running = False
        return result

    def get_bgm_info(self, bgm_id):
        """Fetch the subject record for ``bgm_id`` from the bgm.tv API.

        :param bgm_id: subject id; converted with ``str()`` to build the URL.
        :return: ``(status_code, info)`` where ``info`` is the decoded JSON
            object, or ``{}`` when the status is >= 400 or the body is not a
            JSON object.
        :raises requests.RequestException: on network errors / timeout (60s).
        """
        bangumi_tv_url_base = 'http://api.bgm.tv/subject/'
        bangumi_tv_url_param = '?responseGroup=large'
        bangumi_tv_url = bangumi_tv_url_base + str(bgm_id) + bangumi_tv_url_param
        r = requests.get(bangumi_tv_url, timeout=60)
        if r.status_code >= 400:
            return (r.status_code, {})
        try:
            info = r.json()
        except ValueError:
            # A successful status with an undecodable body must not abort
            # the whole scan.
            logger.warning('response for bangumi of %s is not valid JSON' % str(bgm_id))
            return (r.status_code, {})
        if not isinstance(info, dict):
            info = {}
        return (r.status_code, info)

    def _fetch_bgm_info(self, bgm_id):
        """Return the subject info for ``bgm_id``, or None if the lookup failed."""
        logger.info('try to get info for bangumi of %s' % str(bgm_id))
        try:
            (status_code, bangumi_info) = self.get_bgm_info(bgm_id)
        except requests.RequestException as error:
            # One unreachable subject should not discard the work done for
            # the others.
            logger.warning('failed to get info for bangumi of %s: %s' % (str(bgm_id), error))
            return None
        if status_code < 400:
            return bangumi_info
        return None

    @staticmethod
    def _fill_episode(episode, bangumi_info):
        """Copy name/name_cn/duration from the matching remote episode.

        Only fields that are currently ``''`` on ``episode`` are written, and
        only when the remote entry actually provides a value.

        :param episode: object with ``bgm_eps_id``, ``name``, ``name_cn`` and
            ``duration`` attributes.
        :param bangumi_info: decoded subject record; its ``eps`` entry may be
            missing or None.
        :return: True if an entry with a matching ``id`` was found.
        """
        for eps in bangumi_info.get('eps') or []:
            if eps.get('id') != episode.bgm_eps_id:
                continue
            for field in ('name', 'name_cn', 'duration'):
                value = eps.get(field)
                if getattr(episode, field) == '' and value is not None:
                    setattr(episode, field, value)
            return True
        return False

    @staticmethod
    def _rollback_quietly(session):
        """Roll the session back, logging (not raising) if that fails too."""
        try:
            session.rollback()
        except Exception as rollback_error:
            logger.error(rollback_error)

    def __scan_episode_in_thread(self):
        """Worker-thread body: fill in episodes with an empty name/duration."""
        logger.info('start scan info of episode')
        session = SessionManager.Session
        try:
            # we don't scan the episode those name_cn is missing
            # because many of them don't have name_cn
            result = session.query(Episode, Bangumi).\
                join(Bangumi).\
                filter(or_(Episode.name == '', Episode.duration == ''))

            # bgm_id -> subject info fetched during this run. A failed lookup
            # is stored as None so it is neither retried for every episode of
            # the same bangumi nor exempt from the delay below.
            bgm_info_cache = {}

            for episode, bangumi in result:
                if bangumi.bgm_id not in bgm_info_cache:
                    # every request after the first one is delayed to avoid
                    # accessing the bgm api too frequently
                    if bgm_info_cache:
                        time.sleep(20)
                    bgm_info_cache[bangumi.bgm_id] = self._fetch_bgm_info(bangumi.bgm_id)

                bangumi_info = bgm_info_cache[bangumi.bgm_id]
                if bangumi_info is None:
                    continue

                self._fill_episode(episode, bangumi_info)

            session.commit()
            logger.info('scan finished, will scan at next day')
        except exc.DBAPIError as db_error:
            logger.error(db_error)
            # A session whose flush/commit failed stays unusable until it is
            # rolled back, not only when the connection was invalidated.
            self._rollback_quietly(session)
        except Exception as error:
            logger.error(error)
            traceback.print_exc()
            # Discard the partially applied changes.
            self._rollback_quietly(session)

    def scan_episode(self):
        """Run the scan in a worker thread.

        :return: the Deferred from ``deferToThread``; it fires when the scan
            has finished.
        """
        return threads.deferToThread(self.__scan_episode_in_thread)

# Shared module-level instance (Scheduler.py imports it and calls start()).
# Constructing it opens ./config/config.yml, so importing this module requires
# that file to exist.
info_scanner = InfoScanner()
Empty file added taskrunner/__init__.py
Empty file.

0 comments on commit 9b1aef7

Please sign in to comment.