Compare commits
11 Commits
master
...
umooc-clie
Author | SHA1 | Date |
---|---|---|
kdxcxs | dfa9613903 | 4 years ago |
kdxcxs | cb5bff2305 | 4 years ago |
kdxcxs | 10f59c120d | 4 years ago |
kdxcxs | 2980a7817d | 4 years ago |
kdxcxs | 70efa4f702 | 4 years ago |
kdxcxs | 098c86ec76 | 4 years ago |
kdxcxs | 1036f7833e | 4 years ago |
kdxcxs | 526b6349cc | 4 years ago |
kdxcxs | d820868b8b | 4 years ago |
kdxcxs | 101cbc5e2a | 4 years ago |
kdxcxs | fd659e666e | 4 years ago |
3 changed files with 222 additions and 43 deletions
@ -1,3 +1,8 @@ |
|||
# yomooc |
|||
# yomooc-backend |
|||
|
|||
## 版本说明 |
|||
|
|||
| 版本 | 说明 | |
|||
| ----- | ------------------- | |
|||
| 0.0.x | `umooc-client` 开发 | |
|||
|
|||
无论是 Android 软件还是网页版的优慕课都是实在难用, 无奈下计划起了这个项目 |
@ -0,0 +1,215 @@ |
|||
""" |
|||
A client that simulates the desktop device to communicate with umooc server |
|||
""" |
|||
|
|||
import requests |
|||
import time |
|||
import re |
|||
from bs4 import BeautifulSoup |
|||
|
|||
|
|||
class LoginError(Exception):
    """Raised when logging in to the umooc server does not yield a session.

    Args:
        error_info: Human-readable description of the failure. It is kept
            on the instance as ``error_info`` and is what ``str()`` returns.
    """

    def __init__(self, error_info):
        # Hand the message (not the instance itself) to the base class so
        # that ``args`` and ``repr()`` carry the actual failure text.
        super().__init__(error_info)
        self.error_info = error_info

    def __str__(self):
        # str() guards against a non-string payload, for which returning the
        # raw object from __str__ would raise TypeError.
        return str(self.error_info)
|||
|
|||
|
|||
class ParseError(Exception):
    """Raised when a page returned by the server cannot be parsed.

    Args:
        error_info: Human-readable description of what could not be
            extracted. It is kept on the instance as ``error_info`` and is
            what ``str()`` returns.
    """

    def __init__(self, error_info):
        # Hand the message (not the instance itself) to the base class so
        # that ``args`` and ``repr()`` carry the actual failure text.
        super().__init__(error_info)
        self.error_info = error_info

    def __str__(self):
        # str() guards against a non-string payload, for which returning the
        # raw object from __str__ would raise TypeError.
        return str(self.error_info)
|||
|
|||
|
|||
class TopicListPage(object):
    """One page of the forum thread list, parsed into ``self.topics``.

    Args:
        raw_doc: HTML text of the thread-list page.

    After construction ``topics`` holds one ``{'title': ..., 'id': ...}``
    dict per data row of the second ``<table>`` in the document.

    Raises:
        ParseError: if the expected table, cells, title element or link
            target cannot be found.
    """

    def __init__(self, raw_doc):
        self.raw_html = raw_doc
        self.topics = []
        self.parse()

    @staticmethod
    def _title_text(title_dom):
        """Return the element's text as a plain ``str``.

        ``.string`` is None when the element wraps nested markup, so fall
        back to the concatenated text. Converting to ``str`` keeps the
        stored title from holding a reference to the parse tree.
        """
        text = title_dom.string
        if text is None:
            text = title_dom.get_text()
        return str(text)

    def parse(self):
        """Extract title and thread id from every topic row."""
        page_soup = BeautifulSoup(self.raw_html, 'html.parser')
        tables = page_soup.find_all('table')
        if len(tables) < 2:
            raise ParseError('Cannot find topic table')

        # The first row of the table is skipped; the topics start after it.
        for tr in tables[1].find_all('tr')[1:]:
            cells = tr.find_all('td')
            if len(cells) < 2:
                raise ParseError('Cannot get thread title')
            title_cell = cells[1]

            title_dom = title_cell.find('b')
            if title_dom is not None:
                # Bold title: the link target lives on the enclosing element.
                link_dom = title_dom.parent
                thread_title = self._title_text(title_dom)
            else:
                title_dom = title_cell.find('a')
                if title_dom is None:
                    raise ParseError('Cannot get thread id')
                link_dom = title_dom
                # Drop the 9 trailing characters (a newline plus padding
                # spaces) that follow the title inside a plain link.
                thread_title = self._title_text(title_dom)[:-9]

            try:
                # The id is whatever follows the first '=' in the href.
                thread_id = link_dom.attrs['href'].split('=')[1]
            except (AttributeError, KeyError, IndexError) as err:
                raise ParseError('Cannot get thread id') from err

            self.topics.append({'title': thread_title,
                                'id': thread_id})
|||
|
|||
|
|||
class TopicPage(object):
    """A single thread page, parsed into ``self.replies``.

    Args:
        raw_doc: HTML text of the thread page.

    After construction ``replies`` holds one dict per ``<input>`` element of
    the page with the keys ``username``, ``time`` and ``contents``, where
    ``contents`` is a list of typed fragments (``text``, ``img``, ``table``,
    ``blockquote`` or ``ol``).
    """

    # Matches lone surrogate code units; they are replaced by the literal
    # '[emoji]' because umooc does not support emoji. Compiled once here
    # instead of once per parsed node.
    _EMOJI_RE = re.compile(u'[\uD800-\uDBFF]|[\uDC00-\uDFFF]')

    def __init__(self, raw_doc):
        self.raw_html = raw_doc
        self.replies = []
        self.parse()

    def parse(self):
        """Collect every reply found in the page into ``self.replies``."""
        # Strip this fixed image + empty <div> markup before parsing
        # (presumably the empty-signature placeholder -- TODO confirm).
        cleaned_html = self.raw_html.replace(
            '<img src="http://eol.ctbu.edu.cn/meol/common/forum/styles/default/image/idiograph.gif" />'
            '<br><div style="font-size:12px;line-height:200%;letter-spacing:2px;"></div>',
            '')
        page_soup = BeautifulSoup(cleaned_html, 'html.parser')

        for reply_input in page_soup.find_all('input'):
            # The reply body is HTML stored in the input's ``value``.
            contents = self._parse_contents(reply_input.attrs['value'])
            reply_row = reply_input.find_parents('tr')[0]
            self.replies.append(
                {'username': reply_row.h6.contents[0][25:],  # remove the redundant spaces
                 # NOTE(review): [7:] presumably drops a fixed label/padding
                 # prefix in front of the timestamp -- confirm against a page.
                 'time': reply_row.find_all('li')[1].span.string[7:],
                 'contents': contents})

    def _parse_contents(self, value_html):
        """Turn the HTML of one reply body into a list of typed fragments."""
        body_html = value_html.replace(
            '�', '[emoji]').replace(
            '<span style="white-space: normal;"><br/></span>', '\n')
        emoji_re = self._EMOJI_RE
        contents = []

        for content in BeautifulSoup(body_html, 'html.parser').contents:
            name = content.name
            if name == 'br':
                continue

            if name == 'div':
                # Only images are taken out of a <div>.
                for div_child in content.contents:
                    if div_child.name == 'img':
                        # NOTE(review): the fixed [38:-2] slice presumably cuts
                        # the id out of a fixed-shape image URL -- confirm.
                        img_id = div_child['src'][38:-2]
                        contents.append({'type': 'img', 'img_id': img_id})
            elif name == 'p':
                contents.append(
                    {'type': 'text',
                     'content': emoji_re.sub('[emoji]', content.text.replace('\xa0 ', ''))})
            elif name == 'table':
                # TODO: support tables
                contents.append({'type': 'table', 'content': str(content)})
            elif name == 'blockquote':
                contents.append({'type': 'blockquote', 'quotes': content.text.split('\xa0')})
            elif name == 'ol':
                contents.append({'type': 'ol', 'lis': [li.text for li in content.children]})
            else:  # pure text
                text = content.string
                if text is None:
                    # A tag wrapping nested markup has no single string;
                    # use its concatenated text instead of failing in re.sub.
                    text = content.get_text()
                contents.append({'type': 'text', 'content': emoji_re.sub('[emoji]', text)})

        return contents
|||
|
|||
|
|||
class UmoocClient(object):
    """Client that talks to the umooc server the way a desktop browser does.

    Typical use: ``login()`` (which also calls ``prepare()``), then
    ``get_all_topics()`` / ``get_topic_list()`` and ``get_replies()``.
    """

    # The paging arithmetic in get_all_topics() assumes this page size.
    _TOPICS_PER_PAGE = 20

    def __init__(self):
        self.js_session_id = ''   # JSESSIONID cookie value, set by login()
        self.dwr_session_id = ''  # DWR session id, set by prepare()
        self.topic_list = []      # accumulated {'title': ..., 'id': ...} dicts
        self.replies = {}         # thread id -> list of parsed replies

    def _session_cookie(self):
        """Return the Cookie header value carrying both session ids."""
        return (f'JSESSIONID={self.js_session_id}; '
                f'DWRSESSIONID={self.dwr_session_id}')

    def _request_topic_list(self, page):
        """Request one page of the forum thread list and return the response."""
        return requests.get(f'http://eol.ctbu.edu.cn/meol/common/faq/forum.jsp'
                            f'?viewtype=thread'
                            f'&forumid=102211'
                            f'&cateId=0'
                            f'&s_gotopage={page}',
                            headers={'Upgrade-Insecure-Requests': '1',
                                     'User-Agent': 'yomooc',
                                     'Referer': 'http://eol.ctbu.edu.cn/meol/common/faq/forum.jsp'
                                                '?count=MODITIME'
                                                '&forumid=102211',
                                     'Cookie': self._session_cookie()})

    def login(self, username, password):
        """Log in and store the session id, then run ``prepare()``.

        Args:
            username: Account name sent as ``IPT_LOGINUSERNAME``.
            password: Password sent as ``IPT_LOGINPASSWORD``.

        Raises:
            LoginError: if the server does not answer with a 302 redirect
                carrying a ``JSESSIONID`` cookie.
        """
        resp = requests.post('http://eol.ctbu.edu.cn/meol/loginCheck.do',
                             headers={'Cache-Control': 'max-age=0',
                                      'Upgrade-Insecure-Requests': '1',
                                      'Origin': 'http://eol.ctbu.edu.cn',
                                      'Content-Type': 'application/x-www-form-urlencoded',
                                      'User-Agent': 'yomooc',
                                      'Referer': 'http://eol.ctbu.edu.cn/meol/common/security/login.jsp?enterLid=46445',
                                      'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7'
                                      },
                             # A dict lets requests form-encode the values, so
                             # credentials containing '&', '=', '%' or
                             # non-ASCII characters are transmitted intact.
                             data={
                                 # Millisecond timestamp; computed numerically
                                 # so its length does not depend on how many
                                 # decimals str(time.time()) happens to print.
                                 'logintoken': str(int(time.time() * 1000)),
                                 'enterLid': '46445',
                                 'IPT_LOGINUSERNAME': username,
                                 'IPT_LOGINPASSWORD': password,
                             },
                             allow_redirects=False)
        session_id = resp.cookies.get('JSESSIONID')
        if resp.status_code != 302 or session_id is None:
            raise LoginError('Fail to get session')
        self.js_session_id = session_id
        self.prepare()

    def prepare(self):
        """Obtain the DWR session id and visit the course pages.

        Raises:
            ParseError: if the DWR response does not contain a session id.
        """
        # get dwr session id
        resp = requests.post('http://eol.ctbu.edu.cn/meol/dwr/call/plaincall/__System.generateId.dwr',
                             headers={'Origin': 'http://eol.ctbu.edu.cn',
                                      'Content-Type': 'text/plain',
                                      'User-Agent': 'yomooc',
                                      'Referer': 'http://eol.ctbu.edu.cn/meol/jpk/course/layout/newpage/index.jsp'
                                                 '?courseId=46445',
                                      'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7',
                                      'Cookie': f'JSESSIONID={self.js_session_id}'},
                             data='callCount=1\n'
                                  'c0-scriptName=__System\n'
                                  'c0-methodName=generateId\n'
                                  'c0-id=0\n'
                                  'batchId=0\n'
                                  'instanceId=0\n'
                                  'page=%2Fmeol%2Fjpk%2Fcourse%2Flayout%2Fnewpage%2Findex.jsp%3FcourseId%3D46445\n'
                                  'scriptSessionId=\n'
                                  'windowName=\n')
        # The id is the quoted text immediately before the closing '");'.
        id_match = re.search(r'[^"]*"\);', resp.text)
        if id_match is None:
            raise ParseError('Cannot get DWR session id')
        self.dwr_session_id = id_match.group()[:-3]

        # get topics

        # it is needed to request some pages before getting the topic list
        # maybe the server is judging which course the user is
        requests.get('http://eol.ctbu.edu.cn/meol/jpk/course/layout/newpage/index.jsp?courseId=46445',
                     headers={'Upgrade-Insecure-Requests': '1',
                              'User-Agent': 'yomooc',
                              'Cookie': self._session_cookie()})
        requests.get('http://eol.ctbu.edu.cn/meol/jpk/course/layout/newpage/default_demonstrate.jsp'
                     '?courseId=46445',
                     headers={'Upgrade-Insecure-Requests': '1',
                              'User-Agent': 'yomooc',
                              'Referer': 'http://eol.ctbu.edu.cn/meol/jpk/course/layout/newpage/index.jsp'
                                         '?courseId=46445',
                              'Cookie': self._session_cookie()})

    def get_topic_list(self, page=1):
        """Fetch one thread-list page and append its topics to ``topic_list``.

        Args:
            page: Page number passed to the server as ``s_gotopage``.

        Returns:
            The accumulated ``self.topic_list`` (not only this page's topics).
        """
        resp = self._request_topic_list(page)
        topic_list_page = TopicListPage(resp.text)
        self.topic_list.extend(topic_list_page.topics)
        return self.topic_list

    def get_replies(self, thread_ids=None):
        """Fetch and parse the replies of the given threads.

        Args:
            thread_ids: Iterable of thread ids; when None, the ids of every
                topic currently in ``self.topic_list`` are used.

        Returns:
            ``self.replies``, a dict mapping thread id to its parsed replies.
        """
        if thread_ids is None:
            thread_ids = [topic['id'] for topic in self.topic_list]
        for thread_id in thread_ids:
            # NOTE(review): unlike the other requests, no session cookie is
            # sent here -- confirm that thread pages are readable without one.
            resp = requests.get(f'http://eol.ctbu.edu.cn/meol/common/faq/thread.jsp?threadid={thread_id}',
                                headers={'User-Agent': 'yomooc'})
            topic_page = TopicPage(resp.text)
            self.replies[thread_id] = topic_page.replies
        return self.replies

    def get_all_topics(self):
        """Fetch every page of the thread list into ``self.topic_list``.

        Calls ``prepare()`` first when no DWR session id is known yet.

        Returns:
            The accumulated ``self.topic_list``.

        Raises:
            ParseError: if the total topic count cannot be read from the
                first page.
        """
        if self.dwr_session_id == '':
            self.prepare()
        resp = self._request_topic_list(1)
        soup = BeautifulSoup(resp.text, 'html.parser')
        counters = soup.select('div[class="page"] > b')
        try:
            topics_in_total = int(counters[0].text)
        except (IndexError, ValueError) as err:
            raise ParseError('Cannot get topic count') from err

        # Ceiling division: a trailing partial page still needs a request.
        page_count = -(-topics_in_total // self._TOPICS_PER_PAGE)
        for page in range(1, page_count + 1):
            self.get_topic_list(page)
        return self.topic_list
@ -1,41 +0,0 @@ |
|||
""" |
|||
A client that simulates the desktop device to communicate with umooc server |
|||
""" |
|||
|
|||
import requests |
|||
import time |
|||
|
|||
|
|||
class LoginError(Exception):
    """Raised when logging in to the umooc server does not yield a session.

    Args:
        ErrorInfo: Human-readable description of the failure. It is kept on
            the instance as ``errorinfo`` and is what ``str()`` returns.
    """

    def __init__(self, ErrorInfo):
        # Hand the message (not the instance itself) to the base class so
        # that ``args`` and ``repr()`` carry the actual failure text.
        super().__init__(ErrorInfo)
        self.errorinfo = ErrorInfo

    def __str__(self):
        # str() guards against a non-string payload, for which returning the
        # raw object from __str__ would raise TypeError.
        return str(self.errorinfo)
|||
|
|||
|
|||
class UmoocClient(object):
    """Client that logs in to the umooc server the way a desktop browser does."""

    def __init__(self):
        self.session = ''  # JSESSIONID cookie value, set by login()

    def login(self, username, password):
        """Log in and store the returned session id in ``self.session``.

        Args:
            username: Account name sent as ``IPT_LOGINUSERNAME``.
            password: Password sent as ``IPT_LOGINPASSWORD``.

        Raises:
            LoginError: if the server does not answer with a 302 redirect
                carrying a ``JSESSIONID`` cookie.
        """
        # The request no longer goes through the hard-coded local debugging
        # proxy (127.0.0.1:54385), which made login fail on any machine
        # without that proxy listening.
        resp = requests.post('http://eol.ctbu.edu.cn/meol/loginCheck.do',
                             headers={'Cache-Control': 'max-age=0',
                                      'Upgrade-Insecure-Requests': '1',
                                      'Origin': 'http://eol.ctbu.edu.cn',
                                      'Content-Type': 'application/x-www-form-urlencoded',
                                      'User-Agent': 'yomooc',
                                      'Referer': 'http://eol.ctbu.edu.cn/meol/common/security/login.jsp?enterLid=46445',
                                      'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-US;q=0.7'
                                      },
                             # A dict lets requests form-encode the values, so
                             # credentials containing '&', '=', '%' or
                             # non-ASCII characters are transmitted intact.
                             data={
                                 # Millisecond timestamp; computed numerically
                                 # so its length does not depend on how many
                                 # decimals str(time.time()) happens to print.
                                 'logintoken': str(int(time.time() * 1000)),
                                 'enterLid': '46445',
                                 'IPT_LOGINUSERNAME': username,
                                 'IPT_LOGINPASSWORD': password,
                             },
                             allow_redirects=False)
        session_id = resp.cookies.get('JSESSIONID')
        if resp.status_code != 302 or session_id is None:
            raise LoginError('Fail to get session')
        self.session = session_id
Loading…
Reference in new issue