您好，欢迎访问一九零五行业门户网

Python爬虫-抓取手机APP数据

抓取超级课程表话题数据。
#!/usr/local/bin/python2.7
# -*- coding: utf8 -*-
"""Super Timetable topic scraper.

Logs in to the mobile-app backend with a cookie-aware opener, requests the
first page of topic 19 and then keeps requesting further pages, each time
sending the timestamp returned by the previous response.

NOTE(review): the published listing this script was recovered from had every
character lower-cased.  Python names (``False``, ``Request``, ``CookieJar``,
``HTTPCookieProcessor``) are restored here.  JSON keys, form field names,
URL paths and header values are reproduced exactly as listed; they were
presumably mixed-case originally -- verify against captured traffic before
relying on them.
"""
import json
import zlib

try:  # Python 2, the interpreter named in the shebang
    import urllib2
    from cookielib import CookieJar
except ImportError:  # Python 3 exposes the same API under new module names
    import urllib.request as urllib2
    from http.cookiejar import CookieJar


loginurl = 'http://120.55.151.61/v2/studentskip/logincheckv4.action'
topicurl = 'http://120.55.151.61/v2/treehole/message/getmessagebytopicidv3.action'

headers = {
    'content-type': 'application/x-www-form-urlencoded; charset=utf-8',
    'user-agent': 'dalvik/1.6.0 (linux; u; android 4.1.1; m040 build/jro03h)',
    'host': '120.55.151.61',
    'connection': 'keep-alive',
    'accept-encoding': 'gzip',
    # Placeholder only: _post() overwrites this with the real payload length.
    'content-length': '207',
}

# NOTE(security): device code, account and password hash are hard-coded.
# Move them to configuration or the environment before sharing this script.
logindata = 'phonebrand=meizu&platform=1&devicecode=868033014919494&account=fcf030e1f2f6341c1c93be5bbc422a3d&phoneversion=16&password=a55b48bb75c79200379d82a18c5f47d6&channel=mxmarket&phonemodel=m040&versionnumber=7.2.1&'

# Form body shared by the first topic request (timestamp 0) and every
# "load more" request (timestamp taken from the previous page).
topicquery = 'timestamp=%s&phonebrand=meizu&platform=1&gendertype=-1&topicid=19&phoneversion=16&selecttype=3&channel=mxmarket&phonemodel=m040&versionnumber=7.2.1&'

# One opener for the whole session so cookies set by the login response are
# sent with the topic requests.
cookiejar = CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))


def _decode_body(raw, content_encoding=''):
    """Return the response body as text, gunzipping it when required.

    The request headers advertise ``accept-encoding: gzip`` and urllib does
    not decompress responses on its own, so a gzip-encoded body has to be
    inflated here before it can be parsed as JSON.
    """
    if 'gzip' in (content_encoding or '').lower():
        # 16 + MAX_WBITS makes zlib expect a gzip header and trailer.
        raw = zlib.decompress(raw, 16 + zlib.MAX_WBITS)
    return raw.decode('utf-8')


def _post(url, body, headers):
    """POST the form-encoded *body* to *url*.

    Updates ``headers['content-length']`` in place to the actual payload
    size and returns ``(text, parsed)``: the decoded response text and the
    object obtained by parsing it as JSON.
    """
    payload = body.encode('utf-8')
    # Computed rather than hard-coded: the length varies with the timestamp.
    headers['content-length'] = str(len(payload))
    response = opener.open(urllib2.Request(url, payload, headers))
    try:
        raw = response.read()
        content_encoding = response.info().get('Content-Encoding', '')
    finally:
        response.close()
    text = _decode_body(raw, content_encoding)
    return text, json.loads(text)


def fetch_data(json_data):
    """Read the topic posts out of one decoded API response.

    Args:
        json_data: decoded response; must contain ``data`` with a
            ``timestamplong`` entry.  A missing or null ``messagebos`` list
            is treated as an empty page.

    Returns:
        ``(timestamplong, topiclist)`` where ``topiclist`` holds one dict
        per message that has non-empty ``content``, with the keys
        ``content``, ``schoolname``, ``messageid``, ``gender`` and ``time``.

    Each kept message's school name and content are also printed.
    """
    data = json_data['data']
    timestamplong = data['timestamplong']
    topiclist = []
    for each in data.get('messagebos') or []:
        if not each.get('content', False):
            continue  # posts without text are skipped
        topicdict = {
            'content': each['content'],
            'schoolname': each['schoolname'],
            'messageid': each['messageid'],
            'gender': each['studentbo']['gender'],
            'time': each['issuetime'],
        }
        print('%s %s' % (each['schoolname'], each['content']))
        topiclist.append(topicdict)
    return timestamplong, topiclist


def load(timestamp, headers, url, max_pages=None):
    """Load more: keep requesting pages that follow *timestamp*.

    Args:
        timestamp: timestamp returned with the previously fetched page.
        headers: request headers; ``content-length`` is updated in place.
        url: topic endpoint to POST to.
        max_pages: optional cap on the number of pages requested; ``None``
            keeps going until the server stops answering with status 1.

    Returns:
        A list with the topic dicts of every page loaded by this call (empty
        when the first request already fails).

    Paging is done with a loop instead of one recursive call per page, so a
    long topic cannot exhaust the interpreter's recursion limit.
    """
    collected = []
    pages = 0
    while max_pages is None or pages < max_pages:
        loadresult, parsed = _post(url, topicquery % timestamp, headers)
        if parsed.get('status', False) != 1:
            print('load fail')
            print(loadresult)
            break
        print('load successful!')
        next_timestamp, topiclist = fetch_data(parsed)
        collected.extend(topiclist)
        pages += 1
        if next_timestamp == timestamp:
            # The timestamp did not advance; asking again would repeat the
            # same request forever.
            break
        timestamp = next_timestamp
    return collected


def main():
    """Log in, fetch the first topic page, then load the remaining pages.

    Returns the list of all topic dicts collected (empty when the first
    topic request does not report status 1).
    """
    # --- login ---
    loginresult, loginjson = _post(loginurl, logindata, headers)
    loginstatus = loginjson.get('data', False)
    # Test the parsed status: the raw body is truthy for any non-empty reply.
    if loginstatus:
        print('login successful!')
    else:
        print('login fail')
        print(loginresult)

    # --- fetch topic ---
    topichtml, topicjson = _post(topicurl, topicquery % 0, headers)
    topicstatus = topicjson.get('status', False)
    print(topicjson)
    topiclist = []
    if topicstatus == 1:
        print('fetch topic success!')
        timestamp, topiclist = fetch_data(topicjson)
        topiclist.extend(load(timestamp, headers, topicurl))
    return topiclist


if __name__ == '__main__':
    main()
其它类似信息

推荐信息