python教程栏目介绍爬取json数据实例
相关免费学习推荐:python教程(视频)
本文中以爬取其中的ai流转率数据为例。
该地址返回的响应内容为json类型,其中红框标记的项即为ai流转率值:
实现代码如下:
import requests
import json
import csv

# Target API: returns flow info as JSON. The two {} placeholders take the
# begin/end day-of-month within October 2018 (201810DD).
url = 'https://databank.yushanfang.com/api/ecapi?path=/databank/crowdfulllink/flowinfo&fromcrowdid=3312&beginthedate=201810{}&endthedate=201810{}&tocrowdidlist[0]=3312&tocrowdidlist[1]=3313&tocrowdidlist[2]=3314&tocrowdidlist[3]=3315'

# Request headers carrying the login cookie -- the API requires an
# authenticated session.
headers = {
    'host': 'databank.yushanfang.com',
    'referer': 'https://databank.yushanfang.com/',
    'connection': 'keep-alive',
    'user-agent': 'mozilla/5.0 (windows nt 10.0; wow64) applewebkit/537.36 (khtml, like gecko) chrome/63.0.3239.84 safari/537.36',
    'cookie': '_tb_token_=inkdejldm3mgvkjhsfdw; bs_n_lang=zh_cn; cna=aaj1evii7x0cato9ktkvjzgs; ck2=072de851f1c02d5c7bac555f64c5c66d; c_token=c74594b486f8de731e2608cb9526a3f2; an=5ywo5qoj5pe25luj5a6y5pa55pex6iiw5bqxonpmea%3d%3d; lg=true; sg=\"=19\"; lvc=sahojs49pcqhqq%3d%3d; isg=bpt0md7de_ic5ie3oa85rxamxblk3uqjmmin6o5vjh8c-zrdtt7arxb3fxgeavap',
}

rows = []
for n in range(20, 31):          # begin day: 20..30
    row = [n]                    # first cell of the row is the begin day
    for m in range(21, 32):      # end day: 21..31
        if m < n + 1:
            # Upper-triangular layout: no data when the end day is not
            # strictly after the begin day.
            row.append("")
        else:
            # Fill the (begin, end) pair into the URL template.
            requrl = url.format(n, m)
            # BUGFIX: print the formatted request URL, not the raw template.
            print(requrl)
            # BUGFIX: Python booleans are capitalized -- `verify=false`
            # was a NameError. verify=False skips TLS cert validation
            # (insecure; kept only to match the original scraper's intent).
            response = requests.get(url=requrl, headers=headers, verify=False)
            text = response.text
            # Log the raw response body for debugging.
            print(text)
            # Parse the JSON payload and pull out the flow-rate value.
            jsonobj = json.loads(text)
            tocntpercent = jsonobj['data']['intercrowdinfo'][1]['tocntpercent']
            row.append(str(tocntpercent) + "%")
    rows.append(row)

# Spreadsheet header row: label column + one column per end day.
header = ['ai流转率', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31']

# Export header and scraped rows to a CSV file.
# BUGFIX: newline='' prevents blank lines between rows on Windows
# (required by the csv module when writing to a file object).
with open('d:\\res\\pachong\\tmall.csv', 'w', encoding='gb18030', newline='') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(header)
    f_csv.writerows(rows)
import csv
import json
import ssl
import urllib.request

# Target API: returns flow info as JSON. The two {} placeholders take the
# begin/end day-of-month within October 2018 (201810DD).
url = 'https://databank.yushanfang.com/api/ecapi?path=/databank/crowdfulllink/flowinfo&fromcrowdid=3312&beginthedate=201810{}&endthedate=201810{}&tocrowdidlist[0]=3312&tocrowdidlist[1]=3313&tocrowdidlist[2]=3314&tocrowdidlist[3]=3315'

# Disable HTTPS certificate verification for urllib.
# NOTE(security): this bypasses TLS validation globally via a private ssl
# hook -- acceptable only for this one-off scrape, never in production code.
ssl._create_default_https_context = ssl._create_unverified_context

# Request headers carrying the login cookie -- the API requires an
# authenticated session.
headers = {
    'host': 'databank.yushanfang.com',
    'referer': 'https://databank.yushanfang.com/',
    'connection': 'keep-alive',
    'user-agent': 'mozilla/5.0 (windows nt 10.0; wow64) applewebkit/537.36 (khtml, like gecko) chrome/63.0.3239.84 safari/537.36',
    'cookie': '_tb_token_=inkdejldm3mgvkjhsfdw; bs_n_lang=zh_cn; cna=aaj1evii7x0cato9ktkvjzgs; ck2=072de851f1c02d5c7bac555f64c5c66d; c_token=c74594b486f8de731e2608cb9526a3f2; an=5ywo5qoj5pe25luj5a6y5pa55pex6iiw5bqxonpmea%3d%3d; lg=true; sg=\"=19\"; lvc=sahojs49pcqhqq%3d%3d; isg=bpt0md7de_ic5ie3oa85rxamxblk3uqjmmin6o5vjh8c-zrdtt7arxb3fxgeavap',
}

rows = []
n = 20
while n < 31:                    # begin day: 20..30
    row = []
    row.append(n)                # first cell of the row is the begin day
    m = 21
    while m < 32:                # end day: 21..31
        if m < n + 1:
            # Upper-triangular layout: no data when the end day is not
            # strictly after the begin day.
            row.append("")
        else:
            # Fill the (begin, end) pair into the URL template.
            requrl = url.format(n, m)
            print(requrl)
            # BUGFIX: the class is urllib.request.Request (capital R);
            # `urllib.request.request` raises AttributeError.
            request = urllib.request.Request(url=requrl, headers=headers)
            response = urllib.request.urlopen(request)
            text = response.read().decode('utf8')
            # Log the raw response body for debugging.
            print(text)
            # Parse the JSON payload and pull out the flow-rate value.
            jsonobj = json.loads(text)
            tocntpercent = jsonobj['data']['intercrowdinfo'][1]['tocntpercent']
            row.append(str(tocntpercent) + "%")
        # BUGFIX: advance m unconditionally -- incrementing only in the
        # else branch would loop forever whenever m < n + 1.
        m = m + 1
    rows.append(row)
    n = n + 1

# Spreadsheet header row: label column + one column per end day.
header = ['ai流转率', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31']

# Export header and scraped rows to a CSV file.
# BUGFIX: newline='' prevents blank lines between rows on Windows
# (required by the csv module when writing to a file object).
with open('d:\\res\\pachong\\tmall.csv', 'w', encoding='gb18030', newline='') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(header)
    f_csv.writerows(rows)
导出内容如下:
到此这篇关于使用python爬取json数据的文章就介绍到这了。
相关免费学习推荐:php编程(视频)
以上就是python爬取json数据的示例的详细内容。