Python爬虫之英雄联盟

Python 木语

文章目录

英雄联盟Python爬虫

1.英雄爬取

2.JS获取所有英雄信息

3.爬取比赛数据

第一个LOL网页爬取

第二个LOL网页数据爬取

第三个LOL网页数据爬取

4.多线程爬取LOL英雄皮肤图片

英雄联盟Python爬虫

英雄主界面qq 游戏资料-英雄联盟官方网站-腾讯游戏

1.英雄爬取

游戏资料-英雄联盟官方网站-腾讯游戏

get方法获取指定英雄信息。

游戏资料-英雄联盟官方网站-腾讯游戏

id=xxx

2.JS获取所有英雄信息

import json

import requests

from faker import Factory

from bs4 import BeautifulSoup

f = Factory.create()

def get_all_heros():

url = ' https:// game.gtimg.cn/images/lo l/act/img/js/heroList/hero_list.js '

headers = {

'user-agent': f.user_agent()

}

r = requests.get(url, headers=headers)

r.encoding = r.apparent_encoding

c = r.text

l = json.loads(c)['hero']

for i in l[:50]:

print("ID: {0} 姓名:{1} 别名:{2}".format(i['heroId'], i['name'], i['alias']))

if __name__ == '__main__':

get_all_heros()

效果：

3.爬取比赛数据

第一个LOL网页爬取

lol数据_lol比赛数据_lol战队数据_lol选手数据_玩加赛事-玩加电竞

用到了csrf-token，post请求需要携带set-cookies 中的csrf-token即可。

import json

import time

import requests

from faker import Factory

from urllib import parse

f = Factory.create()

def get_token():

url = ' lol数据_lol比赛数据_lol战队数据_lol选手数据_玩加赛事-玩加电竞 '

headers = {

'user-agent': f.user_agent(),

'Referer': ' lol数据_lol比赛数据_lol战队数据_lol选手数据_玩加赛事-玩加电竞 ',

'Host': ' 玩加电竞-玩加赛事-专注服务电竞爱好者的游戏社区 ',

}

r = requests.get(url, headers=headers, allow_redirects=False)

r.encoding = r.apparent_encoding

c = r.cookies

r.close()

myCookies = c.get_dict()

# print(myCookies)

return str(int(c.get('wanplus_csrf')[9:]) + int(16777216)), myCookies

def get_competition():

url = ' 操作失败-玩加电竞 '

token, myCookies = get_token()

headers = {

'user-agent': f.user_agent(),

'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',

'Host': ' 玩加电竞-玩加赛事-专注服务电竞爱好者的游戏社区 ',

'Origin': ' 玩加电竞-玩加赛事-专注服务电竞爱好者的游戏社区 ',

'Referer': ' lol数据_lol比赛数据_lol战队数据_lol选手数据_玩加赛事-玩加电竞 ',

'X-CSRF-Token': token,

'X-Requested-With': 'XMLHttpRequest',

}

formdata = {

'_gtk': token,

'draw': '1',

'columns[0][data]': 'order',

'columns[0][name]': '',

'columns[0][searchable]': 'true',

'columns[0][orderable]': 'false',

'columns[0][search][value]': '',

'columns[0][search][regex]': 'false',

'columns[1][data]': 'playername',

'columns[1][name]': '',

'columns[1][searchable]': 'true',

'columns[1][orderable]': 'false',

'columns[1][search][value]': '',

'columns[1][search][regex]': 'false',

'columns[2][data]': 'teamname',

'columns[2][name]': '',

'columns[2][searchable]': 'true',

'columns[2][orderable]': 'false',

'columns[2][search][value]': '',

'columns[2][search][regex]': 'false',

'columns[3][data]': 'meta',

'columns[3][name]': '',

'columns[3][searchable]': 'true',

'columns[3][orderable]': 'false',

'columns[3][search][value]': '',

'columns[3][search][regex]': 'false',

'columns[4][data]': 'appearedTimes',

'columns[4][name]': '',

'columns[4][searchable]': 'true',

'columns[4][orderable]': 'true',

'columns[4][search][value]': '',

'columns[4][search][regex]': 'false',

'columns[5][data]': 'kda',

'columns[5][name]': '',

'columns[5][searchable]': 'true',

'columns[5][orderable]': 'true',

'columns[5][search][value]': '',

'columns[5][search][regex]': 'false',

'columns[6][data]': 'attendrate',

'columns[6][name]': '',

'columns[6][searchable]': 'true',

'columns[6][orderable]': 'true',

'columns[6][search][value]': '',

'columns[6][search][regex]': 'false',

'columns[7][data]': 'killsPergame',

'columns[7][name]': '',

'columns[7][searchable]': 'true',

'columns[7][orderable]': 'true',

'columns[7][search][value]': '',

'columns[7][search][regex]': 'false',

'columns[8][data]': 'mostkills',

'columns[8][name]': '',

'columns[8][searchable]': 'true',

'columns[8][orderable]': 'true',

'columns[8][search][value]': '',

'columns[8][search][regex]': 'false',

'columns[9][data]': 'deathsPergame',

'columns[9][name]': '',

'columns[9][searchable]': 'true',

'columns[9][orderable]': 'true',

'columns[9][search][value]': '',

'columns[9][search][regex]': 'false',

'columns[10][data]': 'mostdeaths',

'columns[10][name]': '',

'columns[10][searchable]': 'true',

'columns[10][orderable]': 'true',

'columns[10][search][value]': '',

'columns[10][search][regex]': 'false',

'columns[11][data]': 'assistsPergame',

'columns[11][name]': '',

'columns[11][searchable]': 'true',

'columns[11][orderable]': 'true',

'columns[11][search][value]': '',

'columns[11][search][regex]': 'false',

'columns[12][data]': 'mostassists',

'columns[12][name]': '',

'columns[12][searchable]': 'true',

'columns[12][orderable]': 'true',

'columns[12][search][value]': '',

'columns[12][search][regex]': 'false',

'columns[13][data]': 'goldsPermin',

'columns[13][name]': '',

'columns[13][searchable]': 'true',

'columns[13][orderable]': 'true',

'columns[13][search][value]': '',

'columns[13][search][regex]': 'false',

'columns[14][data]': 'lasthitPermin',

'columns[14][name]': '',

'columns[14][searchable]': 'true',

'columns[14][orderable]': 'true',

'columns[14][search][value]': '',

'columns[14][search][regex]': 'false',

'columns[15][data]': 'damagetoheroPermin',

'columns[15][name]': '',

'columns[15][searchable]': 'true',

'columns[15][orderable]': 'true',

'columns[15][search][value]': '',

'columns[15][search][regex]': 'false',

'columns[16][data]': 'damagetoheroPercent',

'columns[16][name]': '',

'columns[16][searchable]': 'true',

'columns[16][orderable]': 'true',

'columns[16][search][value]': '',

'columns[16][search][regex]': 'false',

'columns[17][data]': 'damagetakenPermin',

'columns[17][name]': '',

'columns[17][searchable]': 'true',

'columns[17][orderable]': 'true',

'columns[17][search][value]': '',

'columns[17][search][regex]': 'false',

'columns[18][data]': 'damagetakenPercent',

'columns[18][name]': '',

'columns[18][searchable]': 'true',

'columns[18][orderable]': 'true',

'columns[18][search][value]': '',

'columns[18][search][regex]': 'false',

'columns[19][data]': 'wardsplacedPermin',

'columns[19][name]': '',

'columns[19][searchable]': 'true',

'columns[19][orderable]': 'true',

'columns[19][search][value]': '',

'columns[19][search][regex]': 'false',

'columns[20][data]': 'wardskilledPermin',

'columns[20][name]': '',

'columns[20][searchable]': 'true',

'columns[20][orderable]': 'true',

'columns[20][search][value]': '',

'columns[20][search][regex]': 'false',

'order[0][column]': '4',

'order[0][dir]': 'desc',

'start': '0',

'length': '20',

'search[value]': '',

'search[regex]': 'false',

'area': '',

'eid': '1065',

'type': 'player',

'gametype': '2',

'filter': '{"team":{},"player":{},"meta":{}}',

}

# 字典转换为 k1 = v1 & k2 = v2

data = parse.urlencode(formdata)

# print(data)

r = requests.post (url, cookies=myCookies, data=data, headers=headers, allow_redirects=False)

r.encoding = r.apparent_encoding

c = r.text

# print("11111内容如下:----------------------------------------")

if len(c) < 100:

print('获取失败,重新获取!')

return False

print('获取成功!')

l = json.loads(c)['data']

for i in l[:20]:

print('队伍编号: {0} 队伍名: {1} 玩家名称: {2}'.format(['teamid'], i['teamname'], i['playername']))

return True

def cookie_to_dic(mycookie):

dic = {}

for i in mycookie.split('; '):

dic[i.split('=')[0]] = i.split('=')[1]

return dic

if __name__ == '__main__':

while 1:

ok = get_competition()

if ok is True:

break

# test()

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

第二个LOL网页数据爬取

首页

没有任何反爬和csrf-token认证：

from faker import Factory

import requests

import json

f = Factory.create()

def fun():

url = ' http:// lol.admin.pentaq.com/ap i/tournament_team_data?tour=29&patch= '

headers = {

'user-agent': f.user_agent()

}

r = requests.get(url, headers=headers)

r.encoding = r.apparent_encoding

c = r.text

r.close()

l = json.loads(c)['data']['teams_data']

for i in l[:20]:

print("队伍名称: {0} 队伍ID:{1} win:{2}".format(i['team_full_name'], i['team_id'], i['win']))

if __name__ == '__main__':

fun()

第三个LOL网页数据爬取

Champions - League of Legends

采用BeautifulSoup 即可。

from faker import Factory

import requests

from bs4 import BeautifulSoup

f = Factory.create()

def fun():

url = ' Champions - League of Legends '

headers = {

'user-agent': f.user_agent(),

'Accept-Language': "zh-CN,zh;q=0.9,en;q=0.8'"

}

r = requests.get(url, headers=headers)

r.encoding = r.apparent_encoding

if r.status_code != 200:

return False

c = r.text

r.close()

# print(c)

if len(c) < 10000:

return False

html = BeautifulSoup(c, 'html.parser')

l = html.find('tbody', class_='tabItem champion-trend-tier-TOP').find_all('tr')

for x in l[:5]:

a = x.find_all('td')

tmp = a[3]

b = tmp.find_all('div')

name = b[0].text

pos = b[1].text.replace('\t','').replace('\n','')

print('rank: {0} name: {1} pos:{2} 胜率:{3} 登场率:{4}'.format(a[0].text, name, pos, a[4].text, a[5].text))

return True

# for c in l[:20]:

# a = c.find_all('td')

# tmp = a[3]

# b = tmp.find_all('div')

# name = b[0].text

# pos = b[1].text

# print('rank: {0] name: {1} pos:{2} 胜率:{3} 登场率:{4}'.format(a[0].text,name,pos,a[4].text,a[5].text))

if __name__ == '__main__':

while True:

ok = fun()

if ok:

break

4.多线程爬取LOL英雄皮肤图片

1.获取对应英雄url 列表，函数get_url_list()

2.下载对应的图片保存到文件夹download()

3.main()开启多线程执行爬取任务

import requests

import json

import os

from faker import Factory

from multiprocessing.dummy import Pool as ThreadPool

import time

f = Factory.create()

headers = {

'user-agent': f.user_agent()

}

def get_url_list():

url = ' https:// game.gtimg.cn/images/lo l/act/img/js/heroList/hero_list.js '

r = requests.get(url, headers=headers)

r.encoding = r.apparent_encoding

c = r.text

Heros = json.loads(c)["hero"] # 156个hero信息

idList = []

for hero in Heros:

hero_id = hero["heroId"]

idList.append(hero_id)

# print(idList)

def spider(url):

r = requests.get(url, headers=headers)

r.encoding = r.apparent_encoding

c = r.text

r.close()

res_dict = json.loads(c)

skins = res_dict["skins"] # 15个hero信息

for index, hero in enumerate(skins): # 这里使用到enumerate获取下标,以便文件图片命名;

item = {} # 字典对象

item['name'] = hero["heroName"]

item['skin_name'] = hero["name"]

if hero["mainImg"] == '':

continue

item['imgLink'] = hero["mainImg"]

# print(item)

download(index + 1, item)

def download(index, contdict):

name = contdict['name']

path = "皮肤/" + name

if not os.path.exists(path):

os.makedirs(path)

content = requests.get(contdict['imgLink'], headers=headers).content

with open('./皮肤/' + name + '/' + contdict['skin_name'] + str(index) + '.jpg', 'wb') as f:

f.write(content)

def main():

start = time.time()

pool = ThreadPool(6)

page = []

for i in range(1, 11):

newpage = ' https://game.gtimg.cn/images/lol/act/img/js/hero/{}.js '.format(i)

print(newpage)

page.append(newpage)

result = pool.map (spider, page)

pool.close()

pool.join()

end = time.time()

print('用时:', end-start)

if __name__ == '__main__':

main()

最后需要资源的朋友可以+

：

————————————————

原文链接：英雄联盟Python爬虫_Harris-H的博客-CSDN博客

发布于 2022-01-04 20:27

Python

英雄联盟（LoL）

Python 入门