Web scraping
1. POST
# imports needed for this section
import urllib.request
import urllib.parse
import json

# 1. Request headers
headers = {
    "User-Agent": "###"
}
# 2. Request URL
URL = 'https://fanyi.baidu.com/sug'
# 3. POST data
data = {
    "kw": "ed"
}
# The data dict must be url-encoded and then encoded to bytes before it can be sent as a POST body
data = urllib.parse.urlencode(data).encode('utf-8')
# Build the request and send it to the server
request = urllib.request.Request(url=URL, data=data, headers=headers)
response = urllib.request.urlopen(request)
# Read and decode the response body
content = response.read().decode('utf-8')
# Parse the JSON and print it
obj = json.loads(content)
print(obj)
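The parsed object can then be used directly. A minimal sketch, assuming the sug endpoint returns a 'data' list of k/v suggestion pairs (the exact response shape is an assumption, not confirmed by these notes):
# print each suggestion; 'data', 'k' and 'v' are assumed field names
for item in obj.get('data', []):
    print(item['k'], '->', item['v'])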
2. GET
# Example of the full request URL for the first page of the chart API:
# https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&start=0&limit=20
# Request headers shared by every page request
heads = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0",
    "Cookie": 'll="118088"; bid=XT-58m4C_4o; ap_v=0,6.0'
}
def request_page(page):
    # base URL of the top_list API; start/limit select which page of results is returned
    base_url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&'
    data = {
        "start": (page - 1) * 20,
        "limit": 20
    }
    # For a GET request the parameters are url-encoded and appended to the URL
    url = base_url + urllib.parse.urlencode(data)
    request = urllib.request.Request(url=url, headers=heads)
    return request
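A quick check of what the helper builds (page 2 here is only an example value):
# urlencode turns the dict into a query string such as start=20&limit=20
req = request_page(2)
print(req.full_url)
# https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&start=20&limit=20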
def get_content(request):
    # send the request and decode the response body
    response = urllib.request.urlopen(request)
    content = response.read().decode('utf-8')
    return content
def download_file(page, content):
    # write each page to its own file so later pages do not overwrite earlier ones
    with open('douban_' + str(page) + '.json', 'w', encoding='utf-8') as f:
        f.write(content)
# Entry point: crawl the requested page range
start_page = int(input('Enter the start page: '))
end_page = int(input('Enter the end page: '))
for i in range(start_page, end_page + 1):
    request = request_page(i)
    content = get_content(request)
    download_file(i, content)
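As a follow-up, the saved files can be read back with the json module. A minimal sketch, assuming the Douban endpoint returns a JSON array whose items carry a 'title' field (the field name is an assumption, not confirmed by these notes):
import json

# print the movie titles from one downloaded page; 'title' is an assumed key
with open('douban_1.json', encoding='utf-8') as f:
    movies = json.load(f)
for movie in movies:
    print(movie.get('title'))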