爬取网站链接:https://lishi.tianqi.com/xiamen/202312.html
爬取了厦门市2023年一整年的天气数据,包括最高温、最低温、天气、风力风向等。
爬虫代码:
- import requests
- import pandas as pd
- import csv
- from pyecharts.charts import Bar,Timeline
- import pyecharts.options as opts
- import parsel
# Cookies captured from a browser session on lishi.tianqi.com; they are sent
# with every request through the ``cookies`` argument (so no raw 'cookie'
# header is needed in ``headers``).
cookies = {
    'cityPy': 'xiamen',
    'cityPy_expire': '1721098187',
    'UserId': '17204933865319972',
    'Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2': '1720493387',
    'HMACCOUNT': '4A9167DA75AB7059',
    'Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2': '1720493402',
    'Hm_lvt_7c50c7060f1f743bccf8c150a646e90a': '1720493523',
    'Hm_lvt_30606b57e40fddacb2c26d2b789efbcb': '1720493529',
    'Hm_lpvt_30606b57e40fddacb2c26d2b789efbcb': '1720493529',
    'Hm_lpvt_7c50c7060f1f743bccf8c150a646e90a': '1720493646',
}

# Browser-like request headers copied from a Chrome session.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    'pragma': 'no-cache',
    'priority': 'u=0, i',
    'referer': 'https://lishi.tianqi.com/xiamen/202302.html',
    'sec-ch-ua': '"Not/A)Brand";v="8", "Chromium";v="126", "Google Chrome";v="126"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
}


def parse_day(li):
    """Build one CSV record from an ``li`` row of the monthly history list.

    The row is expected to hold a ``div.th200`` with "<date> <weekday>" text
    and at least four ``div.th140`` cells: max temperature, min temperature,
    weather, and "<wind direction> <wind level>".

    Returns a dict keyed by the CSV column names, or ``None`` when the row
    lacks the date text or has fewer than four data cells.  A missing weekday
    or wind level is stored as an empty string instead of raising.
    """
    date_text = li.css('div.th200::text').get()
    cells = li.css('div.th140::text').getall()
    if date_text is None or len(cells) < 4:
        return None

    date_parts = date_text.split()
    if not date_parts:
        return None
    wind_parts = cells[3].split()

    return {
        '日期': date_parts[0],
        '星期': date_parts[1] if len(date_parts) > 1 else '',
        '最高温': cells[0],
        '最低温': cells[1],
        '天气': cells[2],
        '风向': wind_parts[0] if wind_parts else '',
        '风力': wind_parts[1] if len(wind_parts) > 1 else '',
    }


# The ``with`` block guarantees the CSV is flushed and closed before any later
# step reads it back, even if a request or a parse fails part-way through.
with open('天气数据.csv', mode='w', encoding='utf-8', newline='') as f:
    csv_writer = csv.DictWriter(
        f,
        fieldnames=['日期', '星期', '最高温', '最低温', '天气', '风向', '风力'],
    )
    csv_writer.writeheader()

    for month in range(1, 13):
        # Month pages are addressed as YYYYMM, so the month is zero-padded.
        url = f'https://lishi.tianqi.com/xiamen/2023{month:02d}.html'
        # A timeout keeps the script from hanging forever on a stalled connection.
        response = requests.get(url=url, cookies=cookies, headers=headers, timeout=10)
        # Fail loudly on an HTTP error rather than silently writing no rows
        # for that month.
        response.raise_for_status()
        response.encoding = response.apparent_encoding

        selector = parsel.Selector(response.text)
        for li in selector.css('div.tian_three ul.thrui li'):
            row = parse_day(li)
            if row is None:
                print(f'skipping unparsable row in {url}')
                continue
            csv_writer.writerow(row)
            print(*row.values())
爬取结果保存到csv文件:
接下来读取文件,对文件格式做调整,以进行绘图操作。
# Load the scraped data and reshape it into per-month weather counts.
df = pd.read_csv('天气数据.csv')
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly instead of being wrapped in print().
df.info()

# Parse the date column and derive a monthly period (e.g. 2023-01) for grouping.
df['日期'] = pd.to_datetime(df['日期'])
df['month_'] = df['日期'].dt.to_period('M')

# Count how many days of each weather type occurred in each month, then give
# the three resulting columns the names used by the plotting step.
new_data = (
    df.groupby(['month_', '天气'])
    .size()
    .reset_index(name='count')
    .rename(columns={'month_': 'month', '天气': 'weather'})
)

# head must be called; printing the bare attribute shows the bound method.
print(df.head())
print(new_data)

# Filter January's rows, keep weather and count, sort by count in DESCENDING
# order, and convert to a list of [weather, count] pairs, e.g.
# [['多云', 14], ['小雨', 5], ['晴', 5], ['雾', 4], ['阴', 2], ['中雨', 1]]
print(
    new_data[new_data['month'] == '2023-01'][['weather', 'count']]
    .sort_values(by='count', ascending=False)
    .values.tolist()
)
new_data数据格式如下:
现在进行绘图操作:
# Build a timeline chart with one horizontal bar chart per month.
timeline = Timeline()
timeline.add_schema(play_interval=1000)  # autoplay interval in milliseconds

for month in new_data['month'].unique():
    # Rows for this month as [weather, count] pairs, largest count first.
    month_rows = new_data.loc[new_data['month'] == month, ['weather', 'count']]
    data = month_rows.sort_values(by='count', ascending=False).values.tolist()
    print(data)

    weather_names = [pair[0] for pair in data]
    day_counts = [pair[1] for pair in data]

    bar = Bar()
    bar.add_xaxis(weather_names)
    bar.add_yaxis('', day_counts)
    bar.reversal_axis()  # swap the axes so the bars run horizontally
    bar.set_global_opts(
        title_opts=opts.TitleOpts(title='厦门市2023年每月天气变化'),
    )
    bar.set_series_opts(
        label_opts=opts.LabelOpts(position='right'),
    )

    # Each month becomes one frame of the timeline, labelled with the period.
    timeline.add(bar, f'{month}')

timeline.render('天气轮播图.html')
复制代码
点击轮播图下方的按钮就可以进行播放了。
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作!更多信息请访问主页:qidao123.com(ToB企服之家,中国第一个企服评测及商务社交产业平台)。