Python crawls the hot search data of Weibo and saves it!


This mainly uses the requests and bs4 libraries; the scraped information is saved to d://hotsearch.txt.

import requests
import bs4
from bs4 import BeautifulSoup

mylist = []
r = requests.get(url='https://s.weibo.com/top/summary?Refer=top_hot&topnav=1&wvr=6', timeout=10)
print(r.status_code)  # check the return status
r.encoding = r.apparent_encoding
demo = r.text
soup = BeautifulSoup(demo, "html.parser")
for link in soup.find('tbody'):
    hotnumber = ''
    if isinstance(link, bs4.element.Tag):
        # print(link('td'))
        lis = link('td')
        hotrank = lis[1]('a')[0].string  # hot search ranking
        hotname = lis[1].find('span')    # hot search name
        if isinstance(hotname, bs4.element.Tag):
            hotnumber = hotname.string   # hot search index
        mylist.append([lis[0].string, hotrank, hotnumber, lis[2].string])

f = open("d://hotsearch.txt", "w+")
for line in mylist:
    f.write('%s %s %s %s\n' % (line[0], line[1], line[2], line[3]))
f.close()  # flush the results to disk
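A note on access: Weibo sometimes redirects anonymous requests for this page to a login wall, in which case the table above comes back empty. If the printed status code is not 200 or soup.find('tbody') returns None, a common workaround is to send a browser User-Agent and your own logged-in Cookie with the request. A minimal sketch, where both header values are placeholders you must replace with values copied from your own browser:

import requests

headers = {
    'User-Agent': 'Mozilla/5.0',         # pretend to be a browser
    'Cookie': 'YOUR_WEIBO_COOKIE_HERE',  # placeholder: copy from your browser's dev tools
}
r = requests.get('https://s.weibo.com/top/summary?Refer=top_hot&topnav=1&wvr=6',
                 headers=headers, timeout=10)
print(r.status_code)  # expect 200 once valid headers are supplied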

Knowledge point expansion: using Python to crawl Weibo hot searches and analyze the data

Crawling Weibo Hot Search

import schedule
import pandas as pd
from datetime import datetime
import requests
from bs4 import BeautifulSoup

url = "https://s.weibo.com/top/summary?cate=realtimehot&sudaref=s.weibo.com&display=0&retcode=6102"
get_info_dict = {}
count = 0

def main():
    global url, get_info_dict, count
    get_info_list = []
    print("Crawling data~~~")
    html = requests.get(url).text
    soup = BeautifulSoup(html, 'lxml')
    for tr in soup.find_all(name='tr', class_=''):
        get_info = get_info_dict.copy()
        get_info['title'] = tr.find(class_='td-02').find(name='a').text
        try:
            get_info['num'] = eval(tr.find(class_='td-02').find(name='span').text)
        except AttributeError:
            get_info['num'] = None
        get_info['time'] = datetime.now().strftime("%Y/%m/%d %H:%M")
        get_info_list.append(get_info)
    get_info_list = get_info_list[1:16]
    df = pd.DataFrame(get_info_list)
    if count == 0:
        df.to_csv('datas.csv', mode='a+', index=False, encoding='gbk')
        count += 1
    else:
        df.to_csv('datas.csv', mode='a+', index=False, header=False, encoding='gbk')

# scheduled crawl: run main() once per minute
schedule.every(1).minutes.do(main)

while True:
    schedule.run_pending()
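Two details in this snippet are worth hardening. The schedule library's documented loop sleeps briefly between run_pending() calls so the while True loop does not spin a CPU core, and eval() on scraped text is risky if the page markup ever changes; int() with a fallback does the same job safely. A sketch of both adjustments, where parse_num is a hypothetical helper and main refers to the function defined above:

import time
import schedule

def parse_num(text):
    # safer than eval(): accept plain integers only, otherwise return None
    try:
        return int(text.strip())
    except (ValueError, AttributeError):
        return None

schedule.every(1).minutes.do(main)  # main() as defined in the snippet above

while True:
    schedule.run_pending()
    time.sleep(1)  # yield the CPU between schedule checks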

pyecharts data analysis

import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Bar, Timeline, Grid
from pyecharts.globals import ThemeType, CurrentConfig

df = pd.read_csv('datas.csv', encoding='gbk')
print(df)
t = Timeline(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))  # custom theme

for i in range(int(df.shape[0] / 15)):
    bar = (
        Bar()
        .add_xaxis(list(df['title'][i*15: i*15+15][::-1]))       # x-axis data
        .add_yaxis('num', list(df['num'][i*15: i*15+15][::-1]))  # y-axis data
        .reversal_axis()  # flip to a horizontal bar chart
        .set_global_opts(  # global configuration items
            title_opts=opts.TitleOpts(  # title configuration
                title=f"{list(df['time'])[i * 15]}",
                pos_right="5%",
                pos_bottom="15%",
                title_textstyle_opts=opts.TextStyleOpts(font_family='KaiTi', font_size=24, color='#FF1493')
            ),
            xaxis_opts=opts.AxisOpts(  # x-axis configuration
                splitline_opts=opts.SplitLineOpts(is_show=True),
            ),
            yaxis_opts=opts.AxisOpts(  # y-axis configuration
                splitline_opts=opts.SplitLineOpts(is_show=True),
                axislabel_opts=opts.LabelOpts(color='#DC143C')
            )
        )
        .set_series_opts(  # series configuration items
            label_opts=opts.LabelOpts(  # label configuration
                position="right",
                color='#9400D3'
            )
        )
    )
    grid = (
        Grid()
        .add(bar, grid_opts=opts.GridOpts(pos_left="24%"))
    )
    t.add(grid, "")

t.add_schema(
    play_interval=1000,      # carousel speed
    is_timeline_show=False,  # whether to show the timeline component
    is_auto_play=True,       # whether to auto-play
)
t.render('Time Carousel Diagram.html')
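Each crawl run appends 15 rows to datas.csv, which is why the loop above treats every block of 15 rows as one Timeline frame. To see the frame mechanism in isolation, here is a minimal self-contained sketch with synthetic data (the topic names and values are made up purely for illustration):

from pyecharts import options as opts
from pyecharts.charts import Bar, Timeline

t = Timeline()
for hour in range(3):  # three synthetic frames
    bar = (
        Bar()
        .add_xaxis(['topic A', 'topic B', 'topic C'])
        .add_yaxis('num', [hour + 1, hour + 2, hour + 3])
        .reversal_axis()  # horizontal bars, as in the full script
    )
    t.add(bar, f"frame {hour}")  # one chart per time point

t.add_schema(play_interval=1000, is_auto_play=True)
t.render('timeline_demo.html')  # open the generated HTML in a browser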

That concludes this article on using Python to crawl and save Weibo hot search data!

Reference: "Python crawls Weibo hot search data and saves it!", Cloud+ Community, Tencent Cloud, https://cloud.tencent.com/developer/article/1794698