學習紀錄-爬蟲應用爬yahoo並輸出csv

Carl
7 min read · Dec 19, 2022

將 tkinter 與 crawler 應用在一起，紀錄學習。

    # Fetch the Yahoo Taiwan news front page and collect the headline elements
    # that the UI and the CSV export below read as `titles` / `titles2`.
    website = 'https://tw.news.yahoo.com/'
    headers = {
        'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1"
    }
    try:
        # A timeout keeps the script from hanging forever on a stalled
        # connection; requests applies no timeout unless one is given.
        resp = requests.get(website, headers=headers, timeout=10)
    except requests.exceptions.RequestException as err:
        # RequestException is the base class of MissingSchema, so a malformed
        # URL is still handled here, along with connection errors and timeouts.
        print(website, err)
        sys.exit(1)  # non-zero exit status: the run failed

    if resp.status_code != 200:  # 200 = OK; anything else is treated as a failure
        print('Invalid url:', resp.url, resp.status_code)
        sys.exit(1)

    # Parse only after the response is known to be good.
    soup = BeautifulSoup(resp.text, 'html.parser')

    # Focus (headline) story container. The selector is the full class string
    # copied from the page markup, so it may stop matching when Yahoo changes
    # its markup -- in that case `titles` is simply empty.
    titles = soup.find_all("div", class_='Z(10) Pos(a) Lh(32px) B(10px) Mx(14px) Fz(25px) Fz(20px)--sm Fw(b)')

    # Anchors of the remaining stories; each one carries the title text and
    # the link in its `href` attribute.
    titles2 = soup.find_all("a", class_='D(ib) Ov(h) Whs(nw) C($c-fuji-grey-l) C($c-fuji-blue-1-c):h Td(n) Fz(16px) Tov(e) Fw(700)')

# Create the root window: fixed 800x600 size, placed at screen offset
# (550, 250), with a custom title-bar icon.
window = tk.Tk()
window.title('多功能的驗證程式 Version:' + python_version)
window.geometry('800x600+550+250')
# Disable resizing in both directions.
window.resizable(width=False, height=False)
# The icon path is relative to the current working directory.
window.iconbitmap(bitmap='./pic/751000-wheel-chair.ico')

#===UI show
def onClickbtn(link):
    """Open *link* using the ``webbrowser`` module.

    Args:
        link: URL to open.
    """
    # new=0 and autoraise=True are webbrowser.open's defaults, spelled out.
    webbrowser.open(link, new=0, autoraise=True)
def copyUrl(url):
    """Copy *url* via ``pc.copy``.

    Args:
        url: Text to copy.
    """
    # NOTE(review): `pc` is presumably the pyperclip module imported under an
    # alias outside this excerpt -- confirm against the file's imports.
    pc.copy(url)
def output_csv():
    """Write the scraped headlines to ``yahoo_news.csv``.

    Reads the module-level ``titles`` (only its first element, the focus
    story, is used) and ``titles2`` (the other stories) and writes one row per
    story under the columns ``標題`` (title) and ``連結`` (link).

    The file is opened once in ``'w'`` mode, which creates it when missing and
    truncates it when present. No prior delete is needed, so the export also
    works on the first run, when there is no old file to remove.

    Raises:
        IndexError: if ``titles`` is empty.
    """
    csv_name = 'yahoo_news.csv'
    fieldnames = ['標題', '連結']

    # Build every row up front so the file is opened exactly once.
    rows = [{
        '標題': titles[0].text,
        '連結': titles[0].find('a').get('href'),
    }]
    rows.extend({'標題': i.text, '連結': i.get('href')} for i in titles2)

    try:
        # utf-8-sig writes a BOM at the start of the file.
        with open(csv_name, 'w', newline='', encoding='utf-8-sig') as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
    except OSError as e:
        # e.g. no permission, or the file is locked by another program.
        print(e)
        return

    print('export csv file complete\ntotal write crawler count:', len(rows))
# Build the news widgets inside `window`.
#
# Every "copy" button gets a callable as its `command`. Writing
# `command=copyUrl(link)` would call copyUrl immediately while the button is
# being built and hand its return value (None) to Tk, leaving the button inert.
# The lambda's default argument binds the link at definition time, so each
# button in the loop copies its own link.

# True: show the focus story plus the first three other stories.
# False: show only the first "other" story (single-link verification).
SHOW_NEWS_LIST = True

if SHOW_NEWS_LIST:
    yahoo_today = time.localtime()
    ftoday = time.strftime('%Y %m %d', yahoo_today)
    labdate = tk.Label(window, text='這是' + ftoday + ' YAHOO 的焦點新聞')
    labdate.config(bg='lightyellow')
    labdate.pack()

    # Focus story: title label, link in a text box, and a copy button.
    lab = tk.Label(window, text='標題:' + titles[0].text)
    lab.pack()

    text = tk.Text(window, height=3)
    main_link = titles[0].find('a').get('href')
    text.insert(tk.END, main_link)
    text.pack()
    btn2 = tk.Button(window, text='copy',
                     command=lambda url=main_link: copyUrl(url))
    btn2.pack()

    # First three other stories, laid out the same way.
    for i in titles2[0:3]:
        lab = tk.Label(window, text='標題:' + i.text)
        lab.pack()

        text = tk.Text(window, height=3)
        other_link = i.get('href')
        text.insert(tk.END, other_link)
        text.pack()
        btn2 = tk.Button(window, text='copy',
                         command=lambda url=other_link: copyUrl(url))
        btn2.pack()
else:  # single-link verification
    # This button has no command attached; it only displays the title.
    btn = tk.Button(window, text=titles2[0].text)
    btn.pack()

    text = tk.Text(window, height=3)
    text.insert(tk.END, titles2[0].get('href'))
    text.pack()
    btn2 = tk.Button(window, text='copy',
                     command=lambda url=titles2[0].get('href'): copyUrl(url))
    btn2.pack()
#=== main window show
# Button that runs output_csv when clicked.
export_csv_btn = tk.Button(master=window, command=output_csv, text='export CSV file')
export_csv_btn.pack()

# Menu setup; menu_config_setting is defined outside this excerpt.
menu_config_setting()

# Start the Tk event loop.
window.mainloop()
tkinter顯示焦點第一則新聞及其他新聞
輸出標題及連結

--

--

Carl

「Cancell 抗癌好夥伴」共同創辦人,學習網路爬蟲,資料整合,Python & C語言。大部分為個人學習紀錄,如有錯誤,請多包涵。