日誌2021-06-30 03:59
自動化小屋文章GP數圖表分析器作者:熾炎之翼
"""Chart the GP count of every article in a home.gamer.com.tw creation list.

The script asks for an owner ID, walks every page of that owner's creation
list, collects the GP figure shown for each article and plots the figures in
oldest-to-newest order with reference lines at 10, 20 and 50 GP.
"""
import time

import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

CREATION_URL = 'https://home.gamer.com.tw/creation.php'
REQUEST_TIMEOUT = 10   # seconds; without a timeout a stalled connection hangs forever
REQUEST_DELAY = 0.3    # seconds to pause between page requests to keep server load low

send_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    "Connection": "keep-alive",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8",
}


def fetch_page(owner, page):
    """Download one creation-list page and return it as a parsed document.

    Args:
        owner: The owner ID typed by the user.
        page: 1-based page number of the creation list.

    Returns:
        A ``BeautifulSoup`` document built from the response body.

    Raises:
        requests.RequestException: On connection problems, timeouts or an
            HTTP error status.
    """
    response = requests.get(
        CREATION_URL,
        # Passing the query as ``params`` lets requests URL-encode the owner ID.
        params={'page': page, 'owner': owner, 'v': 3, 't': 0},
        headers=send_headers,
        timeout=REQUEST_TIMEOUT,
    )
    # An error page contains no GP spans and would silently yield a wrong chart.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")


def find_final_page(soup):
    """Return the largest page number linked from *soup* (at least 1).

    Every ``<a>`` whose text is purely numeric is treated as a pager link,
    except anchors whose markup contains ``TS1``.
    """
    final_page = 1
    for anchor in soup.find_all('a'):
        label = anchor.text
        # isdecimal() (not isdigit()) so that characters int() cannot parse,
        # such as superscript digits, are skipped instead of raising.
        if label.isdecimal() and 'TS1' not in str(anchor):
            final_page = max(final_page, int(label))
    return final_page


def extract_gps(soup):
    """Return the GP figures found on one page, in page order.

    The figure is read from each ``<span class="BC4">`` after dropping the
    last three characters of its text (presumably a unit suffix -- confirm
    against the live page markup).

    Raises:
        ValueError: If the remaining text is not an integer.
    """
    return [int(span.text[:-3]) for span in soup.find_all('span', class_='BC4')]


def plot_gps(owner, gps):
    """Draw the GP line chart for *owner* and show it.

    Args:
        owner: Owner ID, used in the chart title.
        gps: Non-empty list of integer GP counts, oldest article first.
    """
    x = list(range(1, len(gps) + 1))
    y = gps
    # Size is in inches; the original note "*100" presumably refers to the
    # pixel size at 100 dpi.
    plt.figure(figsize=(60, 20))
    plt.title('GP_analysis for ' + owner)
    plt.ylabel('GP')
    plt.xlabel('Number of Articles')
    plt.yticks([10, 20, 50, max(gps)])
    plt.axhline(10, color="Red")
    plt.axhline(20, color="Blue")
    plt.axhline(50, color="Orange")
    # Print each value just above its data point.
    for a, b in zip(x, y):
        plt.text(a, b + 0.5, str(b), ha='center', va='bottom', fontsize=6)
    plt.plot(x, y)
    plt.show()


def main():
    """Prompt for an owner ID, collect all GP figures and plot them."""
    owner = input('Please enter the ID : ')

    # Page 1 serves both for discovering the page count and as the first
    # data page, so it is downloaded only once.
    first_page = fetch_page(owner, 1)
    final_page = find_final_page(first_page)

    gps = []
    for page in range(1, final_page + 1):
        if page == 1:
            soup = first_page
        else:
            time.sleep(REQUEST_DELAY)
            soup = fetch_page(owner, page)
        gps.extend(extract_gps(soup))
        print('Analyzing {}/{} ......'.format(page, final_page))

    if not gps:
        # max() on an empty list would raise; there is nothing to plot anyway.
        print('No articles found for ' + owner)
        return

    # Reverse the collected order so the chart runs oldest to newest.
    gps.reverse()
    plot_gps(owner, gps)


if __name__ == "__main__":
    main()
"""Chart the GP count of every article in a home.gamer.com.tw creation list,
labelling the x axis with the article titles.

The script asks for an owner ID, walks every page of that owner's creation
list, collects each article's title and GP figure, and plots the figures in
oldest-to-newest order with reference lines at 10, 20 and 50 GP.
"""
import time

import requests
from bs4 import BeautifulSoup
from matplotlib.font_manager import FontProperties  # kept from the original; not referenced below
import matplotlib.pyplot as plt

# Use a font that contains CJK glyphs so the titles render, and keep the
# minus sign drawable with that font.
plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei']
plt.rcParams['axes.unicode_minus'] = False

CREATION_URL = 'https://home.gamer.com.tw/creation.php'
REQUEST_TIMEOUT = 10   # seconds; without a timeout a stalled connection hangs forever
REQUEST_DELAY = 0.3    # seconds to pause between page requests to keep server load low

send_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36",
    "Connection": "keep-alive",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8",
}


def fetch_page(owner, page):
    """Download one creation-list page and return it as a parsed document.

    Args:
        owner: The owner ID typed by the user.
        page: 1-based page number of the creation list.

    Returns:
        A ``BeautifulSoup`` document built from the response body.

    Raises:
        requests.RequestException: On connection problems, timeouts or an
            HTTP error status.
    """
    response = requests.get(
        CREATION_URL,
        # Passing the query as ``params`` lets requests URL-encode the owner ID.
        params={'page': page, 'owner': owner, 'v': 3, 't': 0},
        headers=send_headers,
        timeout=REQUEST_TIMEOUT,
    )
    # An error page contains no GP spans and would silently yield a wrong chart.
    response.raise_for_status()
    return BeautifulSoup(response.content, "html.parser")


def find_final_page(soup):
    """Return the largest page number linked from *soup* (at least 1).

    Every ``<a>`` whose text is purely numeric is treated as a pager link,
    except anchors whose markup contains ``TS1`` (the class this script
    reads article titles from).
    """
    final_page = 1
    for anchor in soup.find_all('a'):
        label = anchor.text
        # isdecimal() (not isdigit()) so that characters int() cannot parse,
        # such as superscript digits, are skipped instead of raising.
        if label.isdecimal() and 'TS1' not in str(anchor):
            final_page = max(final_page, int(label))
    return final_page


def extract_titles(soup):
    """Return the article titles on one page as plain strings, in page order.

    Titles are taken from every ``<a class="TS1">``. Storing the text rather
    than the tag object keeps the tick labels independent of how matplotlib
    treats non-string labels.
    """
    return [anchor.get_text().strip() for anchor in soup.find_all('a', class_='TS1')]


def extract_gps(soup):
    """Return the GP figures found on one page, in page order.

    The figure is read from each ``<span class="BC4">`` after dropping the
    last three characters of its text (presumably a unit suffix -- confirm
    against the live page markup).

    Raises:
        ValueError: If the remaining text is not an integer.
    """
    return [int(span.text[:-3]) for span in soup.find_all('span', class_='BC4')]


def plot_gps(owner, names, gps):
    """Draw the GP line chart for *owner* and show it.

    Args:
        owner: Owner ID, used in the chart title.
        names: Article titles, oldest article first.
        gps: Non-empty list of integer GP counts, oldest article first.
    """
    x = list(range(1, len(gps) + 1))
    y = gps
    # Size is in inches; the original note "*100" presumably refers to the
    # pixel size at 100 dpi.
    plt.figure(figsize=(60, 20))
    plt.title('GP_analysis for ' + owner)
    plt.ylabel('GP')
    if len(names) == len(gps):
        plt.xlabel('Titles')
        plt.xticks(x, names, rotation='vertical')
    else:
        # Titles and GP spans are scraped independently, so a page whose
        # markup yields unequal counts cannot be labelled reliably; fall back
        # to plain article numbers rather than failing inside xticks().
        print('Found {} titles but {} GP values; labelling by article number.'
              .format(len(names), len(gps)))
        plt.xlabel('Number of Articles')
    plt.yticks([10, 20, 50, max(gps)])
    plt.axhline(10, color="Red")
    plt.axhline(20, color="Blue")
    plt.axhline(50, color="Orange")
    # Print each value just above its data point.
    for a, b in zip(x, y):
        plt.text(a, b + 0.5, str(b), ha='center', va='bottom', fontsize=6)
    plt.plot(x, y)
    plt.show()


def main():
    """Prompt for an owner ID, collect all titles and GP figures, and plot them."""
    owner = input('Please enter the ID : ')

    # Page 1 serves both for discovering the page count and as the first
    # data page, so it is downloaded only once.
    first_page = fetch_page(owner, 1)
    final_page = find_final_page(first_page)

    names = []
    gps = []
    for page in range(1, final_page + 1):
        if page == 1:
            soup = first_page
        else:
            time.sleep(REQUEST_DELAY)
            soup = fetch_page(owner, page)
        names.extend(extract_titles(soup))
        gps.extend(extract_gps(soup))
        print('Analyzing {}/{} ......'.format(page, final_page))

    if not gps:
        # max() on an empty list would raise; there is nothing to plot anyway.
        print('No articles found for ' + owner)
        return

    # Reverse the collected order so the chart runs oldest to newest.
    names.reverse()
    gps.reverse()
    plot_gps(owner, names, gps)


if __name__ == "__main__":
    main()
2021-06-30 08:48熾炎之翼:誰給GP網頁是用JS動態生成 所以用selenium 套件一樣可以實現
2021-06-30 08:48熾炎之翼:其實程式部分還好 就是一直在鑽牛角尖的感覺
2021-06-30 08:48熾炎之翼:感謝感謝
2021-06-30 08:49熾炎之翼:爬蟲不難又好玩
很快樂
2021-06-30 08:49熾炎之翼:沒關係 知道程式會幹嘛就可以了
2021-06-30 08:49熾炎之翼:不佬 程式的部分真的不難
2021-06-30 08:49熾炎之翼:謝謝
2021-06-30 08:50熾炎之翼:還行還行
2021-06-30 08:51熾炎之翼:對 這個問題網路上查就一堆
2021-06-30 08:52熾炎之翼:我是沒有一個個去嘗試 直接用了個人喜好的微軟正黑體XDDD
2021-06-30 08:53熾炎之翼:對 但是我這邊只有用到plt部分的功能 我覺得這樣說明可能比較直接一點
2021-06-30 08:54熾炎之翼:其實不一定 有些會擋有些不會擋 雖然的確大多網站都會擋就是了 不過既然分享文章就特別寫出來讓大家知道而已XD
2021-06-30 08:57熾炎之翼:slelenium我用過
我之前寫過巴哈自動發文機器人
他的確就是完全模擬正常鍵鼠
但是速度上卻因此被犧牲掉很多
我想如果像我600多篇文
那一篇一篇點進去假設搞個一秒鐘
(載入網頁、滑到底下balabala,回到上一頁)
這樣就要搞個10分鐘欸wwwwwww
2021-06-30 08:58熾炎之翼:你剛剛講的沒有錯喔
2021-06-30 08:58熾炎之翼:你繼續當你的醫生啦XD
2021-06-30 09:02熾炎之翼:Python最大最大的優點就是多到不行的酷function了XDDDD
2021-06-30 09:03熾炎之翼:感謝支持!
2021-06-30 09:03熾炎之翼:謝謝讓我舉例w
2021-06-30 09:03熾炎之翼:過獎了
2021-06-30 09:27熾炎之翼:其實跟資安是沒什麼關係
因為爬蟲能爬到的資料一般使用者按F12也能找到
最大的危害是對伺服器流量的占用
如果程式不斷對伺服器端發出request
很多人一起積少成多就會演變成小型的DDOS攻擊了w
2021-06-30 10:16熾炎之翼:否否
2021-06-30 10:17熾炎之翼:其實確實有機會被ban
我自己的巴哈大樓樓層爬蟲是有設0.3秒的sleep
還是要注意一下
2021-06-30 10:36熾炎之翼:加油 慢慢研究吧
2021-06-30 10:54熾炎之翼:不敢當
2021-06-30 10:54熾炎之翼:爬蟲真的很有趣
2021-06-30 11:07熾炎之翼:感謝GP~
2021-06-30 11:07熾炎之翼:還可以 這次的邏輯不難
2021-06-30 11:32熾炎之翼:只要知道程式的功能就好了ㄛ
2021-06-30 11:38熾炎之翼:這是真的 之前用過的感想就是很慢
之後可以來試試放個帳密
2021-06-30 11:39熾炎之翼:你媽…死了…
2021-06-30 12:06熾炎之翼:還行
2021-06-30 13:10熾炎之翼:我全部做完是2點 打完文章是4點 起床是9點半
2021-06-30 14:55熾炎之翼:你在幹米恐龍
2021-06-30 14:56熾炎之翼:我要去買非洲阿姨的影片祝你生日快樂了
2021-06-30 14:56熾炎之翼:沒有吧
2021-07-15 20:08熾炎之翼:我其實放了也不會特別講XD
2021-07-15 20:09熾炎之翼:其實放到github更沒人會看啦XD