1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
| from selenium import webdriver from lxml import etree import time import requests import csv
def getData(key):
    """Tally an account's tweets per year and append the tally to a CSV file.

    Opens ``https://twitter.com/{key}`` in Chrome, repeatedly reads the
    timestamps of the timeline entries currently in the page and scrolls to
    the bottom whenever an entry cannot be read. Each timestamp text is
    bucketed by the year string it contains.

    Scraping stops, and one row is appended to ``dickc_year_Time.csv``, when
    either a timestamp containing ``2016`` is seen or three scrolls in a row
    left the reference entry unchanged (the timeline stopped advancing).
    If the pass limit is exhausted first, no row is written.

    The row layout is ``[key, other, 2020, 2019, 2018, 2017]`` where
    ``other`` counts every timestamp that mentions none of 2016-2020.

    Args:
        key: Account handle; used in the profile URL and as the first
            CSV column.

    Returns:
        None.
    """
    # Local imports: both modules ship with the selenium package the file
    # already depends on, and keeping them here leaves the import header alone.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    max_passes = 100          # outer/inner loops both run over range(1, 100)
    max_stalled_scrolls = 3   # give up after this many scrolls without progress
    tracked_years = ('2020', '2019', '2018', '2017')

    cells_xpath = (
        '//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div/div/div[2]'
        '/div/div/div[2]/section/div/div/div'
    )

    def time_xpath(index):
        """Return the XPath of the <time> element of timeline entry *index*."""
        return (
            '//*[@id="react-root"]/div/div/div[2]/main//div[2]/div/div/div[2]'
            f'/section/div/div/div[{index}]/div/div/article/div//a/time'
        )

    def save(data):
        """Append one row to the results CSV."""
        with open('dickc_year_Time.csv', 'a+', newline='') as file:
            csv.writer(file).writerow(data)

    # NOTE(review): timestamps without any of the tracked years land in
    # `other` -- presumably because current-year tweets show no year in their
    # text. A pinned tweet older than 2016 would be counted here too; confirm.
    other = 0
    per_year = dict.fromkeys(tracked_years, 0)

    def result_row():
        """Build the CSV row from the current counters."""
        return [key, other] + [per_year[year] for year in tracked_years]

    options = webdriver.ChromeOptions()
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-gpu')
    prefs = {
        'profile.default_content_setting_values': {
            'notifications': 2
        }
    }
    options.add_experimental_option('prefs', prefs)
    options.add_experimental_option('useAutomationExtension', False)
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    driver = webdriver.Chrome(options=options)

    try:
        driver.maximize_window()
        driver.get(f'https://twitter.com/{key}')
        time.sleep(3)

        # Make sure reading starts from the top of the page.
        driver.execute_script("var q=document.documentElement.scrollTop=0")
        time.sleep(3)

        print(len(driver.find_elements(By.XPATH, cells_xpath)))

        # Text of entry 2 before and after a scroll; equal values mean the
        # scroll loaded nothing new. `before_scroll` starts as None so the
        # comparison is well defined even if the very first lookup fails.
        before_scroll = None
        after_scroll = '1'
        stalled_scrolls = 0

        for _ in range(1, max_passes):
            for i in range(1, max_passes):
                try:
                    stamp = driver.find_element(By.XPATH, time_xpath(i)).text
                    before_scroll = driver.find_element(
                        By.XPATH, time_xpath(2)).text
                except WebDriverException:
                    # Entry i is not in the page (yet): scroll for more
                    # content and restart the inner scan from the top.
                    driver.execute_script(
                        'window.scrollTo(0,document.body.scrollHeight)')
                    driver.implicitly_wait(10)
                    time.sleep(1)
                    print('正在下滑')
                    try:
                        after_scroll = driver.find_element(
                            By.XPATH, time_xpath(2)).text
                    except WebDriverException:
                        # Best effort: keep the previous value when the
                        # reference entry is unavailable after the scroll.
                        pass
                    if after_scroll == before_scroll:
                        stalled_scrolls += 1
                    break

                print(f'{stamp}*******\n{i}')

                if '2016' in stamp:
                    # Reached tweets older than the tracked range: done.
                    # Return (not exit()) so the caller keeps running.
                    save(result_row())
                    return

                for year in tracked_years:
                    if year in stamp:
                        per_year[year] += 1
                        break
                else:
                    other += 1

            if stalled_scrolls >= max_stalled_scrolls:
                save(result_row())
                return

            print(other, *(per_year[year] for year in tracked_years))
    finally:
        # quit() ends the session and the chromedriver process on every exit
        # path, including errors; close() would only close the window.
        driver.quit()
if __name__ == '__main__':
    # Whitespace-separated account handles; each one is scraped in turn.
    handles = 'tonyhawk'.split()

    for key in handles:
        print(f'正在采集--{key}')       # progress: starting this handle
        getData(key)
        print(f'{key}----------已完成!')  # progress: handle finished
|