Home > Project > Social Science > Machine Learning > Others


Over the past year, I also practiced on some fundamental datasets.

Project 1: Handwritten Digit Recognition

  1. Dataset: MNIST
  • 70,000 images of handwritten digits: 60,000 for training and 10,000 for testing.
  • The images are in grayscale, 28x28 pixels, and are centered to reduce preprocessing and speed up operations.
  2. Model: convolutional neural network
  • Implemented in PyTorch and trained to recognize handwritten digits using the MNIST dataset.

Supporting code: https://drive.google.com/drive/folders/1Z-HiImmdSsFBaxXdVuQEJT5QSvueNj5J?usp=sharing

Project 2: Twitter Influencer Analysis

In this study, we analyze the influence and correlation by scraping data such as posting frequency and likes from celebrities’ tweets on Twitter.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from selenium import webdriver
from lxml import etree
import time
import requests
import csv

def getData(key):
    """Count a Twitter user's tweets per year and append the totals to a CSV.

    Opens ``https://twitter.com/{key}`` in Chrome, reads the ``<time>`` text of
    the tweets currently rendered in the timeline, and scrolls down whenever a
    lookup fails. Each timestamp is bucketed by the year substring it contains
    (2020, 2019, 2018, 2017); any other text is counted in the 2021 bucket
    (presumably timestamps shown without an explicit year -- confirm).

    Scraping ends, and one row ``[key, y_2021, y_2020, y_2019, y_2018, y_2017]``
    is appended to ``dickc_year_Time.csv``, when either a timestamp containing
    ``2016`` is seen or scrolling has stopped changing the second visible tweet
    three times. If neither happens within the loop bounds, nothing is written.

    Args:
        key: Twitter handle appended to ``https://twitter.com/``.

    Returns:
        None. Reaching a 2016 tweet returns to the caller (instead of
        terminating the interpreter) so that further handles can be processed.
    """
    # Function-scope imports: these names are only needed here and this keeps
    # the file's top-level import block untouched.
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver.common.by import By

    timeline_xpath = '//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div/div/div[2]/div/div/div[2]/section/div/div/div'
    # ``{}`` is the 1-based position of the tweet container in the timeline.
    tweet_time_xpath = '//*[@id="react-root"]/div/div/div[2]/main//div[2]/div/div/div[2]/section/div/div/div[{}]/div/div/article/div//a/time'
    second_tweet_xpath = tweet_time_xpath.format(2)

    def save(data):
        # The context manager closes the file even if writing the row fails.
        with open('dickc_year_Time.csv', 'a+', newline='') as file:
            csv.writer(file).writerow(data)

    options = webdriver.ChromeOptions()
    options.add_argument('--no-sandbox')
    # options.add_argument('--headless')  # headless flag
    options.add_argument('--disable-gpu')
    # Disable the notification prompt in the top-left corner of the browser.
    prefs = {
        'profile.default_content_setting_values': {
            'notifications': 2,
        },
    }
    options.add_experimental_option('prefs', prefs)
    # Hide the "Chrome is being controlled by automated test software" infobar.
    options.add_experimental_option('useAutomationExtension', False)
    options.add_experimental_option('excludeSwitches', ['enable-automation'])

    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    driver = webdriver.Chrome(options=options)
    try:
        driver.maximize_window()
        driver.get(f'https://twitter.com/{key}')
        time.sleep(3)

        # Make sure the page is scrolled to the very top before reading.
        driver.execute_script("var q=document.documentElement.scrollTop=0")
        time.sleep(3)

        print(len(driver.find_elements(By.XPATH, timeline_xpath)))

        # Insertion order is the column order of the CSV row.
        counts = {'2021': 0, '2020': 0, '2019': 0, '2018': 0, '2017': 0}
        older_years = ('2020', '2019', '2018', '2017')

        second_after_scroll = '1'
        # Initialised so the comparison below cannot hit an unbound name when
        # the very first lookup fails.
        second_before_scroll = None
        # Starts at 1 and is checked with ``> 3``: three stalled scrolls end the run.
        stalled = 1

        for _ in range(1, 100):
            for i in range(1, 100):
                try:
                    stamp = driver.find_element(By.XPATH, tweet_time_xpath.format(i)).text
                    second_before_scroll = driver.find_element(By.XPATH, second_tweet_xpath).text
                except WebDriverException:
                    # Tweet ``i`` is not rendered: scroll to load more.
                    driver.execute_script('window.scrollTo(0,document.body.scrollHeight)')
                    driver.implicitly_wait(10)
                    time.sleep(1)
                    print('正在下滑')
                    try:
                        second_after_scroll = driver.find_element(By.XPATH, second_tweet_xpath).text
                    except WebDriverException:
                        # Best effort: keep the previous value if it cannot be read.
                        pass
                    # Same second tweet before and after scrolling means the
                    # timeline did not advance.
                    if second_after_scroll == second_before_scroll:
                        stalled += 1
                    # NOTE(review): the listing this was recovered from had lost
                    # its indentation; the break is assumed to belong to the
                    # except branch (restart the scan after every scroll) -- confirm.
                    break

                print(f'{stamp}*******\n{i}')
                if '2016' in stamp:
                    # Reached tweets older than the tracked range: finish this user.
                    save([key, *counts.values()])
                    return
                for year in older_years:
                    if year in stamp:
                        counts[year] += 1
                        break
                else:
                    counts['2021'] += 1

            if stalled > 3:
                save([key, *counts.values()])
                return
            print(*counts.values())
    finally:
        # Always shut the browser down, including on errors and early returns.
        driver.quit()

if __name__ == '__main__':
    # Whitespace-separated Twitter handles to scrape, one getData() run each.
    keys = 'tonyhawk'

    # Optional one-time CSV header row (left disabled):
    # year='user 2021 2020 2019 2018 2017'
    # file = open('dickc_year_Time.csv', 'a+', newline='')
    # writer = csv.writer(file)
    # writer.writerow(year.split())
    # file.close()

    handles = keys.split()
    for key in handles:
        # Progress messages printed before and after each handle is scraped.
        print(f'正在采集--{key}')
        getData(key)
        print(f'{key}----------已完成!')