-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathAlexLanyi_Final.py
76 lines (67 loc) · 2.19 KB
/
AlexLanyi_Final.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# -*- coding: utf-8 -*-
"""
Created on Wed Dec 2 15:08:23 2020
@author: Alexander L
@data: MIT Election Data and Science Lab, 2018, "countypres_2000-2016.tab",
County Presidential Election Returns 2000-2016, doi.org/10.7910/DVN/VOQCHQ/HEIJCQ,
Harvard Dataverse, V6, UNF:6:ZZe1xuZ5H2l4NUiSRcRf8Q== [fileUNF]
"""
import numpy as np # Numpy
import pandas as pd # Pandas
import matplotlib.pyplot as plt # matplotlib
import scipy.stats # Scipy
# Load the county-level presidential returns (election data gathered outside
# of class) into a pandas DataFrame; the relative path is resolved against
# the current working directory.
df = pd.read_csv(filepath_or_buffer='countypres_2000-2016.csv')
# Define colorize function for parties
def partyColorize(party):
    """Return one single-letter colour code per party label.

    Parameters
    ----------
    party : iterable
        Party labels. Matching is exact and case-sensitive.

    Returns
    -------
    list of str
        'b' for 'democrat', 'r' for 'republican', 'g' for 'green' and 'y'
        for every other value (including missing values), in input order.
    """
    party_colors = {
        'democrat': 'b',
        'republican': 'r',
        'green': 'g',
    }
    # Anything that is not one of the three named parties falls back to 'y'.
    return [party_colors.get(label, 'y') for label in party]
def _scatter_votes_3d(frame, title):
    """Draw a 3D scatter of FIPS x candidate votes x year, coloured by party.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide the 'FIPS', 'candidatevotes', 'year' and 'partyColor'
        columns.
    title : str
        Title placed above the axes.

    Returns
    -------
    The 3D axes the points were drawn on (created on a new figure).
    """
    ax = plt.figure().add_subplot(111, projection='3d')
    ax.scatter(
        frame['FIPS'],
        frame['candidatevotes'],
        frame['year'],
        c=frame['partyColor'],
        s=1,
        depthshade=True)
    ax.set_xlabel('FIPS')
    ax.set_ylabel('Votes')
    ax.set_zlabel('Year')
    ax.set_title(title)
    return ax


df['partyColor'] = partyColorize(df['party'])  # Call colorize function

# Plot data using matplotlib
ax1 = _scatter_votes_3d(df, "Voting Data")

# Statistical analysis
CI = 0.999999  # Confidence level as a fraction (0-1), not a percentage
# Share of the total vote won by the candidate in each row. A zero
# 'totalvotes' yields +/-inf, which would turn the mean (and therefore the
# interval) into inf/nan, so those rows are treated as missing as well.
df['votePercent'] = (df['candidatevotes'] / df['totalvotes']).replace(
    [np.inf, -np.inf], np.nan)
df.dropna(subset=['votePercent'], inplace=True)  # Drop nan vote totals
# Student-t confidence interval for the MEAN vote share.
# NOTE(review): this is an interval for the mean, not a prediction interval
# for individual rows, so with many rows it is very narrow -- confirm that
# this is the intended definition of "outlier" below.
vPCI = scipy.stats.t.interval(
    CI,
    len(df['votePercent']) - 1,
    loc=np.mean(df['votePercent']),
    scale=scipy.stats.sem(df['votePercent']))
# CI is a fraction, so scale it by 100 before labelling it with "%".
print("Vote percent confidence interval", "{:g}".format(CI * 100), "% :")
print(vPCI)

# Leave behind outliers for analysis: remove every row whose vote share lies
# inside the interval. between() includes both endpoints by default; the old
# boolean `inclusive=True` argument is rejected by pandas 2.x, so it is
# omitted.
inside_ci = df['votePercent'].between(vPCI[0], vPCI[1])
df.drop(df[inside_ci].index, inplace=True)

# Plot florida data using matplotlib
fldf = df[df['state'] == "Florida"]
ax2 = _scatter_votes_3d(fldf, "Florida Voting Data Outside Vote% CI")

# Export year 2000 from florida
fldf[fldf['year'] == 2000].to_csv('FilteredFlorida2000.csv')
# NOTE(review): nothing in this script calls plt.show(); the figures are
# presumably displayed by an interactive environment -- confirm before
# running it as a plain script.