library(tidyverse)
library(lubridate)

# This script imports crash data from the NYPD, keeps only the fatal crashes,
# sums the deaths by month, and produces several complementary graphs.
# As of 7 April 2020 there were 7 deaths for March 2020 (check whether this
# increases). The data include the current month, which will be incomplete.

# Step 1: Import the data from the web (~8 minute download) ####
# read.csv() (not readr::read_csv()) is used on purpose: its default
# check.names = TRUE turns "CRASH DATE" into CRASH.DATE, which the rest of the
# script relies on.
mydata <- read.csv(
  "https://data.cityofnewyork.us/api/views/h9gi-nx95/rows.csv?accessType=DOWNLOAD",
  header = TRUE
)
mydata2 <- mydata[c("CRASH.DATE", "NUMBER.OF.PERSONS.KILLED")]
summary(mydata)

# Step 2: Get rid of non-fatal crashes (so the dataset becomes much more
# manageable) ####
# A missing death count makes the comparison NA, which base subsetting turns
# into an all-NA row; those rows are removed by na.omit() further down.
myrealdata <- mydata2[mydata2$NUMBER.OF.PERSONS.KILLED != 0, ]
# Now we have fewer than 2000 rows (at the time of writing): only fatalities.

# Turn the crash date into a recognisable Date. The capital Y assumes a
# 4-digit year.
myrealdata$CRASH.DATE <- as.Date(myrealdata$CRASH.DATE, format = "%m/%d/%Y")

# Sum the deaths by calendar month. The na.omit() is there so that the
# names.arg argument of barplot() works properly later.
Monthlydata <- myrealdata %>%
  group_by(bymonth = floor_date(CRASH.DATE, "month")) %>%
  summarize(killed = sum(NUMBER.OF.PERSONS.KILLED)) %>%
  na.omit()

# Step 3a: Barplot of monthly fatalities (either for all the data or just the
# last 36 months) ####
# Monthlydatashort holds the most recent 36 months. tail() returns exactly 36
# rows (the previous index arithmetic returned 37) and simply returns
# everything when fewer than 36 months are available.
Monthlydatashort <- tail(Monthlydata, 36)
barplot(
  Monthlydatashort$killed ~ Monthlydatashort$bymonth,
  names.arg = month(Monthlydatashort$bymonth, label = TRUE),
  ylab = "Monthly fatalities at scene",
  xlab = "Calendar months"
)
barplot(
  Monthlydata$killed ~ Monthlydata$bymonth,
  names.arg = month(Monthlydata$bymonth, label = TRUE),
  ylab = "Monthly fatalities at scene",
  xlab = "Calendar months"
)
# Original graph without specific bar labels.
barplot(
  Monthlydata$killed ~ Monthlydata$bymonth,
  ylab = "Monthly fatalities at scene",
  xlab = "Calendar months"
)

# Step 3b: Line plot of fatalities for different years ####
# These annual line graphs take seasonality into account. To do this we need
# to split the month and year parts of the date.
MonthlyAGG <- mutate(
  Monthlydata,
  Month = month(bymonth),
  Year = year(bymonth)
)
# The next line drops the latest month, as it will be incomplete. Delete it if
# you want to show that month anyway. head(x, -1) keeps all rows but the last.
MonthlyAGG <- head(MonthlyAGG, -1)
# Year becomes a factor so that every year gets its own discrete line colour.
MonthlyAGG$Year <- factor(as.character(MonthlyAGG$Year))
ggplot() +
  geom_line(
    data = MonthlyAGG,
    size = 1.5,
    aes(x = Month, y = killed, color = Year)
  ) +
  scale_x_continuous(breaks = 1:12) +
  scale_y_continuous(limits = c(0, 40))

# Step 3c: Alternative barplot using ggplot (work in progress) ####
# bymonth is a Date, so a date scale is used to get readable month labels on
# the x axis.
ggplot(data = Monthlydatashort) +
  geom_col(mapping = aes(x = bymonth, y = killed)) +
  scale_x_date(date_labels = "%b %Y")

# Step 4: Export the monthly data if you want ####
write.csv(MonthlyAGG, "C:/temp/nypd.csv", row.names = TRUE)

# Clean up only the objects created by this script (the raw download is large)
# instead of wiping the whole workspace.
rm(mydata, mydata2, myrealdata, Monthlydata, Monthlydatashort, MonthlyAGG)