library(tidyverse)
library(lubridate)

# This script imports motor-vehicle crash data published by the NYPD, keeps
# only the fatal crashes, sums the deaths by month, and produces two
# complementary graphs.
# As of 7 April 2020 there are 7 deaths recorded for March 2020 (check whether
# this increases).
# The current month is included in the output and will be incomplete.

# Step 1: Import the data from the web (~3 minute download) ####
mydata <- read.csv(
  "https://data.cityofnewyork.us/api/views/h9gi-nx95/rows.csv?accessType=DOWNLOAD",
  header = TRUE
)
# Keep only the two columns the analysis needs.
mydata <- mydata[c("CRASH.DATE", "NUMBER.OF.PERSONS.KILLED")]

# Step 2: Get rid of non-fatal crashes (so the dataset becomes much more
# manageable) ####
# When this was written the full download had 1668731 rows, so the non-fatal
# crashes are dropped here. which() keeps only rows where the comparison is
# TRUE; plain logical indexing would turn every NA death count into an all-NA
# row.
myrealdata <- mydata[which(mydata$NUMBER.OF.PERSONS.KILLED != 0), ]
# Now we have fewer than 2000 rows: only fatalities.

# Turn the crash date into a recognisable Date. The capital Y assumes a
# 4-digit year.
myrealdata$CRASH.DATE <- as.Date(myrealdata$CRASH.DATE, format = "%m/%d/%Y")

# Sum the deaths by calendar month. na.omit() drops any row whose month or
# total is NA, so that names.arg works properly in the bar plot below.
Monthlydata <- myrealdata %>%
  group_by(bymonth = floor_date(CRASH.DATE, "month")) %>%
  summarize(killed = sum(NUMBER.OF.PERSONS.KILLED)) %>%
  na.omit()

# Step 3a: Bar plot of monthly fatalities (either for all the data or just the
# last 36 months) ####
# Monthlydatashort holds the most recent 36 months. tail() returns exactly 36
# rows, or every row when fewer than 36 months are available.
Monthlydatashort <- tail(Monthlydata, 36)
# The bar heights are passed as a plain vector (already in month order from
# the summary above) and each bar is labelled with its abbreviated month name.
barplot(
  Monthlydatashort$killed,
  names.arg = month(Monthlydatashort$bymonth, label = TRUE),
  ylab = "Monthly fatalities at scene",
  xlab = "Calendar months"
)
# Original graph of all months, without specific bar labels:
# barplot(Monthlydata$killed~Monthlydata$bymonth,ylab="Monthly fatalities at scene",xlab="Calendar months")

# Step 3b: Line plot of fatalities for different years ####
# These annual line graphs take seasonality into account. To do this we need
# to split the month and year parts of the date data. Year is stored as a
# factor so that each year gets its own discrete line colour.
MonthlyAGG <- Monthlydata %>%
  mutate(
    Month = month(bymonth),
    Year = factor(as.character(year(bymonth)))
  )

# NOTE(review): ggplot2 >= 3.4.0 deprecates `size` for lines in favour of
# `linewidth`; `size` is kept so the script still runs on older versions.
ggplot() +
  geom_line(
    data = MonthlyAGG,
    size = 1.5,
    aes(x = Month, y = killed, color = Year)
  ) +
  scale_x_continuous(breaks = 1:12)

# rm(list = ls())