# Count the number of vehicles sold for each model in the Manheim auction data
library(readr)
library(dplyr)  # defect fix: %>%, group_by(), and summarize() need dplyr loaded
url <- 'http://richardtwatson.com/data/manheim.txt'
t <- read_delim(url, delim = ',')
t %>%
  group_by(model) %>%
  summarize(frequency = n())
# Count the number of vehicles sold by each sale type
library(readr)
library(dplyr)  # defect fix: %>%, group_by(), and summarize() need dplyr loaded
url <- 'http://richardtwatson.com/data/manheim.txt'
# Defect fix: the url assignment and read_delim were fused onto one line
t <- read_delim(url, delim = ',')
t %>%
  group_by(sale) %>%
  summarize(frequency = n())
# Mean sale price for each model
library(readr)
library(sqldf)
library(dplyr)  # defect fix: %>%, group_by(), and summarize() need dplyr loaded
url <- 'http://richardtwatson.com/data/manheim.txt'
# Defect fix: the url assignment and read_delim were fused onto one line
t <- read_delim(url, delim = ',')
t %>%
  group_by(model) %>%
  summarize(mean = mean(price))
# Mean sale price for each sale type
library(readr)
library(sqldf)
library(dplyr)  # defect fix: %>%, group_by(), and summarize() need dplyr loaded
url <- 'http://richardtwatson.com/data/manheim.txt'
t <- read_delim(url, delim = ',')
t %>%
  group_by(sale) %>%
  summarize(mean = mean(price))
# Summary statistics for the electricity cost series
# (electricityprices is loaded elsewhere, e.g. via RStudio's Import Dataset)
with(electricityprices, max(cost))
with(electricityprices, min(cost))
with(electricityprices, mean(cost))
with(electricityprices, median(cost))
# Sort countries by GDP per capita (ascending) using an SQL query on the
# in-memory data frame t
library(readr)
library(sqldf)
url <- 'http://people.terry.uga.edu/rwatson/data/wealth.csv'
t <- read_delim(url,delim = ',')
# Backticks are required because the column name contains spaces
ts <- sqldf('select * from t order by `GDP per capita`;')
# Compute the average GDP per capita across all countries
library(readr)
library(sqldf)
url <- 'http://people.terry.uga.edu/rwatson/data/wealth.csv'
t <- read_delim(url,delim = ',')
# Backticks are required because the column name contains spaces
sqldf('select avg(`GDP per capita`) from t;')
# Ratio of US GDP per capita to the average GDP per capita of all countries
library(readr)
library(sqldf)
url <- 'http://people.terry.uga.edu/rwatson/data/wealth.csv'
# Defect fix: t was never created in this snippet, so the query below had
# no table to run against; read the data first
t <- read_delim(url, delim = ',')
sqldf('Select `GDP per capita`/(select avg(`GDP per capita`) from t) as "US ratio" from t where Country = "United States";')
# Test the correlation between GDP per capita and wealth per capita
library(readr)
url <- 'http://people.terry.uga.edu/rwatson/data/wealth.csv'
# Defect fix: read_delim and cor.test were fused onto one line
t <- read_delim(url, delim = ',')
cor.test(t$`GDP per capita`, t$`Wealth per capita`)
# Correlate air temperature (from a MySQL weather database) with electricity
# cost (from a csv file), joining the two sources on timestamp
library(readr)
library(sqldf)
options(sqldf.driver = "SQLite") # to avoid a conflict with RMySQL
library(DBI)
# connect to the database
conn <- dbConnect(RMySQL::MySQL(),"richardtwatson.com", dbname = "Weather", user = "db2", password = "student")
# Query the database and create file t for use with R
t1 <- dbGetQuery(conn,"SELECT * from record;")
url <- 'http://people.terry.uga.edu/rwatson/data/electricityprices.csv'
t2 <- read_delim(url,delim=',')
# Convert so timestamps are of the same data type
# (read_delim parses t2$timestamp as a datetime, while the SQL result is
# character; the join below compares them as strings)
t2$timestamp = as.character(t2$timestamp)
# Inner join the two data frames on matching timestamps
m <- sqldf('select t1.timestamp, airTemp, humidity, precipitation, cost from t1, t2 where t1.timestamp = t2.timestamp;')
# Pearson correlation between air temperature and electricity cost
cor.test(m$airTemp,m$cost)
# download the table as csv file and read the file using RStudio's Import Dataset
library(sqldf)
# Count the number of failed banks in each state
sqldf('Select ST, count(*) from banklist group by ST')
# Download the table as a csv file and read it using RStudio's Import Dataset
# Defect fix: library(sqldf) had been fused into the comment above, so the
# package was never loaded
library(sqldf)
# Count failed banks that were never acquired by another institution
sqldf('Select count(*) from banklist where `Acquiring.Institution` = "No Acquirer";')
# Count bank failures by year of closing
library(sqldf)
options(sqldf.driver = "SQLite") # to avoid a conflict with RMySQL
library(lubridate)
# banklist$closeYear <- year(strptime(banklist$Closing.Date, "%d-%b-%y"))
# Parse the Closing.Date strings (e.g. "14-Oct-16") and extract the year
temp <- strptime(banklist$Closing.Date, "%d-%b-%y")
banklist$closeYear <- year(temp)
sqldf('Select closeYear, count(*) from banklist group by closeYear;')
# Read the file using RStudio's Import Dataset.
# Set the column names as R puts an X in front of the year.
colnames(broccoli) <- c('state',1974,1978,1982,1987,1992,1997,2002,2007)
# Defect fix: library(reshape) had been fused onto the colnames line above
library(reshape)
# Convert from wide to long format: one row per state-year observation
m <- melt(broccoli,id='state')
colnames(m) <- c('state','year','acres')
# 1 hectare = 2.4711 acres
m$hectares <- round(m$acres/2.4711,2)
library(sqldf)
sqldf('select year, sum(hectares) as "Total area" from m group by year;')
# Defect fix: write.table's defaults produce a space-separated file with row
# names; set sep and row.names so the .csv file really is comma-separated
write.table(m, "broccoli01.csv", sep = ',', row.names = FALSE)
This page is part of the promotional and support
material for Data Management (open edition) by Richard T. Watson. For questions and comments, please contact the author.