1

I have hourly temperature data from 1970 to 2021. I would like to summarize the dataset in a ggplot graph showing the mean temperature for each month and year. I also want to show that both the average temperature and the temperature volatility have increased, and that the average temperature in the 2010s is higher than in the 1990s.

Here is my code for a temperature density plot comparing the 1990s and the 2010s, but I don't know how to set the x axis.

# Work on a copy of `data` so the original data frame is left untouched.
data_re <- data 
# Label each row by decade: "1990s" for 1990-1999, "2010s" for 2010-2020,
# NA for every other year.
# NOTE(review): the "2010s" range 2010:2020 also includes the year 2020 -- confirm
# whether that is intended.
data_re$Year910 <- ifelse(data$Year %in% c(1990:1999), "1990s", 
                             ifelse(data$Year %in% c(2010:2020), "2010s",NA))
# Keep only the rows that received a decade label, then store the label as a factor.
data_re1 <- subset(data_re, Year910 %in% c("1990s","2010s"))
data_re1$Year910 <- factor(data_re1$Year910)
# Density curves, one colour per decade.
# NOTE(review): the x aesthetic is left empty below; it presumably should name the
# temperature column of `data` (its columns are not visible here) -- confirm.
ggplot(data_re1, aes(x=, color=Year910)) +
xlab('Temperature')+
geom_density()

Here is the data file. https://docs.google.com/spreadsheets/d/1HwPFJ1wKMMr0845Et60tV36WIWXM66Ig/edit?usp=sharing&ouid=111186166036061320361&rtpof=true&sd=true

Any advice on how to best go about this would be greatly appreciated.

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
Jimin Eom
  • 9
  • 3
  • Welcome to stack overflow. It looks like you aren't sure about what kind of plot, exactly, you want, which isn't a programming question. A good guess for a density plot would be `x=Temp`. Your second question is about comparing means and significance which is a big topic, see t-tests, `group_by`, and `summarize`. – PeterK May 05 '22 at 13:20

2 Answers

3

I converted the character dates to Date with `as.Date()` and used the lubridate library (`month()` and `year()`) to build the month-year labels.

# Reshape from wide to long: every column other than Date, Month and Year
# becomes one (HourNo, Temp) pair per row.
df <- pivot_longer(
  df,
  cols = !c(Date, Month, Year),
  names_to = "HourNo",
  values_to = "Temp"
)

# Parse the dates, drop rows containing any NA, and build a "Mon-YYYY" label.
df$Date <- as.Date(df$Date, format = "%Y-%m-%d")
df <- na.omit(df)
df$Abbr <- paste0(month.abb[month(df$Date)], "-", year(df$Date))

# One row per month label: mean temperature and the earliest date carrying
# that label (used as the x position when plotting).
df_Aggr <- summarise(
  group_by(df, Abbr),
  Avg = mean(Temp),
  Begin = min(Date)
)

# Line chart of the monthly averages with an x-axis break every 24 months.
ggplot(df_Aggr, aes(x = Begin, y = Avg)) +
  geom_line(color = "blue", size = 1) +
  scale_x_date(date_breaks = "24 month", date_labels = "%b-%Y") +
  theme(axis.text.x = element_text(angle = 35, hjust = 0.9, vjust = 0.8)) +
  ggtitle("Average Temperature")

enter image description here

Deepansh Arora
  • 724
  • 1
  • 3
  • 15
2

I would first of all ensure your data is in the correct format after importing it. I am assuming you have already loaded your data frame from Excel and named it df:

# Keep only the first 624 rows of the imported sheet.
# NOTE(review): presumably 52 years x 12 monthly rows (1970-2021) -- confirm.
df <- df[seq_len(624), ]
# Coerce columns 2 through 28 to numeric.
df[2:28] <- lapply(df[2:28], function(column) as.numeric(column))
# Store the dates as POSIXct so they can be compared against POSIXct cut-offs.
df$Date <- as.POSIXct(df[["Date"]])

Now we can load a couple of useful packages:

library(tidyverse)
library(geomtextpath)

Then we could summarize and plot with something like the following:

# Annual mean temperature since 1990, with one dashed, labelled segment per
# decade drawn at that decade's mean.
#
# Steps:
#   1. Per row, add up the Hour* columns and divide by 24 to get a row-level Temp.
#      NOTE(review): na.rm = TRUE drops missing hours from the sum while the
#      divisor stays 24, so rows with missing hours are biased low -- confirm
#      the data has no NAs.
#   2. Drop the Hour* columns and keep rows dated after 1989-12-31.
#   3. Collapse to one row per Year (mean Temp, median Date as x position).
#   4. Plot; the braces stop magrittr from also inserting `.` as the first
#      argument, so `.` can be reused for each layer's `data` subset.
df %>%
  rowwise() %>%
  mutate(Temp = sum(across(starts_with('Hour')), na.rm = TRUE) / 24) %>%
  select(-starts_with('Hour')) %>%
  filter(Date > as.POSIXct('1989-12-31')) %>% 
  group_by(Year) %>%
  summarize(Temp = mean(Temp), Date = median(Date)) %>% {
  ggplot(., aes(Date, Temp)) +
  geom_line(color = 'gray') +
  geom_point(color = 'gray75') +
  # 1990s: segment at the mean of the yearly values dated before 2000.
  geom_textsegment(aes(x = as.POSIXct('1990-01-01'), 
                   xend = as.POSIXct('1999-12-31'),
                   y = mean(Temp), yend = mean(Temp), color = '1990s',
                   label = '1990s'), vjust = -0.2, size = 6,
               data = .[.$Date < as.POSIXct('2000-01-01'),], linetype = 2) +
  # 2000s: yearly values dated from 2000 up to (not including) 2010.
  geom_textsegment(aes(x = as.POSIXct('2000-01-01'), 
                   xend = as.POSIXct('2009-12-31'),
                   y = mean(Temp), yend = mean(Temp), color = '2000s',
                   label = '2000s'), vjust = -0.2, size = 6,
               data = .[.$Date < as.POSIXct('2010-01-01') &
                        .$Date > as.POSIXct('1999-12-31'),], linetype = 2) +
  # 2010s: yearly values dated from 2010 up to (not including) 2020.
  geom_textsegment(aes(x = as.POSIXct('2010-01-01'), 
                   xend = as.POSIXct('2019-12-31'),
                   y = mean(Temp), yend = mean(Temp), color = '2010s',
                   label = '2010s'), vjust = -0.2, size = 6,
               data = .[.$Date < as.POSIXct('2020-01-01') &
                        .$Date > as.POSIXct('2009-12-31'),], linetype = 2) +
  theme_light(base_size = 16) +
  scale_color_brewer(palette = 'Set1') +
  theme(legend.position = 'none') +
  labs(title = 'Annual mean temperature')
  }

enter image description here

Note that I have used an annual summary of the temperature here. If you use monthly temperatures, the range of temperatures becomes much larger and the message of the plot becomes much weaker:

# Same plot as the annual version but WITHOUT the per-year aggregation: each
# row of `df` contributes its own Temp value, and the dashed, labelled
# segments mark the mean of those row-level values within each decade.
#
# Steps:
#   1. Per row, add up the Hour* columns and divide by 24 to get a row-level Temp.
#      NOTE(review): na.rm = TRUE drops missing hours from the sum while the
#      divisor stays 24, so rows with missing hours are biased low -- confirm
#      the data has no NAs.
#   2. Drop the Hour* columns and keep rows dated after 1989-12-31.
#   3. Plot; the braces stop magrittr from also inserting `.` as the first
#      argument, so `.` can be reused for each layer's `data` subset.
df %>%
  rowwise() %>%
  mutate(Temp = sum(across(starts_with('Hour')), na.rm = TRUE) / 24) %>%
  select(-starts_with('Hour')) %>%
  filter(Date > as.POSIXct('1989-12-31')) %>% {
  ggplot(., aes(Date, Temp)) +
  geom_line(color = 'gray') +
  geom_point(color = 'gray75') +
  # 1990s: segment at the mean of the rows dated before 2000.
  geom_textsegment(aes(x = as.POSIXct('1990-01-01'), 
                   xend = as.POSIXct('1999-12-31'),
                   y = mean(Temp), yend = mean(Temp), color = '1990s',
                   label = '1990s'), vjust = -0.2, size = 6,
               data = .[.$Date < as.POSIXct('2000-01-01'),], linetype = 2) +
  # 2000s: rows dated from 2000 up to (not including) 2010.
  geom_textsegment(aes(x = as.POSIXct('2000-01-01'), 
                   xend = as.POSIXct('2009-12-31'),
                   y = mean(Temp), yend = mean(Temp), color = '2000s',
                   label = '2000s'), vjust = -0.2, size = 6,
               data = .[.$Date < as.POSIXct('2010-01-01') &
                        .$Date > as.POSIXct('1999-12-31'),], linetype = 2) +
  # 2010s: rows dated from 2010 up to (not including) 2020.
  geom_textsegment(aes(x = as.POSIXct('2010-01-01'), 
                   xend = as.POSIXct('2019-12-31'),
                   y = mean(Temp), yend = mean(Temp), color = '2010s',
                   label = '2010s'), vjust = -0.2, size = 6,
               data = .[.$Date < as.POSIXct('2020-01-01') &
                        .$Date > as.POSIXct('2009-12-31'),], linetype = 2) +
  theme_light(base_size = 16) +
  scale_color_brewer(palette = 'Set1') +
  theme(legend.position = 'none') +
  # This variant is not aggregated by year, so it must not reuse the annual title.
  labs(title = 'Monthly mean temperature')
  }

enter image description here

Allan Cameron
  • 147,086
  • 7
  • 49
  • 87