Thanks to Davy and everyone I think I made progress. It still will not produce a line graph but there is nothing logically in the code that looks wrong. I take no credit here - I just cut and paste what smarter people than me have figured out but I still don't get a graph. Link to github csv at the end.
data = read.csv("C:/Users/12083/Desktop/librarydata.csv") # Read the data into R
head(data) # Quality control, looks good
str(data)
data$dates = as.Date(data$dates, format = "%d/%m/%Y") # This formats the date as dates for R
library(tidyverse) # This will import some functions that you need, spcifically %>% and ggplot
# Step 0: look that the data makes sense to you
summary(data$dates)
summary(data$city)
# Step 1: filter the right data
start.date = as.Date("2003-01-02")
end.date = as.Date("2010-05-04")
filtered = data %>%
filter(dates >= start.date &
dates <= end.date) # This will only take rows between those dates
summary(filtered)
colnames(filtered)
library(dplyr)
filtered_agg <- filtered %>%
group_by(city, dates, Location) %>%
summarize(location_sum=n())
filtered_agg
summary(filtered_agg)
# Step 2: Plotting
# Now you can create the plot with ggplot:
# Notes:
# I added geom_point() so that each X value gets a point.
# I think it's easier to read. You can remove this if you like
# Also added color, because I like it, feel free to delete
# The problem is in here - somewhere
Plot = ggplot(filtered_agg, aes(x=dates, y=Location, group = city)) + geom_line(aes(linetype=city, color = city)) + geom_point(aes(color=city))
Plot
dput
https://github.com/karl1776/chart
colnames(filtered)
1 "ï..Class.ID" "city" "dates" "year" "month"
[6] "day" "cit" "Department.College" "Course.Level" "Course.Title"
[11] "Tour." "TILT." "Date.Taught" "Session.Number" "AM.PM"
[16] "Hour.Count" "Library.Instructor" "Other.Library.Instructor" "Duplicate." "Course.Instructor"
[21] "ACRL" "IPED" "Location" "Building.Room" "Distance.Class."
[26] "Location.of.Site.1" "Site.1.Number.of.Students" "Location.of.Site.2" "Site.2.Number.of.Students" "Location.of.Site.3"
[31] "Site.3.Number.of.Students" "Location.of.Site.4" "Site.4.Number.of.Students" "Location.of.Site.5" "Site.5.Number.of.Students"
[36] "Location.of.Site.6" "Site.6.Number.of.Students" "Location.of.Site.7" "Site.7.Number.of.Students" "Location.of.Site.8"
[41] "Site.8.Number.of.Students" "Location.of.Site.9" "Site.9.Number.of.Students" "Location.of.Site.10" "Site.10.Number.of.Students"
Maybe I just don't see it but I have a hard time looking at examples with dummy data and translating that to how to load actual data from a csv file The picture shows my output from the dummy data -- exactly what I want. When I use my actual data nothing happens - have I left out a ggplot command to print the plot?
library(readxl)
require(tidyverse)
require(ggplot2)
require(lubridate)
#load data
df <- read_excel("C:/Users/12083/Desktop/librarydata.xlsx")
#plot data
df_example %>%
ggplot(aes(date,city, color=city))+
geom_line(aes(linetype=lt))+ #you can use single string for the same linetype for all lines or a vector of strings for each data point
scale_linetype_identity()+ #this removes the linetype from the legend
theme_minimal()
df_example
I get this output -- this is exactly right but no plot to accompany it.
city dates classes lt
1 Boise 2020-01-01 52 solid
2 Boise 2020-02-01 36 solid
3 Boise 2020-03-01 69 solid
4 Boise 2020-04-01 100 solid
5 Boise 2020-05-01 72 solid
6 Pocatello 2020-01-01 82 dashed
7 Pocatello 2020-02-01 15 dashed
8 Pocatello 2020-03-01 68 dashed
9 Pocatello 2020-04-01 17 dashed
10 Pocatello 2020-05-01 51 dashed
11 Salt Lake 2020-01-01 71 dotted
12 Salt Lake 2020-02-01 65 dotted
13 Salt Lake 2020-03-01 33 dotted
14 Salt Lake 2020-04-01 44 dotted
15 Salt Lake 2020-05-01 16 dotted
16 Twin Falls 2020-01-01 3 dotdash
17 Twin Falls 2020-02-01 30 dotdash
18 Twin Falls 2020-03-01 19 dotdash
19 Twin Falls 2020-04-01 34 dotdash
20 Twin Falls 2020-05-01 69 dotdash
21 Elsewhere 2020-01-01 62 longdash
22 Elsewhere 2020-02-01 14 longdash
23 Elsewhere 2020-03-01 59 longdash
24 Elsewhere 2020-04-01 35 longdash
25 Elsewhere 2020-05-01 91 longdash
dput
structure(list(`Class ID` = c(4438, 4439, 4428, 4437, 4430, 4431,
4432, 4433, 4434, 4435, 4436, 4427, 4440, 4417, 4414, 4407, 4413,
4412, 4418, 4410), city = c("Pocatello", "Pocatello", "Pocatello",
"Pocatello", "Pocatello", "Pocatello", "Pocatello", "Pocatello",
"Pocatello", "Pocatello", "Pocatello", "Pocatello", "Pocatello",
"Meridian", "Pocatello", "Pocatello", "Pocatello", "Pocatello",
"Pocatello", "Idaho Falls"), date = structure(c(1468972800, 1468972800,
1468886400, 1468800000, 1468454400, 1468454400, 1468368000, 1468368000,
1468368000, 1468281600, 1468281600, 1466553600, 1466553600, 1461283200,
1460592000, 1460419200, 1460419200, 1460073600, 1460073600, 1459987200
), tzone = "UTC", class = c("POSIXct", "POSIXt")), year = c(2016,
2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016,
2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016), month = c(7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, 4, 4, 4, 4, 4, 4, 4), day = c(20,
20, 29, 18, 14, 14, 13, 13, 13, 12, 12, 22, 22, 22, 13, 12, 12,
8, 8, 7), cit = c("Pocatello", "Pocatello", "Pocatello", "Pocatello",
"Pocatello", "Pocatello", "Pocatello", "Pocatello", "Pocatello",
"Pocatello", "Pocatello", "Pocatello", "Pocatello", "Meridian",
"Pocatello", "Pocatello", "Pocatello", "Pocatello", "Pocatello",
"Idaho Falls"), `Department/College` = c("College of Arts and Letters",
"College of Arts and Letters", "College of Arts and Letters",
"College of Arts and Letters", "College of Arts and Letters",
"College of Arts and Letters", "Library", "Library", "Library",
"College of Arts and Letters", "College of Arts and Letters",
"College of Education", "Library", "Division of Health Sciecnes",
"College of Arts and Letters", "College of Arts and Letters",
"College of Arts and Letters", "College of Arts and Letters",
"College of Arts and Letters", "College of Arts and Letters"),
`Course Level` = c("Lower Division", "Lower Division", "Lower Division",
"Lower Division", "Lower Division", "Lower Division", "K-12",
"K-12", "K-12", "Lower Division", "Lower Division", "Lower Division",
"K-12", "Graduate", "Lower Division", "Lower Division", "Lower Division",
"Lower Division", "Lower Division", "Lower Division"), `Course Title` = c("ACAD 1111",
"ACAD 1111", "POLS 1110", "ENGL 1123", "ACAD 1111", "ACAD 1111",
"Kid University", "Kid University", "Kid University", "ACAD 1111",
"ACAD 1111", "EDUC 1110", "Kid University", "Nursing_Orientation",
"ENGL 1102", "ENGL 1101", "ENGL 1101", "ENGL 1102", "ENGL 1102",
"ENGL 1102"), `Tour?` = c(FALSE, FALSE, FALSE, TRUE, FALSE,
FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE,
FALSE, FALSE, TRUE, TRUE, FALSE), `TILT?` = c(FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE
), `Date Taught` = structure(c(1468972800, 1468972800, 1468886400,
1468800000, 1468454400, 1468454400, 1468368000, 1468368000,
1468368000, 1468281600, 1468281600, 1466553600, 1466553600,
1461283200, 1460592000, 1460419200, 1460419200, 1460073600,
1460073600, 1459987200), tzone = "UTC", class = c("POSIXct",
"POSIXt")), `Session Number` = c("Third Session", "Third Session",
"Single Session", NA, "Second Session", "Second Session",
"Single Session", "Single Session", "Single Session", "First Session",
"First Session", "Single Session", "Single Session", "Single Session",
"Single Session", "Single Session", "First Session", "Third Session",
"Third Session", "Second Session"), `AM/PM` = c("AM", "PM",
"PM", "PM", "AM", "PM", "PM", "PM", "PM", "AM", "PM", "PM",
"PM", "AM", "PM", "PM", "AM", "AM", "AM", "AM"), `Hour Count` = c(1.5,
1.5, 1, 1.5, 1.5, 1.5, 0.5, 0.5, 1, 1.5, 1.5, 1.5, 1, 1,
1.5, 1.5, 1.5, 1, 1, 1.5),
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Cathy Gray",
NA, NA, NA, NA, "Monte Asche", "Philip Homan", NA), `Duplicate?` = c(FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
FALSE), ACRL = c(0, 0, 7, 5, 0, 0, 7, 7, 7, 22, 9,
8, 13, 35, 19, 6, 8, 0, 0, 0), IPED = c(22, 9, 7, 5, 23,
9, 7, 7, 7, 22, 9, 8, 13, 35, 19, 6, 8, 19, 19, 22), `Location of Instructor` = c("Pocatello",
"Pocatello", "Pocatello", "Pocatello", "Pocatello", "Pocatello",
"Pocatello", "Pocatello", "Pocatello", "Pocatello", "Pocatello",
"Pocatello", "Pocatello", "Meridian", "Pocatello", "Pocatello",
"Pocatello", "Pocatello", "Pocatello", "Idaho Falls"), `Building/Room` = c("LIBR 212",
"LIBR 212", "LIBR 212", "LIBR 212", "LIBR 212", "LIBR 212",
"Special Collections", "LIBR 212", "LIBR 212", "LIBR 212",
"LIBR 212", "LIBR 212", "LIBR 212", "Meridian", "LIBR 212",
"LIBR 212", "LIBR 212", "LIBR 212", "LIBR 212", "CHE 306"
), `Distance Class?` = c(FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
FALSE, FALSE, FALSE, FALSE, FALSE, FALSE), `Location of Site 1` = c("Boise",
"Boise", "Boise", "Boise", "Boise", "Boise", "Boise", "Boise",
"Boise", "Boise", "Boise", "Boise", "Boise", "Boise", "Boise",
"Boise", "Boise", "Boise", "Boise", "Boise"), `Site 1 Number of Students` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`Location of Site 2` = c("Idaho Falls", "Idaho Falls", "Idaho Falls",
"Idaho Falls", "Idaho Falls", "Idaho Falls", "Idaho Falls",
"Idaho Falls", "Idaho Falls", "Idaho Falls", "Idaho Falls",
"Idaho Falls", "Idaho Falls", "Idaho Falls", "Idaho Falls",
"Idaho Falls", "Idaho Falls", "Idaho Falls", "Idaho Falls",
"Idaho Falls"), `Site 2 Number of Students` = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `Location of Site 3` = c("Twin Falls",
"Twin Falls", "Twin Falls", "Twin Falls", "Twin Falls", "Twin Falls",
"Twin Falls", "Twin Falls", "Twin Falls", "Twin Falls", "Twin Falls",
"Twin Falls", "Twin Falls", "Twin Falls", "Twin Falls", "Twin Falls",
"Twin Falls", "Twin Falls", "Twin Falls", "Twin Falls"),
`Site 3 Number of Students` = c(0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `Location of Site 4` = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), `Site 4 Number of Students` = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
`Location of Site 5` = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), `Site 5 Number of Students` = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `Location of Site 6` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), `Site 6 Number of Students` = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `Location of Site 7` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), `Site 7 Number of Students` = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `Location of Site 8` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), `Site 8 Number of Students` = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `Location of Site 9` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), `Site 9 Number of Students` = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), `Location of Site 10` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA), `Site 10 Number of Students` = c(0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA,
-20L), class = c("tbl_df", "tbl", "data.frame"))
>