6

I am trying to plot flow map (for singapore) . I have Entry(Lat,Long) and Exit (Lat,long). I am trying to map the flow from entry to exit in singapore map.

structure(list(token_id = c(1.12374e+19, 1.12374e+19, 1.81313e+19, 
1.85075e+19, 1.30752e+19, 1.30752e+19, 1.32828e+19, 1.70088e+19, 
1.70088e+19, 1.70088e+19, 1.05536e+19, 1.44818e+19, 1.44736e+19, 
1.44736e+19, 1.44736e+19, 1.44736e+19, 1.89909e+19, 1.15795e+19, 
1.15795e+19, 1.15795e+19, 1.70234e+19, 1.70234e+19, 1.44062e+19, 
1.21512e+19, 1.21512e+19, 1.95909e+19, 1.95909e+19, 1.50179e+19, 
1.50179e+19, 1.24174e+19, 1.36445e+19, 1.98549e+19, 1.92068e+19, 
1.18468e+19, 1.18468e+19, 1.92409e+19, 1.92409e+19, 1.21387e+19, 
1.9162e+19, 1.9162e+19, 1.40385e+19, 1.40385e+19, 1.32996e+19, 
1.32996e+19, 1.69103e+19, 1.69103e+19, 1.57387e+19, 1.40552e+19, 
1.40552e+19, 1.00302e+19), Entry_Station_Lat = c(1.31509, 1.33261, 
1.28425, 1.31812, 1.33858, 1.29287, 1.39692, 1.37773, 1.33858, 
1.33322, 1.28179, 1.30036, 1.43697, 1.39752, 1.27637, 1.39752, 
1.41747, 1.35733, 1.28405, 1.37773, 1.35898, 1.42948, 1.32774, 
1.42948, 1.349, 1.36017, 1.34971, 1.38451, 1.31509, 1.31509, 
1.37002, 1.34971, 1.31231, 1.39169, 1.31812, 1.44909, 1.29341, 
1.41747, 1.33759, 1.44062, 1.31509, 1.38451, 1.29461, 1.32388, 
1.41747, 1.27614, 1.39752, 1.39449, 1.33261, 1.31231), Entry_Station_Long = c(103.76525, 
103.84718, 103.84329, 103.89308, 103.70611, 103.8526, 103.90902, 
103.76339, 103.70611, 103.74217, 103.859, 103.85563, 103.7865, 
103.74745, 103.84596, 103.74745, 103.83298, 103.9884, 103.85152, 
103.76339, 103.75191, 103.83505, 103.67828, 103.83505, 103.74956, 
103.88504, 103.87326, 103.74437, 103.76525, 103.76525, 103.84955, 
103.87326, 103.83793, 103.89548, 103.89308, 103.82004, 103.78479, 
103.83298, 103.69742, 103.80098, 103.76525, 103.74437, 103.80605, 
103.93002, 103.83298, 103.79156, 103.74745, 103.90051, 103.84718, 
103.83793), Exit_Station_Lat = structure(c(48L, 34L, 118L, 60L, 
14L, 54L, 10L, 49L, 49L, 74L, 71L, 65L, 102L, 5L, 102L, 119L, 
116L, 10L, 13L, 88L, 117L, 66L, 40L, 62L, 117L, 37L, 67L, 34L, 
85L, 44L, 102L, 44L, 115L, 29L, 92L, 17L, 121L, 70L, 120L, 52L, 
85L, 34L, 42L, 11L, 4L, 115L, 62L, 48L, 92L, 14L), .Label = c("1.27082", 
"1.27091", "1.27236", "1.27614", "1.27637", "1.27646", "1.27935", 
"1.28221", "1.28247", "1.28405", "1.28621", "1.28819", "1.28932", 
"1.29287", "1.29309", "1.29338", "1.29341", "1.29461", "1.29694", 
"1.29959", "1.29974", "1.30034", "1.30252", "1.30287", "1.30392", 
"1.30394", "1.30619", "1.30736", "1.30842", "1.31139", "1.3115", 
"1.31167", "1.31188", "1.31509", "1.31654", "1.31756", "1.31913", 
"1.31977", "1.32008", "1.3205", "1.32104", "1.32388", "1.32573", 
"1.32725", "1.32774", "1.33119", "1.33155", "1.33261", "1.33322", 
"1.33474", "1.33554", "1.33759", "1.33764", "1.33858", "1.33921", 
"1.34037", "1.34225", "1.34293", "1.3432", "1.34426", "1.34857", 
"1.349", "1.34905", "1.35158", "1.35733", "1.35898", "1.36017", 
"1.3625", "1.36849", "1.37002", "1.37121", "1.37304", "1.37666", 
"1.37775", "1.3786", "1.37862", "1.38001", "1.38029", "1.3803", 
"1.38178", "1.38269", "1.38295", "1.38399", "1.38423", "1.38451", 
"1.38671", "1.38672", "1.38777", "1.38814", "1.3894", "1.39147", 
"1.39169", "1.39189", "1.39208", "1.39389", "1.39449", "1.39452", 
"1.39628", "1.39692", "1.39717", "1.39732", "1.39752", "1.39821", 
"1.39928", "1.39962", "1.4023", "1.40455", "1.40511", "1.40524", 
"1.40843", "1.40961", "1.41184", "1.41588", "1.41685", "1.41747", 
"1.42526", "1.42948", "1.43256", "1.43697", "1.44062", "1.44909"
), class = "factor"), Exit_Station_Long = structure(c(59L, 19L, 
27L, 4L, 65L, 3L, 63L, 6L, 6L, 21L, 93L, 121L, 9L, 56L, 9L, 32L, 
16L, 63L, 44L, 23L, 50L, 12L, 54L, 11L, 50L, 71L, 87L, 19L, 7L, 
118L, 9L, 118L, 49L, 90L, 96L, 31L, 45L, 61L, 38L, 2L, 7L, 19L, 
117L, 47L, 34L, 49L, 11L, 59L, 96L, 65L), .Label = c("103.67828", 
"103.69742", "103.70611", "103.72092", "103.73274", "103.74217", 
"103.74437", "103.74529", "103.74745", "103.74905", "103.74956", 
"103.75191", "103.7537", "103.75803", "103.76011", "103.76215", 
"103.76237", "103.76449", "103.76525", "103.76648", "103.76667", 
"103.76893", "103.7696", "103.77082", "103.77145", "103.77266", 
"103.774", "103.77866", "103.78185", "103.78425", "103.78479", 
"103.7865", "103.78744", "103.79156", "103.79631", "103.79654", 
"103.79836", "103.80098", "103.803", "103.80605", "103.80745", 
"103.80781", "103.80978", "103.81703", "103.82004", "103.82592", 
"103.82695", "103.83216", "103.83298", "103.83505", "103.83918", 
"103.83953", "103.83974", "103.84387", "103.84496", "103.84596", 
"103.84673", "103.84674", "103.84718", "103.84823", "103.84955", 
"103.85092", "103.85152", "103.85226", "103.8526", "103.85267", 
"103.85436", "103.85446", "103.85452", "103.86088", "103.86149", 
"103.86275", "103.86291", "103.86395", "103.86405", "103.86896", 
"103.87087", "103.87135", "103.87534", "103.87563", "103.8763", 
"103.87971", "103.88003", "103.88126", "103.88243", "103.88296", 
"103.88504", "103.8858", "103.88816", "103.8886", "103.88934", 
"103.89054", "103.89237", "103.89313", "103.8938", "103.89548", 
"103.89719", "103.89723", "103.89854", "103.9003", "103.90051", 
"103.90208", "103.90214", "103.9031", "103.90484", "103.90537", 
"103.90597", "103.90599", "103.90663", "103.9086", "103.90902", 
"103.9126", "103.9127", "103.91296", "103.91616", "103.9165", 
"103.93002", "103.94638", "103.94929", "103.95337", "103.9884"
), class = "factor")), .Names = c("token_id", "Entry_Station_Lat", 
"Entry_Station_Long", "Exit_Station_Lat", "Exit_Station_Long"
), row.names = c(10807L, 10808L, 10810L, 10815L, 10817L, 10818L, 
10819L, 10820L, 10823L, 10824L, 10826L, 10827L, 10829L, 10831L, 
10832L, 10833L, 10834L, 10835L, 10836L, 10838L, 10840L, 10841L, 
10843L, 10847L, 10850L, 10852L, 10854L, 10855L, 10859L, 10861L, 
10869L, 10872L, 10883L, 10886L, 10891L, 10895L, 10896L, 10897L, 
10900L, 10902L, 10903L, 10906L, 10910L, 10911L, 10912L, 10913L, 
10915L, 10920L, 10921L, 10924L), class = "data.frame")

I am trying to get something this : Map Flow

IvanSanchez
  • 18,272
  • 3
  • 30
  • 45
Prasanna Nandakumar
  • 4,295
  • 34
  • 63
  • So, what is your problem ?There is no question here... – Sébastien Rochette May 31 '17 at 11:54
  • Is `token_id`the grouping variable? – CMichael May 31 '17 at 12:05
  • Yes , token id is grouping variable – Prasanna Nandakumar May 31 '17 at 12:09
  • If I understand correctly you have different users who, e.g., bought a day ticket and took multiple rides. Are the rides already ordered in the original data set. My code assumes that. – CMichael May 31 '17 at 12:30
  • Yes, they use same card to commute daily. This is a subset. The dataset is not ordered. But i have a date variable. I would generally like to see the pattern in which they travel. And figure most congested route – Prasanna Nandakumar May 31 '17 at 12:32
  • Well then I assume my plot should help you - you could include alpha in the geom_path to spot often overlayed paths as congested ones. To ensure validity of the individual user ride chains it is important to have the data sorted by ascending time. Your source had rounded paths but that is only helpful for long distance travel - on a city level I would recommend direct paths. Very interesting data! – CMichael May 31 '17 at 12:36
  • I updated the solution to include curved lines between the stations. Feel free to indicate what prevents you from accepting any of the solutions. – CMichael Jun 01 '17 at 09:17
  • @CMichael I am looking for something visually appealing (using Geospheres). Will wait for sometime, if nothing of that sort, will select an answer. – Prasanna Nandakumar Jun 01 '17 at 09:58
  • I would recommend updating your question to reflect this appropriately. So far you are asking - "I am trying to plot flow map (for singapore) ." It is a bit hard for contributors to guess what you are after in the end. – CMichael Jun 01 '17 at 10:07
  • Had indicated a link to flowingdata in my question. – Prasanna Nandakumar Jun 01 '17 at 10:08
  • But the example were intercontinental connetions. Do you just want to include this https://stackoverflow.com/questions/11813299/how-to-add-geo-spatial-connections-on-a-ggplot-map? – CMichael Jun 01 '17 at 10:09
  • Most oft them in SO were intercontinental, so was looking something with geospheres and leaflets for a particular region (singapore) – Prasanna Nandakumar Jun 01 '17 at 10:12
  • I added an answer according to these request but you will be disappointed because the greactcircles are not bend because the distance within Singapore is too short. – CMichael Jun 01 '17 at 11:23

5 Answers5

7

Just realized that the original solution usin geom_path was more complicated than necessary. geom_segmentworks without changing the data:

require(ggplot2)
require(ggmap)
basemap <- get_map("Singapore",
                   source = "stamen",
                   maptype = "toner",
                   zoom = 11)

g = ggplot(a)
map = ggmap(basemap, base_layer = g)
map = map + coord_cartesian() +
      geom_curve(size = 1.3,
                 aes(x=as.numeric(Entry_Station_Long),
                     y=as.numeric(Entry_Station_Lat),
                     xend=as.numeric(as.character(Exit_Station_Long)),
                     yend=as.numeric(as.character(Exit_Station_Lat)),
                     color=as.factor(token_id)))
map

This solution leverages Draw curved lines in ggmap, geom_curve not working to implement curved lines on a map.

ggmaps used for simplicity - for more ambitious projects I would recommend leaflet.

enter image description here

Below the solution using a long data format with some prior data wrangling. It also uses straight lines instead of the curves above.

a %>%
  mutate(path = row_number()) -> a

origin = select(a,token_id,Entry_Station_Lat,Entry_Station_Long,path)
origin$type = "origin"
dest = select(a,token_id,Exit_Station_Lat,Exit_Station_Long,path)
dest$type = "dest"

colnames(origin) = c("id","lat","long","path","type")
colnames(dest) = c("id","lat","long","path","type")
complete = rbind(origin,dest)
complete %>% arrange(path,type) -> complete

require(ggmap)
basemap <- get_map("Singapore",
                   source = "stamen",
                   maptype = "toner",
                   zoom = 11)

g = ggplot(complete, aes(x=as.numeric(long),
                         y=as.numeric(lat)))
map = ggmap(basemap, base_layer = g)

map + geom_path(aes(color = as.factor(id)),
                size = 1.1)

enter image description here

CMichael
  • 1,856
  • 16
  • 20
5

If you want to plot it on an actual Google Map, and recreate the style of your linked map, you can use my googleway package that uses Google's Maps API. You need an API key to use their maps

library(googleway)

df$Exit_Station_Lat <- as.numeric(as.character(df$Exit_Station_Lat))
df$Exit_Station_Long <- as.numeric(as.character(df$Exit_Station_Long))

df$polyline <- apply(df, 1, function(x) {
    lat <- c(x['Entry_Station_Lat'], x['Exit_Station_Lat'])
    lon <- c(x['Entry_Station_Long'], x['Exit_Station_Long'])
    encode_pl(lat = lat, lon = lon)
})

mapKey <- 'your_api_key'

style <- '[ { "stylers": [{ "visibility": "simplified"}]},{"stylers": [{"color": "#131314"}]},{"featureType": "water","stylers": [{"color": "#131313"},{"lightness": 7}]},{"elementType": "labels.text.fill","stylers": [{"visibility": "on"},{"lightness": 25}]}]'

google_map(key = mapKey, style = style) %>%
    add_polylines(data = df, 
      polyline = "polyline", 
      mouse_over_group = "Entry_Station_Lat",
      stroke_weight = 0.7,  
      stroke_opacity = 0.5, 
      stroke_colour = "#ccffff")

enter image description here


Note, to recreate the map using flight data, see the example given in ?add_polylines


You can also show other types of routes, for example, driving between the locations by using Google's Directions API to encode the driving routes.

df$drivingRoute <- lst_directions <- apply(df, 1, function(x){
    orig <- as.numeric(c(x['Entry_Station_Lat'], x['Entry_Station_Long']))
    dest <- as.numeric(c(x['Exit_Station_Lat'], x['Exit_Station_Long']))

    dir <- google_directions(origin = orig, destination = dest, key = apiKey)
    dir$routes$overview_polyline$points
})


google_map(key = mapKey, style = style) %>%
    add_polylines(data = df, 
      polyline = "drivingRoute", 
      mouse_over_group = "Entry_Station_Lat",
      stroke_weight = 0.7,  
      stroke_opacity = 0.5, 
      stroke_colour = "#ccffff")

enter image description here

SymbolixAU
  • 25,502
  • 4
  • 67
  • 139
  • This looks good. I need my visulaization to look something (where Entry and Exit should indicate as a Point and the lines should be curved. More Visually appealing.) – Prasanna Nandakumar Jun 01 '17 at 04:22
5

I've also written the mapdeck library to make visualisations like this more appealing*

library(mapdeck)

set_token("MAPBOX_TOKEN")  ## set your mapbox token here

df$Exit_Station_Lat <- as.numeric(as.character(df$Exit_Station_Lat))
df$Exit_Station_Long <- as.numeric(as.character(df$Exit_Station_Long))

mapdeck(
  style = mapdeck_style('dark')
  , location = c(104, 1)
  , zoom = 8
  , pitch = 45
) %>%
  add_arc(
    data = df
    , origin = c("Entry_Station_Long", "Entry_Station_Lat")
    , destination = c("Exit_Station_Long", "Exit_Station_Lat")
    , layer_id = 'arcs'
    , stroke_from_opacity = 100
    , stroke_to_opacity = 100
    , stroke_width = 3
    , stroke_from = "#ccffff"
    , stroke_to = "#ccffff"
  )

enter image description here

*subjectively speaking

SymbolixAU
  • 25,502
  • 4
  • 67
  • 139
4

Alternative answer using leaflet and geosphere

#get Packages
require(leaflet)
require(geosphere)

#format data
a$Entry_Station_Long = as.numeric(as.character(a$Entry_Station_Long))
a$Entry_Station_Lat = as.numeric(as.character(a$Entry_Station_Lat))
a$Exit_Station_Long = as.numeric(as.character(a$Exit_Station_Long))
a$Exit_Station_Lat = as.numeric(as.character(a$Exit_Station_Lat))
a$id = as.factor(as.numeric(as.factor(a$token_id)))

#create some colors
factpal <- colorFactor(heat.colors(30), pathList$id)

#create a list of interpolated paths
pathList = NULL
for(i in 1:nrow(a))
{
tmp = gcIntermediate(c(a$Entry_Station_Long[i],
                 a$Entry_Station_Lat[i]),
               c(a$Exit_Station_Long[i],
                 a$Exit_Station_Lat[i]),n = 25,
               addStartEnd=TRUE)
tmp = data.frame(tmp)
tmp$id = a[i,]$id
tmp$color = factpal(a[i,]$id)
pathList = c(pathList,list(tmp))
}

#create empty base leaflet object
leaflet() %>% addTiles() -> lf

#add each entry of pathlist to the leaflet object
for (path in pathList)
{
  lf %>% addPolylines(data = path,
                      lng = ~lon, 
                      lat = ~lat,
                      color = ~color) -> lf

}
#show output
lf

Note that as I mentioned before there is no way of geosphering the paths in such a small locality - the great circles are effectively straight lines. If you want the rounded edges for sake of aesthetics you may have to use the geom_curve way described in my other answer.

enter image description here

CMichael
  • 1,856
  • 16
  • 20
1

I would like to leave an alternative approach for you. What you can do is to restructure your data. Right now you have two columns for entry stations and the other two for exit stations. You can create one column for long, and another for lat by combing these columns. The trick is to use rbind() and c().

Let's have a look of this simple example.

x <- c(1, 3, 5)
y <- c(2, 4, 6)
c(rbind(x, y))

#[1] 1 2 3 4 5 6

Imagine x is long for entry stations and y for exit stations. 1 is longitude for a starting point. 2 is longitude where the first journey ended. As far as I can see from your sample data, it seems that 3 is identical 2. You could remove duplicated data points for each token_id. If you have a large set of data, perhaps this is something you want to consider. Back to the main point, you can create a column with longitude in the sequence you want with the combination of the two functions. Since you said you have date information, make sure you order the data by date. Then, the sequence of each journey appears in the right way in tmp. You want to do this with latitude as well.

Now we look into your sample data. It seems that Exit_Station_Lat and Exit_Station_Long are in factor. The first operation is to convert them to numeric. Then, you apply the method above and create a data frame. I called your data mydf.

library(dplyr)
library(ggplot2)
library(ggalt)
library(ggthemes)
library(raster)

mydf %>%
mutate_at(vars(Exit_Station_Lat: Exit_Station_Long),
          funs(as.numeric(as.character(.)))) -> mydf

group_by(mydf, token_id) %>%
do(data.frame(long = c(rbind(.$Entry_Station_Long,.$Exit_Station_Long)),
              lat = c(rbind(.$Entry_Station_Lat, .$Exit_Station_Lat))
             )
   ) -> tmp

Now let's get a map data from GADM. You can download data using the raster package.

getData(name = "GADM", country = "singapore", level = 0) %>%
fortify -> singapore

Finally, you draw a map. The key thing is to use group in aes in geom_path(). I hope this will let you move forward.

ggplot() +
geom_cartogram(data = singapore,
               aes(x = long, y = lat, map_id = id),
               map = singapore) +
geom_path(data = tmp,
          aes(x = long, y = lat, group = token_id,
          color = as.character(token_id)),
          show.legend = FALSE) +
theme_map() 

enter image description here

jazzurro
  • 23,179
  • 35
  • 66
  • 76