1

There are 43000 pairs of co-ordinates and as you can imagine that makes this a very lengthy process to run using the nested loops below, I just do not know how to make this work faster and exclude the loops.

Below is my code based on what I have been able to find on the web and my very old and limited knowledge of Basic using R. Please assist me by teaching me how to accomplish this in a more efficient and elegant code block if at all possible.

for (i in 1:length(FixedAssets$Lat)) {
    for (p  in 1:length(FixedAssets$Lat)) {
        dist <- distCosine(as.vector(c(FixedAssets$Longitude[i],FixedAssets$Latitude[i]), mode = "any"), as.vector(c(FixedAssets$Longitude[p],FixedAssets$Latitude[p]), mode = "any"), r = 6378137)
    if (dist < 200) { FixedAssets$prox200[i] = FixedAssets$prox200[i] + 1 }
    if (dist < 500) { FixedAssets$prox500[i] = FixedAssets$prox500[i] + 1 }
    if (dist < 1000) { FixedAssets$prox1000[i] = FixedAssets$prox1000[i] + 1 }
    if (dist < 2000) { FixedAssets$prox2000[i] = FixedAssets$prox2000[i] + 1 }
    if (dist < 3000) { FixedAssets$prox3000[i] = FixedAssets$prox3000[i] + 1 }
    if (dist < 5000) { FixedAssets$prox5000[i] = FixedAssets$prox5000[i] + 1 }
    if (dist < 10000) { FixedAssets$prox10000[i] = FixedAssets$prox10000[i] + 1 }
    if (dist < 20000) { FixedAssets$prox20000[i] = FixedAssets$prox20000[i] + 1 }    
    }
print(i)

}
zx8754
  • 52,746
  • 12
  • 114
  • 209
PeterJS
  • 11
  • 2

3 Answers3

3

You can do this efficiently using spDists from the sp-package. What makes it very fast is that the actual distance calculation is done in C, not in R. Loops in interpreted languages such as R are very slow.

Paul Hiemstra
  • 59,984
  • 12
  • 142
  • 149
1

An alternative to spDist could be distm from the geoshere package. However, it trends to take more time to calculate the distances:

library(maps)
library(geosphere)
library(sp)

data(world.cities)
system.time(d <- distm(world.cities[1:10000, c("long", "lat")]))
#  User      System verstrichen 
# 69.72        0.59       70.77 

system.time(d2 <- spDists(as.matrix(world.cities[1:10000, c("long", "lat")]), longlat = T))
# User      System verstrichen 
# 66.11        0.48       66.89 

as.dist(round(d[1:5, 1:5]/1000, 1))
#        1      2      3      4
# 2    1.5                     
# 3 3589.8 3588.7              
# 4 1327.5 1326.6 2326.7       
# 5  105.9  104.4 3510.2 1270.1
as.dist(round(d2[1:5, 1:5],1))
#        1      2      3      4
# 2    1.5                     
# 3 3592.9 3591.8              
# 4 1328.4 1327.5 2328.6       
# 5  105.6  104.1 3513.3 1270.8
lukeA
  • 53,097
  • 5
  • 97
  • 100
1

Here is a quite fast solution that uses data.table and distGeo{geosphere} to calculate distances on an ellipsoid.

library(geosphere)
library(data.table)

setDT(dt)[ , dist_km := distGeo(matrix(c(long, lat), ncol = 2), 
                                  matrix(c(long_dest, lat_dest), ncol = 2))/1000]

Data for reproducible example

library(maps)
library(reshape)  

# Load world cities data and keep only 300 cities, which will give us 90,000 pairs
data(world.cities)
world.cities <- world.cities[1:300,] 

# create all possible pairs of origin-destination in a long format
dt <- expand.grid.df(world.cities,world.cities)
names(dt)[10:11] <- c("lat_dest","long_dest")
rafa.pereira
  • 13,251
  • 6
  • 71
  • 109