I'm looking for help understanding how to optimize some array processing that targets PostGIS-compatible data types. The input data looks like this:
{
"items": [
{
"id": 10000,
"coords": [[644, 1347, 1], [653, 1353, 1], [637, 1358, 1], [633, 1362, 1]]
}
]
}
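For reference, here is a minimal script that recreates an example.json with exactly the structure shown above, in case anyone wants to reproduce the timings:
import json
# Write the sample payload shown above to example.json.
data = {"items": [{"id": 10000,
                   "coords": [[644, 1347, 1], [653, 1353, 1],
                              [637, 1358, 1], [633, 1362, 1]]}]}
with open('example.json', 'w') as fp:
    json.dump(data, fp)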
Here is what I've tried:
import json
import numpy
import ppygis
import time
start_time = time.time()
with open('example.json') as fp:
d = json.load(fp)
print "file load time:"
print time.time() - start_time
"""
standard python
"""
start_time = time.time()
py_array = d['items'][0]['coords']
print "array creation:"
print time.time() - start_time
start_time = time.time()
a = [' '.join(map(str, c)) for c in py_array]  # 'x y z' for each point
b = '(' + ') ('.join(map(str, a)) + ')'        # '(x y z) (x y z) ...'
print "python array string processing time:"
print time.time() - start_time
start_time = time.time()
c = [ppygis.Point(p[0], p[1], p[2]) for p in py_array]  # one ppygis.Point per coordinate triple
print "python array ppygis:"
print time.time() - start_time
"""
numpy
"""
start_time = time.time()
numpy_array = numpy.array(d['items'][0]['coords'])
print "numpy array creation:"
print time.time() - start_time
start_time = time.time()
a = [' '.join(map(str, c)) for c in numpy_array]  # each c here is a numpy row, not a list
b = '(' + ') ('.join(map(str, a)) + ')'           # same '(x y z) (x y z) ...' string as above
print "numpy array string processing time:"
print time.time() - start_time
start_time = time.time()
c = [ppygis.Point(p[0], p[1], p[2]) for p in numpy_array]  # p[0], p[1], p[2] are numpy scalars here
print "numpy array ppygis:"
print time.time() - start_time
This is the output:
file load time:
8.29696655273e-05
array creation:
2.86102294922e-06
python array string processing time:
1.09672546387e-05
python array ppygis:
8.10623168945e-06
numpy array creation:
1.31130218506e-05
numpy array string processing time:
0.000116109848022
numpy array ppygis:
3.60012054443e-05
Why are the operations on the numpy array so much slower than the same operations on the plain Python list?
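One thing I did notice, though I'm not sure it's the whole explanation: indexing into the numpy array returns boxed numpy scalar types rather than plain Python ints, so every p[0] and str() call in the loops above goes through numpy's scalar machinery:
print type(py_array[0][0])     # <type 'int'>
print type(numpy_array[0][0])  # a numpy scalar, e.g. <type 'numpy.int64'> on a 64-bit build
Would converting back with numpy_array.tolist() before looping be the right way to avoid this, or is there a vectorized way to build the string and the ppygis.Point list directly?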