I have a column of a dataframe full of arrays with images.
>>>df.IMAGES.head()
0 ["https://cf-medias.avendrealouer.fr/image/_87...
1 ["http://photos.ubiflow.net/440414/165474561/p...
2 ["https://v.seloger.com/s/width/965/visuels/0/...
3 ["https://pix.yanport.com/ads/e9e07ed0-812f-11...
4 ["https://v.seloger.com/s/width/966/visuels/0/...
I want to check if the images from every couples of rows are similar. So I did a function to check if two images are similar. How can I apply my function to every couple of rows ?
from PIL import Image
import imagehash
import requests
from io import BytesIO
def image_similarity(imageAurl,imageB):
responseA = requests.get(imageAurl)
imgA = Image.open(BytesIO(response.content))
responseB = requests.get(imageBurl)
imgB = Image.open(BytesIO(response.content))
hash0 = imagehash.average_hash(Image.open(imageA))
hash1 = imagehash.average_hash(Image.open(imageB))
cutoff = 5
if hash0 - hash1 < cutoff:
print('images are similar')
else:
print('images are not similar')
Thanks to Kshitij Saxena I tried :
df['NextImage'] = df['IMAGES'][df['IMAGES'].index - 1]
df['IsSimilar'] = df.apply(lambda x: image_similarity(x['IMAGES'], x['NextImage']), axis=1)
However I get the following error:
---------------------------------------------------------------------------
InvalidSchema Traceback (most recent call last)
<ipython-input-25-16b99a7b864a> in <module>
1 df['NextImage'] = df['IMAGES'][df['IMAGES'].index - 1]
----> 2 df['IsSimilar'] = df.apply(lambda x: image_similarity(x['IMAGES'], x['NextImage']), axis=1)
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
6012 args=args,
6013 kwds=kwds)
-> 6014 return op.get_result()
6015
6016 def applymap(self, func):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in get_result(self)
140 return self.apply_raw()
141
--> 142 return self.apply_standard()
143
144 def apply_empty_result(self):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_standard(self)
246
247 # compute the result using the series generator
--> 248 self.apply_series_generator()
249
250 # wrap results
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\apply.py in apply_series_generator(self)
275 try:
276 for i, v in enumerate(series_gen):
--> 277 results[i] = self.f(v)
278 keys.append(v.name)
279 except Exception as e:
<ipython-input-25-16b99a7b864a> in <lambda>(x)
1 df['NextImage'] = df['IMAGES'][df['IMAGES'].index - 1]
----> 2 df['IsSimilar'] = df.apply(lambda x: image_similarity(x['IMAGES'], x['NextImage']), axis=1)
<ipython-input-21-3acdcb76f890> in image_similarity(imageAurl, imageB)
7
8 def image_similarity(imageAurl,imageB):
----> 9 responseA = requests.get(imageAurl)
10 imgA = Image.open(BytesIO(response.content))
11 responseB = requests.get(imageBurl)
~\AppData\Roaming\Python\Python36\site-packages\requests\api.py in get(url, params, **kwargs)
73
74 kwargs.setdefault('allow_redirects', True)
---> 75 return request('get', url, params=params, **kwargs)
76
77
~\AppData\Roaming\Python\Python36\site-packages\requests\api.py in request(method, url, **kwargs)
58 # cases, and look like a memory leak in others.
59 with sessions.Session() as session:
---> 60 return session.request(method=method, url=url, **kwargs)
61
62
~\AppData\Roaming\Python\Python36\site-packages\requests\sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
531 }
532 send_kwargs.update(settings)
--> 533 resp = self.send(prep, **send_kwargs)
534
535 return resp
~\AppData\Roaming\Python\Python36\site-packages\requests\sessions.py in send(self, request, **kwargs)
638
639 # Get the appropriate adapter to use
--> 640 adapter = self.get_adapter(url=request.url)
641
642 # Start time (approximately) of the request
~\AppData\Roaming\Python\Python36\site-packages\requests\sessions.py in get_adapter(self, url)
729
730 # Nothing matches :-/
--> 731 raise InvalidSchema("No connection adapters were found for '%s'" % url)
732
733 def close(self):
InvalidSchema: ('No connection adapters were found for \'["https://cf-medias.avendrealouer.fr/image/_873908158_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908159_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908160_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908161_d.jpg","https://cf-medias.avendrealouer.fr/image/_873908162_d.jpg"]\'', 'occurred at index 0')
The error seems to be because I try to take the array of urls every columns have for an url.