2

In my django project I am scraping data with a python script. I temporarily store all the fields in a pandas dataframe before iterating over it to save all the info in a django model. The only way I was able to get this to work was saving the png in the model as models.BinaryField. I use an html template to display the model's fields in a report, but the image comes up as "memory at <>" instead of displaying the image. How can I display the image?

crawl.py

# Import Libraries
import requests
from selenium import webdriver
from lxml import html
import pandas as pd
import numpy as np
from datetime import datetime
import pytz
from selenium.webdriver.chrome.options import Options

def crawl_website(product, xpath_dict):
    """Scrape the seller listing of one product and tabulate the offers.

    Args:
        product: Object exposing ``title``, ``map_price``, ``asin`` and
            ``sku`` attributes.
        xpath_dict: Mapping with the keys ``'original_price'``,
            ``'seller_name'`` and ``'seller_link'``; each value is the XPath
            expression evaluated against the downloaded page.

    Returns:
        pandas.DataFrame with one row per seller. A ``Screenshot`` column
        holding the PNG bytes of the page is present only when at least one
        listed price is below the product's MAP price.
    """
    # Set up parameters
    base_url = 'https://www.website.com/product/{sku}/sellers'
    # Adjacent literals instead of a backslash continuation, so the
    # User-Agent does not carry the source file's indentation inside it.
    headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36'}

    product_name = product.title
    map_price = product.map_price
    asin = product.asin
    sku = product.sku

    # Retrieve Webpage
    full_url = base_url.format(sku=sku)
    time_stamp = pytz.utc.localize(datetime.utcnow())
    page = requests.get(full_url, headers=headers)
    doc = html.fromstring(page.content)

    # Extract Price Field
    original_price = doc.xpath(xpath_dict['original_price'])

    # Discount relative to the MAP price, floored at 0%
    discount = [str(100 * max(0.0, round(1 - float(i) / float(map_price), 2))) + '%'
                for i in original_price]

    # MAP Violation Field
    map_violation = [float(i) < float(map_price) for i in original_price]
    has_violation = any(map_violation)

    # Extract Seller Names and Links
    seller_name = doc.xpath(xpath_dict['seller_name'])
    seller_link = doc.xpath(xpath_dict['seller_link'])

    # If a violation is found, take a screenshot of the current URL
    screenshot = None
    if has_violation:
        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_options.add_argument("--window-size=1920,1080")

        DRIVER = 'chromedriver'
        # NOTE(review): newer Selenium releases deprecate ``chrome_options=``
        # in favour of ``options=`` - confirm against the installed version.
        driver = webdriver.Chrome(DRIVER, chrome_options=chrome_options)
        try:
            driver.get(full_url)
            screenshot = driver.get_screenshot_as_png()
        finally:
            # Always release the browser process, even if the page load fails
            driver.quit()

    # Create DataFrame
    total_rows = len(seller_name)
    columns = {
        'Product_Name': np.repeat(product_name, total_rows),
        'ASIN': np.repeat(asin, total_rows),
        'SKU': np.repeat(sku, total_rows),
        'Time_Stamp': np.repeat(time_stamp, total_rows),
        'Seller_Name': seller_name,
        'Seller_URL': seller_link,
        'MAP_Price': np.repeat(map_price, total_rows),
        'Current_Price': original_price,
        'Discount': discount,
        'MAP_Violation': map_violation,
    }
    if has_violation:
        # A plain list keeps the bytes object intact; np.repeat would build a
        # fixed-width byte-string array, which drops trailing NUL bytes.
        columns['Screenshot'] = [screenshot] * total_rows

    return pd.DataFrame(columns)

views.py

import pandas as pd
from datetime import datetime
from django.shortcuts import render, redirect
from products.models import Product
from sellers.models import Seller
from sellers.models import Seller_Price
from .crawl_website import crawl_website


def crawl_prices(request):
    """Crawl each product that has a SKU and store one Seller_Price per offer.

    Superusers process every product; other users only the products whose
    ``client_id`` is the requesting user. Responds with a redirect to
    ``/admin`` once all products have been handled.
    """
    products = Product.objects.order_by('-date_added')
    if not request.user.is_superuser:
        products = products.filter(client_id=request.user)

    for product in products:
        # Only products with a SKU are crawled
        if not product.sku:
            continue

        offers = crawl_website(product, xpath_dict)
        for _, offer in offers.iterrows():
            # Create Seller object if it doesn't exist
            seller, _created = Seller.objects.get_or_create(name=offer['Seller_Name'])

            # Flag earlier records for this seller/product pair as outdated
            Seller_Price.objects.all().filter(
                seller_id=seller, product_id=product
            ).update(latest_update=False)

            fields = {
                'seller_id': seller,
                'product_id': product,
                'date_reported': offer['Time_Stamp'],
                'url': offer['Seller_URL'],
                'seller_price': offer['Current_Price'],
                'discount': offer['Discount'],
                'violation': offer['MAP_Violation'],
            }
            # Record screenshot only if there is a violation
            if offer['MAP_Violation']:
                fields['violation_snapshot'] = offer['Screenshot']

            Seller_Price.objects.create(**fields)

    return redirect('/admin')


from django.views.generic import View
from django.utils import timezone
from .models import *

# This passes database objects to html template for reports
class Pdf(View):
    """Report view: collects the latest seller prices for the current user."""

    def get(self, request):
        """Build the report context and render it.

        The context holds the current time (``today``), the requesting
        user's ``Seller_Price`` rows flagged ``latest_update=True`` ordered
        by newest ``date_reported`` first (``seller_price``), and the
        request itself.
        """
        seller_price = Seller_Price.objects.order_by('-date_reported').filter(
            product_id__client_id=request.user, latest_update=True)
        today = timezone.now()
        params = {
            'today': today,
            'seller_price': seller_price,
            'request': request
        }
        # NOTE(review): the original snippet ended here without returning a
        # response. Rendering 'report.html' is an assumption based on the
        # template shown with this view - confirm the template name, or swap
        # in the project's PDF renderer if one is used.
        return render(request, 'report.html', params)

report.html

<!doctype html>
<html>
<head>
{# "staticfiles" is the deprecated name of this tag library; "static" is its replacement. #}
{% load static %}
<meta charset="utf-8">
<title>Sales Report</title>

</head>
<body>

{# One entry per item of the "seller_price" context variable. #}
{% for sp in seller_price %}
Seller Name: {{ sp.seller_id }}
{# NOTE: this prints the raw BinaryField value (shown as "memory at ..."), not an image. #}
Image: {{ sp.violation_snapshot }}
{% endfor %}

</body>
</html>
JHall651
  • 427
  • 1
  • 4
  • 15

2 Answers

2

I would highly advise against using a binary field to save an actual file, but in case you have a valid use for it, this is the best I can come up with.

Assuming you know what django template filters are,

@register.filter(name='binary_to_image')
def encode_binary_data_to_image(binary):
    """Return *binary* as base64 text for use inside a ``data:`` URI.

    Args:
        binary: Bytes-like image data (``bytes``, ``bytearray`` or
            ``memoryview``), or ``None`` when no image is stored.

    Returns:
        The base64 encoding as an ASCII ``str``; an empty string when
        *binary* is ``None``.
    """
    import base64

    if binary is None:
        return ''
    # bytes() normalises memoryview/bytearray input before encoding;
    # decoding gives the template plain text instead of a b'...' repr.
    return base64.b64encode(bytes(binary)).decode('ascii')

and inside your template

<img src = "data:image/png;base64,{{objects.binary_field|binary_to_image}}">

and now you rendered your image on the browser.

But I can't keep myself from saying it again: please don't insist on using the database for images unless you have a really valid reason.


After your edits to your question, where you do

screenshot = driver.get_screenshot_as_png()

you can either add or refactor your code to save the screenshot to your media folder like

screenshot = driver.save_screenshot('/path/to/image/dir/file.name')

so it would save it to your image directory just like django.db.models.ImageField would do, and so you can just read it from the model like

<img src="{{object.image_field.url}}"></img>
Işık Kaplan
  • 2,815
  • 2
  • 13
  • 28
  • This is a much better way of doing it than my suggestion. filters are more django like and more pythonic imho – castaway2000 Apr 28 '19 at 01:23
  • I would like to take your advice and not store the image file in the binary field. The reason I resorted to that, is I couldn't figure out how to pass the image files to my views.py where the data is saved in the model. All the other data is in a pandas dataframe. Any suggestions? I added a lot of code above in case that is helpful. – JHall651 Apr 28 '19 at 01:43
0

This might work. I don't have a way to test this though. credit to this post

you will need to produce a Base64 encoding for your binary encoded data

import base64
# b64encode() needs bytes-like input (a str raises TypeError on Python 3);
# decode the result so the template receives plain text, not a b'...' repr.
encoded = base64.b64encode(b"BinaryField as ByteArray").decode("ascii")

then make sure to return sp.violation_snapshot as base64 encoded.

then you can use it like this.

<img src="data:image/png;base64,{{ sp.violation_snapshot }}">

castaway2000
  • 306
  • 5
  • 21