I'm trying to find out whether there is a better way to save only part of a list, namely only the objects that are not already in the database. My current solution has O(n^2) complexity, and it also holds a lot of hashes of the database objects in memory.
My class:
class Product(Base):
    """ORM mapping for one row of the ``products`` table.

    Each instance carries an ``objectHash``: the MD5 hex digest of the
    barcode (as text) concatenated with the product name. The column is
    declared unique, so it can be used to detect products that are
    already stored.
    """

    __tablename__ = 'products'

    # Primary key.
    id = Column('id', BIGINT, primary_key=True)
    # Product barcode; nullable because no ``nullable=False`` is declared.
    barcode = Column('barcode', BIGINT)
    # Product name, stored in the ``name`` column; required.
    productName = Column('name', TEXT, nullable=False)
    # MD5 hex digest of barcode + name; unique and required.
    objectHash = Column('objectHash', TEXT, unique=True, nullable=False)

    def __init__(self, productData, picture=None):
        """Build a product from a mapping of tag values.

        Args:
            productData: Mapping indexed by ``ProductTagsEnum`` member
                values; must contain the ``barcode`` and ``productName``
                entries.
            picture: Accepted for interface compatibility; not used by
                this constructor.

        Raises:
            KeyError: If ``productData`` lacks a required entry (when it
                is a plain mapping).
        """
        self.barcode = productData[ProductTagsEnum.barcode.value]
        self.productName = productData[ProductTagsEnum.productName.value]
        # Fixed: this previously read the misspelled ``self.produtName``,
        # which raised AttributeError on every construction.
        digestInput = str(self.barcode) + self.productName
        self.objectHash = md5(digestInput.encode('utf-8')).hexdigest()
My solution:
def saveNewProducts(self, products):
    """Persist only the products whose ``objectHash`` is not yet stored.

    The hashes of the incoming batch are looked up in the database with a
    single ``IN`` query, and only products whose hash was not found are
    bulk-inserted. Products that share a hash within the batch are
    collapsed to the first occurrence, since ``objectHash`` is unique.

    Args:
        products: Iterable of ``Product`` instances to consider.

    Returns:
        None.

    Raises:
        Exception: Any error raised while querying or inserting is
            re-raised after the transaction has been rolled back.
    """
    # Index the batch by hash. ``setdefault`` keeps the first product seen
    # for each hash, so in-batch duplicates cannot violate the unique
    # constraint on ``objectHash``.
    candidatesByHash = {}
    for product in products:
        candidatesByHash.setdefault(product.objectHash, product)

    # NOTE(review): ``sessionmaker()`` is created without an explicit bind,
    # as in the original; presumably the engine is configured elsewhere.
    Session = sessionmaker()
    session = Session()
    try:
        existingHashes = set()
        if candidatesByHash:  # skip the query entirely for an empty batch
            rows = session.query(Product.objectHash).filter(
                Product.objectHash.in_(list(candidatesByHash))
            )
            # Each result row is a one-element tuple; unpack it so the set
            # holds plain hash strings and membership tests are O(1).
            existingHashes = {row[0] for row in rows}

        # Fixed: the original compared against the batch's own hashes, so
        # nothing was ever considered new.
        newProducts = [
            product
            for objectHash, product in candidatesByHash.items()
            if objectHash not in existingHashes
        ]
        if newProducts:
            session.bulk_save_objects(newProducts)
            # Without a commit the insert is discarded when the local
            # session goes away.
            session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()