The program creates some random products and then creates orders by randomly choosing a product. Right now every order only has one item; a future version will randomize the number of line items per order.
I'm new to both Python and pandas, and I want to make sure that my approach is the most efficient way of adding a new row to a DataFrame and of selecting a random row from a DataFrame.
Any suggestions?
Thank you
def get_random_products(count=500):
    """Build a DataFrame of ``count`` randomly generated products.

    Each row gets a sequential ``product_id`` (0 .. count-1); the remaining
    fields come from the ``get_*`` helper functions defined elsewhere in
    this file, called once per row in column order.

    Args:
        count: Number of product rows to generate. Zero or a negative
            value yields an empty DataFrame that still has all columns.

    Returns:
        A pandas DataFrame with the columns ``product_id``, ``SKU``,
        ``price``, ``category``, ``size``, ``color``, ``style`` and
        ``gender``.
    """
    columns = ['product_id', 'SKU', 'price', 'category', 'size', 'color', 'style', 'gender']

    # Collect plain Python rows first and build the DataFrame once.
    # Appending to a DataFrame row by row copies the whole frame on every
    # iteration (quadratic time), and DataFrame.append no longer exists in
    # pandas 2.x.
    rows = [
        [
            product_id,
            get_random_SKU(),
            get_price(),
            get_category(),
            get_size(),
            get_color(),
            get_style(),
            get_gender(),
        ]
        for product_id in range(count)
    ]
    return pd.DataFrame(rows, columns=columns)
#---
def get_random_orders(products, count=1000, start_order_id=1, number_of_customers=500):
    """Build a DataFrame of ``count`` random single-item orders.

    For every order a product row is picked uniformly at random from
    ``products`` and its descriptive fields are copied into the order row.
    The customer id and order date come from ``get_customer_id`` and
    ``get_order_date``, which are defined elsewhere in this file.

    Args:
        products: DataFrame providing at least the columns ``SKU``,
            ``price``, ``category``, ``size``, ``color``, ``style`` and
            ``gender``.
        count: Number of orders to generate. Zero or a negative value
            yields an empty DataFrame that still has all columns.
        start_order_id: ``order_id`` assigned to the first order; later
            orders are numbered consecutively from it.
        number_of_customers: Passed through to ``get_customer_id``.

    Returns:
        A pandas DataFrame with the columns ``customer_id``, ``order_id``,
        ``order_date``, ``SKU``, ``price``, ``category``, ``size``,
        ``color``, ``style`` and ``gender``.

    Raises:
        ValueError: If orders are requested but ``products`` has no rows.
    """
    columns = ['customer_id', 'order_id', 'order_date', 'SKU', 'price',
               'category', 'size', 'color', 'style', 'gender']
    product_fields = ['SKU', 'price', 'category', 'size', 'color', 'style', 'gender']

    if count > 0 and len(products) == 0:
        raise ValueError("products must contain at least one row to create orders")

    # Convert the catalog to records once; doing it inside the loop repeats
    # an O(len(products)) conversion for every single order.
    records = products.to_records(index=False)
    last_index = len(records) - 1

    rows = []
    for offset in range(count):
        # Each time through, choose a random product to be in the order.
        product = records[random.randint(0, last_index)]
        row = [
            get_customer_id(number_of_customers),
            start_order_id + offset,
            get_order_date(),
        ]
        row.extend(product[field] for field in product_fields)
        rows.append(row)

    # Build the DataFrame in one step instead of appending row by row, which
    # copies the frame on every iteration and relies on DataFrame.append
    # (no longer available in pandas 2.x).
    return pd.DataFrame(rows, columns=columns)
# Script entry: generate a 1000-product catalog, then 1000 orders drawn from it.
catalog = get_random_products(count=1000)
orders = get_random_orders(
    products=catalog,
    count=1000,
    start_order_id=1,
    number_of_customers=500,
)