I want to do multiple string replacements in Python.
I have a dictionary:
# Outer key: the query sentence.
# Inner key: a phrase occurring in that sentence.
# Inner value: the candidate phrases that may replace it.
my_dict = {
    "Can I have some roti and aloo gobhi ?": {
        "roti": [
            "pulka",
            "butter kp",
            "wheat parota",
            "chapati",
            "gobi parota",
            "onion parota",
            "paneer parota",
            "kerala parota",
            "aloo parota",
            "plain naan",
            "butter naan",
            "garlic naan",
            "plain kulcha",
            "butter kulcha",
            "lacha parota",
            "tandoori roti",
            "tandoori butter roti",
            "roti",
        ],
        "aloo gobhi": [
            "paneer butter masala",
            "palak paneer",
            "kadai paneer",
            "hydrabadi paneer",
            "kadai gobi",
            "aloo gobi",
            "aloo mattar",
            "mix veg curry",
            "baby corn masala",
            "dal fry",
            "palak dal",
            "dal tadka",
            "mushroom masala",
            "gobi masala",
            "paneer tikka masala",
            "mushroom tikka masala",
            "aloo gobhi",
        ],
    },
}
The outer dictionary has a sentence as its key and another dictionary as its value. In that inner dictionary, each key is an item to replace in the sentence and each value is a list of possible replacements. I want to construct new sentences from the key of the outer dictionary by replacing 'roti' with any item from its corresponding list and 'aloo gobhi' with any item from its corresponding list.
For example:
# The query sentence before any replacement.
input_string = "Can I have some roti and aloo gobhi ?"
# One possible result: 'roti' -> 'pulka', 'aloo gobhi' -> 'paneer butter masala'
# (both replacements are taken from the lists in my_dict).
output_string = "Can I have some pulka and paneer butter masala ?"
UPDATE:
I have an Excel file (say food_items.xlsx) containing a list of food items grouped into desserts, starters, main course, etc. I have another Excel file (say food_queries.xlsx) containing user queries that order food items present in food_items.xlsx.
I'm trying to write a script that covers all the food items in food_items.xlsx with the minimum number of user queries, so that machine learning can be done with as few queries as possible.
import xlrd
import xlsxwriter
import string
import random
import re
import time
import itertools
# Filled by createList(): upper-cased column header -> lower-cased cell
# values found below that header.
dict_of_names = dict()
# Not referenced by any of the functions shown in this excerpt.
list_of_items = list()
def createList(filename):
    """Load the food-item columns of a workbook into ``dict_of_names``.

    Reads the third sheet (index 2) of *filename*.  Each column is read top
    to bottom until the first empty cell; the first cell is the column
    header and the remaining cells are the items of that column.  The header
    is stored upper-cased as the key and the items lower-cased as the value.

    Columns whose first cell is empty are skipped.  Any error (missing file,
    missing sheet, ...) is printed rather than raised, so this function
    never propagates an exception to the caller.

    Args:
        filename: path of the workbook to read.
    """
    try:
        book = xlrd.open_workbook(filename)
        sheet = book.sheet_by_index(2)
        for col in range(sheet.ncols):
            column_items = []
            for row in range(sheet.nrows):
                text = str(sheet.cell(row, col).value)
                if not text:
                    # The first empty cell marks the end of this column.
                    break
                column_items.append(text.lower())
            if not column_items:
                # No header: nothing to register.  Skipping here keeps the
                # remaining columns from being lost to an IndexError.
                continue
            dict_of_names[column_items[0].upper()] = column_items[1:]
    except Exception as e:
        # Best-effort loader: report the problem and carry on.
        print(e)
def getFile(readFile):
    """Collect the query sentences of a workbook and pass them on.

    Reads every cell of the first sheet of *readFile* column by column,
    keeps the text of each cell that is not empty or whitespace-only, and
    hands the resulting list to ``replaceStrings``.

    Any error is printed rather than raised.

    Args:
        readFile: path of the workbook holding the user queries.
    """
    try:
        query_book = xlrd.open_workbook(readFile)
        first_sheet = query_book.sheet_by_index(0)
        list_of_sentences = []
        for col in range(first_sheet.ncols):
            for row in range(first_sheet.nrows):
                cell_value = str(first_sheet.cell(row, col).value)
                # Blank cells carry no query.
                if cell_value.strip():
                    list_of_sentences.append(cell_value)
        # NOTE(review): called once, after the whole sheet has been read.
        replaceStrings(list_of_sentences)
    except Exception as e:
        # Best-effort: report the problem instead of aborting the script.
        print(e)
def replaceStrings(list_of_sentences, names=None):
    """Print and return variants of each sentence with its food items swapped.

    For every sentence, each item from *names* that occurs in it is paired
    with the next item found (wrapping around to the first).  The two items
    are then replaced in lockstep by the alternatives of their own lists,
    one generated sentence per pair of alternatives.

    Args:
        list_of_sentences: iterable of query strings.
        names: mapping of category name to a list of item names.  Defaults
            to the module-level ``dict_of_names``.

    Returns:
        The generated sentences, in the order they were printed.
    """
    if names is None:
        names = dict_of_names

    generated = []
    for sentence in list_of_sentences:
        # Item found in this sentence -> every alternative of its category.
        alternatives = {}
        for values in names.values():
            for val in values:
                if val in sentence:
                    alternatives[val] = values

        found = list(alternatives)
        n = len(found)
        for i in range(n):
            this_key = found[i]
            next_key = found[(i + 1) % n]
            # Replace both items in a single pass so that text inserted for
            # one item is never re-replaced by the other.  Longest key first
            # so a longer item wins over a shorter one it contains.
            ordered = sorted([this_key, next_key], key=len, reverse=True)
            pattern = re.compile("|".join(re.escape(k) for k in ordered))
            for a, b in zip(alternatives[this_key], alternatives[next_key]):
                swap = {this_key: a, next_key: b}
                # Substitute in the sentence the items were found in.
                new_cell = pattern.sub(lambda m: swap[m.group(0)], sentence)
                print(new_cell)
                generated.append(new_cell)
    return generated
if __name__ == "__main__":
    # Script entry point: load the item lists, then process the queries.
    # Both steps read the same workbook.
    # Writing the results to 'Query_set_1.xlsx' with xlsxwriter is
    # currently disabled.
    print("starting execution..")
    source_workbook = "total food queries.xlsx"
    createList(source_workbook)
    getFile(source_workbook)
UPDATE 2:
The basic algorithm I want to implement is:
I need to cover all the food items (each food item may occur only once).
Once all the food items are covered, I stop (even though a few sample queries from the user may still be left).
My main goal is to cover all the food items, not all the queries from the user.