I want to get the value from a third column when the values of the other two columns are given. I want the value of the rating for each of the movies and users to form a user-movie matrix.
I've gotten both the unique movie ids and the user ids in two lists and tried locating the instance where the frequency matches the values I want
import pandas as pd
import numpy as np
import matplotlib as plot
def main():
df = pd.read_csv(r'/Users/ttbarack/Desktop/ratings.csv')
#print(df)
userIds = []
for id in df['userId']:
if id not in userIds:
userIds.append(id)
#print(userIds)
movieIds = []
for movie in df['movieId']:
if movie not in movieIds:
movieIds.append(movie)
#print(movieIds)
"""PART 1"""
finalList = []
for id in userIds:
newlist = []
for mov in movieIds:
newlist.append(df['rating'].where(df['userId'].values() == id and df['movieId'].values() == mov))
finalList.append(newlist)
print(finalList)
This is the error I'm getting:
Traceback (most recent call last):
File "/Users/ttbarack/PycharmProjects/Proj1/Project2.py", line 29, in <module>
main()
File "/Users/ttbarack/PycharmProjects/Proj1/Project2.py", line 22, in main
newlist.append(df['rating'].where(df['userId'].values() == id and df['movieId'].values() == mov))
TypeError: 'numpy.ndarray' object is not callable