0

The full error is this. I am not sure how to fix it. I'm trying to predict the link between gender and aggresiveness in tweets. (https://i.stack.imgur.com/T4Ual.png)

This is the whole script

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

#De specifikke, vi ved vi kommer til at bruge
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB #Gør at man kan have mere end 2 classes

data = pd.read_csv('/work/90301/Individual project/TheClimateChangeTwitterDataset.csv')

#corpus=data['text']
#corpus=text.loc[:,['aggressiveness', 'gender']] 

cv=CountVectorizer() #Take some text and turn it into a matrix
X = cv.fit_transform(data.values).toarray()
#x = X['aggressiveness'].values
#y = X['gender'].values
y=data['gender'].values
print(X.shape)
print(y.shape)
X_train,X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

#Instantiate and train Naive Bayes
classifier = MultinomialNB(fit_prior=True) 
classifier.fit(X_train, y_train)

#test model
y_pred = classifier.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
print(cm)

print(f'Relative accuracy: {accuracy_score(y_test, y_pred)}')
print(f'Accuracy in instances: {accuracy_score(y_test, y_pred, normalize=False)}')

#Infer the label (spam/ham) of a message
aggressiveness=[corpus]
#print(email)
aggressiveness_array = cv.transform(aggressiveness).toarray()
print(classifier.predict(aggressiveness_array))
  • check this: https://stackoverflow.com/questions/26367075/countvectorizer-attributeerror-numpy-ndarray-object-has-no-attribute-lower – isCzech Dec 07 '22 at 15:57

0 Answers0