I want to create from the dataset a list that contains word and frequency of the word . I did it and saved into val named 'mylist'. now I want to sort the list according to the frequency of the word and to create barplot from the 10 words that have the higher frequency.
but I not succeeded to sort it. I tried many ways to change the type of 'mylist' to data.frame or date.table but still the column of the frequency stay a list. To sumup I have the DT var that contains it is a list with 2 columns x-contains the words and type is character . The 2 column is 'v' - that contains the frequency and it is a list. I am not succeeding to sort it by the frequency. please help me.
library(ggplot2)
libary(MASS)
#get the data
data.uri = "http://www.crowdflower.com/wp-content/uploads/2016/03/gender-classifier-DFE-791531.csv"
pwd = getwd()
data.file.name = "gender.csv"
data.file = paste0(pwd, "./", data.file.name)
download.file(data.uri, data.file)
data = read.csv(data.file.name)
#manipulate the data
data <- data[data$X_unit_id < 815719694,]
print(data$X_unit_id)
#get all female has white sidebar
female_colors <- subset(data, data$gender=="female")
female_colors$fav_number
#get all male fav_numbers
male_colors <- subset(data, data$gender=="male")
male_colors$fav_number
text_male = subset(data, data$gender=="male")
text_male = text_male$text
print(text_male[1])
print(length(text_male))
v <- text_male[1:length(text_male)]
print(v)
print (v[1])
count_of_list = 0;
x = list()
for ( i in v) {
# Merge the two lists.
x <- c(x,unlist(strsplit(i," ")))
}
count = 0;
mylist = list()
for (word in x){
for (xWord in x){
if (word == xWord)
count = count + 1;
}
key <- word
value <- count
mylist[[ key ]] <- value
count = 0;
}
libary(data.table)
require(data.table)
DT = data.table(x=c(names(mylist)),v=c(mylist))
DT