I'm trying to plot a histogram for a big data (nearly 7 million points) in python where I want to know the frequency of values. I have tried this code but it takes too long to finish more than an hour! So, are there any suggestions?
import numpy as np
import matplotlib.pyplot as plt
file_path = "D:/results/planarity2.txt"
data_array = []
with open(file_path, "r") as file:
for line in file:
value = line.strip()
data_array.append(value)
column_values = data_array
unique_values, counts = np.unique(column_values, return_counts=True)
value_frequency = dict(zip(unique_values, counts))
x_values = list(value_frequency.keys())
y_values = list(value_frequency.values())
plt.bar(x_values, y_values, edgecolor='black', alpha=0.7)
plt.xlabel('Column Values')
plt.ylabel('Frequency')
plt.title('Frequency of Points Based on Column Values')
plt.show()
I also tried this but no use
import numpy as np
import matplotlib.pyplot as plt
file_path = "D:/results/planarity2.txt"
data_array = []
with open(file_path, "r") as file:
for line in file:
value = line.strip()
data_array.append(value)
column_values = data_array
value_frequency = {}
for value in column_values:
if value in value_frequency:
value_frequency[value] += 1
else:
value_frequency[value] = 1
x_values = list(value_frequency.keys())
y_values = list(value_frequency.values())
plt.bar(x_values, y_values, edgecolor='black', alpha=0.7)
plt.xlabel('Column Values')
plt.ylabel('Frequency')
plt.title('Frequency of Points Based on Column Values')
plt.show()