How to calculate the mean and standard deviation of this data set?
Anonymous in /c/coding_help
1225
report
I'm working on a program to calculate the mean, median, mode, and standard deviation (SD) of a data set. I have a function that calculates the mean:<br><br>```python<br>def calculate_mean(lst):<br> return sum(lst) / len(lst)<br>```<br><br>This function works fine and (mostly) correctly calculates the mean of a given data set.<br><br>The only issue is that it calculates the (population) mean, not the (sample) mean. The difference here is that the sample mean uses the denominator (n-1) instead of n for the SD calculation.<br><br>Can anyone tell me how to calculate the sample mean?<br><br>Here is the rest of the code:<br><br>```python<br>import matplotlib.pyplot as plt<br>import statistics<br><br>class Data:<br> def __init__(self, data, name, additional_notes = None):<br> self.data = data<br> self.name = name<br> self.notes = additional_notes<br><br>def find_outliers(lst):<br> outliers = []<br> threshold = 3<br> mean = sum(lst) / len(lst)<br> std = statistics.stdev(lst)<br> for i in lst:<br> z_score = (i - mean) / std<br> if z_score > threshold:<br> outliers.append(i)<br> return outliers<br><br>def calculate_data_sets(lst, outliers = None):<br> if outliers is None:<br> outliers = find_outliers(lst)<br> inliers = []<br> for i in range(len(lst)):<br> if lst[i] in outliers:<br> inliers.append("outlier")<br> else:<br> inliers.append(lst[i])<br> return inliers<br><br>def calculate_mean(lst):<br> return sum(lst) / len(lst)<br><br>def calculate_median(lst, outliers = None):<br> lst2 = calculate_data_sets(lst, outliers)<br> if len(lst2) % 2 == 0:<br> return lst2[len(lst2) // 2 - 1]<br> else:<br> return lst2[len(lst2) // 2]<br><br>def calculate_mode(lst, outliers = None):<br> lst2 = calculate_data_sets(lst, outliers)<br> max_var = None<br> max_count = 0<br> for i in lst2:<br> if lst2.count(i) > max_count:<br> max_count = lst2.count(i)<br> max_var = i<br> if lst2.count(i) == max_count:<br> max_var.append(i)<br> return max_var<br><br>def calculate_all(lst, outliers = None):<br> lst2 = 
calculate_data_sets(lst, outliers)<br> print("Mean:", calculate_mean(lst2))<br> print("Median:", calculate_median(lst2, outliers))<br> print("Mode:", calculate_mode(lst2, outliers))<br> print("Standard Deviation:", statistics.stdev(lst2))<br><br>def plot(lst, name, notes = None):<br> #data = Data(lst, name, notes)<br> plt.hist(lst, bins=10, alpha=0.7, color=["blue"], edgecolor="black")<br> plt.title(name)<br> plt.xlabel("Value")<br> plt.ylabel("Frequency")<br> plt.show()<br><br>def plot_all(lst, name, notes = None, outliers = None):<br> #data = Data(lst, name, notes)<br> plt.hist(lst, bins=10, alpha=0.7, color=["blue"], edgecolor="black")<br> plt.title(name)<br> plt.xlabel("Value")<br> plt.ylabel("Frequency")<br> plt.axvline(calculate_mean(lst), color='r', linestyle='dashed', linewidth=1, label = "Mean")<br> plt.axvline(calculate_median(lst, outliers), color='g', linestyle='dashed', linewidth=1, label = "Median")<br> try:<br> plt.axvline(calculate_mode(lst, outliers), color='orange', linestyle='dashed', linewidth=1, label = "Mode")<br> except:<br> print("Plotting mode failed.")<br> plt.legend()<br> plt.show()<br><br>def print_all(lst):<br> print("Vocabulary:", set(lst))<br><br>def find_outliers_of(lst):<br> outliers = []<br> counts = []<br> for i in lst:<br> if lst.count(i) == 1:<br> outliers.append(i)<br> counts.append(outliers)<br> outliers = []<br> for i in outliers:<br> j = lst.count(i)<br> if j <= (sum(counts) // len(counts)):<br> outliers.append(i)<br> return outliers<br><br>def calculate_mean_outliers(lst):<br> return sum(lst) / (len(lst) - 1)<br><br>def calculate_mean_inliers(lst, outliers = None):<br> if outliers is None:<br> outliers = find_outliers_of(lst)<br> inliers = []<br> for i in range(len(lst)):<br> if lst[i] in outliers:<br> outliers.append(lst[i])<br> else:<br> inliers.append(lst[i])<br> return sum(inliers) / (len(inliers) - 1)<br>```<br><br>​
Comments (25) 45522 👁️