# python-machine-learning/Statistics.py at main · KoVoidG/python-machine-learning · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import streamlit as st
import pandas  as pd
import numpy as np
import statistics
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

# Read three worksheets of the KFC.xlsx workbook, one DataFrame per sheet,
# in the order Clean_data -> Addon -> Promotion.
file = 'KFC.xlsx'
df, df2, df3 = (
    pd.read_excel(file, sheet_name=name)
    for name in ('Clean_data', 'Addon', 'Promotion')
)
# Keep `sheet` bound to the name of the last sheet that was read.
sheet = 'Promotion'

# Gender section: two headline metrics, then one bullet per gender value
# giving its row count and its share of all rows in `df`.
st.header("Statistical Summary")
st.subheader("Gender Distribution Percentage")

total = len(df)  # row count of df; later sections reuse it as the denominator
gender_data = df['gender'].value_counts().sort_index()
mode_value = df['gender'].mode()[0]

col1, col2 = st.columns(2)
col1.metric("Total Customers", total)
col2.metric("Mode (Most Common)", mode_value)

st.divider()

st.subheader("**Frequency Distribution:**")
for label, n_rows in gender_data.items():
    share = n_rows / total * 100
    st.write(f"• **{label}**: {n_rows} customers ({share:.1f}%)")
st.divider()

# Nationality section: two headline metrics, then one bullet per nationality
# value giving its row count and its share of `total` (the row count of df).
st.subheader("Nationality Distribution")
nationality_df = df['nationality'].value_counts().sort_index()
mode_value = df['nationality'].mode()[0]
col1, col2 = st.columns(2)
with col1:
    st.metric("Total Count", total)
with col2:
    st.metric("Mode (Most Common)", mode_value)

# Same horizontal rule as the other sections use.
st.divider()
st.subheader("Frequency Distribution:")
# The loop variable gets its own name so that `nationality_df` still refers
# to the counts Series after the loop instead of the last nationality label.
for nationality, count in nationality_df.items():
    percentage = (count / total) * 100
    st.write(f"• **{nationality}**: {count} customers ({percentage:.1f}%)")
st.divider()

# Dine type section: the most common value, then one bullet per dine type
# with its row count and its share of `total`.
st.subheader(" Dine Type Distribution")
dine_type_column = df['dineType']
dinetype_counts = dine_type_column.value_counts().sort_index()
mode_value = dine_type_column.mode()[0]
st.write("Mode:", mode_value)
for label, n_rows in dinetype_counts.items():
    share = n_rows / total * 100
    st.write(f"• **{label}**: {n_rows} customers ({share:.1f}%)")
st.divider()

# Payment section: the most common payment value, then one bullet per
# payment value with its row count and its share of `total`.
st.subheader("Payment Distribution")
payment_column = df['payment']
payment_counts = payment_column.value_counts().sort_index()
mode_value = payment_column.mode()[0]
st.write("Mode:", mode_value)

for method, n_rows in payment_counts.items():
    share = 100 * (n_rows / total)
    st.write(f"• **{method}**: {n_rows} customers ({share:.1f}%)")

st.divider()

# Age Statistical Summary
# Shows the median and mode of the encoded age column, followed by a fixed
# interpretation paragraph.
st.subheader("Statistical Summary of Age")

# Mapping: 1=under 18, 2=18-22, 3=23-27, 4=28-35, 5=above 35

median_age = df['age_enc'].median()
mode_age = df['age_enc'].mode()[0]

# These labels describe the age index; they previously read "Budget Index",
# copied over from the spending-budget section.
st.write('Median (Middle Age Index):', median_age)
st.write("Mode (Most Frequent Age Index):", mode_age)
st.write('*Interpretation*')
# NOTE(review): the paragraph below hard-codes the median (2.0) and mode (2);
# it does not track the values computed above if the data changes.
st.markdown('''
The age data was encoded into five categories,
where 1 = under 18, 2 = 18–22, 3 = 23–27, 4 = 28–35, and 5 = above 35.
Based on this, both the median (2.0) and mode (2) fall within the 18–22 age group,
indicating that this is both the middle and most common age range among respondents.
This suggests that the majority of participants are young adults,
with responses largely concentrated in this age group.''')

st.divider()

# Spending budget summary: median and mode of the encoded budget column,
# followed by a fixed interpretation paragraph.
# Encoding: 1=Below 100, 2=100-199, 3=200-299, 4=300+
st.subheader("Statistical Summary of Spending Budget")

budget_codes = df['budget_enc']
median_budget, mode_budget = budget_codes.median(), budget_codes.mode()[0]

st.write('Median (Middle Budget Index):', median_budget)
st.write("Mode (Most Frequent Budget Index):", mode_budget)

st.write('*Interpretation*')
budget_interpretation = '''
The budget data was first encoded into four categories,
where (1 = below 100, 2 = 100–199, 3 = 200–299, and 4 = 300+).
Based on this, both the median (2.0) and mode (2) fall in the 100–199 range,
indicating that this is both the middle and most common budget among respondents.
This suggests that most individuals have a moderate budget,
with spending behavior largely concentrated within this range.'''
st.markdown(budget_interpretation)

st.divider()

# Visit frequency summary: median and mode of the encoded visit-frequency
# column, followed by a fixed interpretation paragraph.
# Encoding: 1=Rarely, 2=Sometimes, 3=Often, 4=Very Often
st.subheader("Statistical Summary of Visit Frequency")

visit_codes = df['visitFrequency_enc']
median_freq = visit_codes.median()
mode_freq = visit_codes.mode()[0]

for caption, value in (
    ('Median (Middle Frequency Index):', median_freq),
    ("Mode (Most Frequent Frequency Index):", mode_freq),
):
    st.write(caption, value)

st.write('*Interpretation*')
st.markdown(
    '''
The visit frequency data was encoded into four categories,
where 1 = Rarely, 2 = Sometimes, 3 = Often, and 4 = Very Often.
Based on this, both the median (2.0) and mode (2) fall under “Sometimes”,
indicating that this is both the middle and most common response among participants.
This suggests that most individuals visit occasionally rather than frequently,
showing a moderate level of engagement.'''
)

st.divider()

# Summary statistics for the flavor and service rating columns.
flavor_ratings = df['flavorRating']
mean_flavor = flavor_ratings.mean()
median_flavor = flavor_ratings.median()
std_flavor = flavor_ratings.std()
mode_flavor = flavor_ratings.mode()[0]

service_ratings = df['serviceRating']
mean_service = service_ratings.mean()
median_service = service_ratings.median()
std_service = service_ratings.std()
mode_service = service_ratings.mode()[0]

# Side-by-side panels. Only the median and mode are rendered; the mean and
# standard deviation above are computed but not displayed.
col1, col2 = st.columns(2)
rating_panels = (
    (col1, "**Flavor Rating Statistics**", median_flavor, mode_flavor),
    (col2, "**Service Rating Statistics**", median_service, mode_service),
)
for panel, panel_title, panel_median, panel_mode in rating_panels:
    with panel:
        st.markdown(panel_title)
        st.write('Median:', panel_median)
        st.write("Mode:", panel_mode)

st.divider()