| import streamlit as st |
| import pandas as pd |
| import numpy as np |
| from scipy import stats |
|
|
class Analyzer:
    """Streamlit front end offering a few statistical analyses of a DataFrame."""

    def perform_analysis(self, df):
        """Render an analysis picker and display the selected analysis of *df*.

        Args:
            df: pandas DataFrame to analyse.

        Returns:
            None. All results, warnings and errors are written to the
            Streamlit page.
        """
        analysis_type = st.selectbox(
            "Select analysis type",
            ["Descriptive Statistics", "Correlation Analysis",
             "Hypothesis Testing", "Custom Query"],
        )

        # Every analysis below needs at least one column to work with.
        if df is None or len(df.columns) == 0:
            st.warning("No data available to analyze.")
            return

        if analysis_type == "Descriptive Statistics":
            self._show_descriptive_statistics(df)
        elif analysis_type == "Correlation Analysis":
            self._show_correlation(df)
        elif analysis_type == "Hypothesis Testing":
            self._show_hypothesis_test(df)
        elif analysis_type == "Custom Query":
            self._show_custom_query(df)

    @staticmethod
    def _numeric_columns(df):
        """Return the names of the numeric columns of *df* as a list."""
        return df.select_dtypes(include="number").columns.tolist()

    def _show_descriptive_statistics(self, df):
        """Show ``describe()`` output and, on request, skewness and kurtosis."""
        st.write(df.describe())

        if st.checkbox("Show additional statistics"):
            # numeric_only=True: pandas >= 2.0 raises on non-numeric columns
            # instead of silently skipping them.
            st.write("Skewness:")
            st.write(df.skew(numeric_only=True))
            st.write("Kurtosis:")
            st.write(df.kurtosis(numeric_only=True))

    def _show_correlation(self, df):
        """Show the correlation matrix of the numeric columns, optionally as a heatmap."""
        # Restrict to numeric/bool data up front; DataFrame.corr() on string
        # columns raises in pandas >= 2.0.
        numeric_df = df.select_dtypes(include=["number", "bool"])
        if numeric_df.shape[1] == 0:
            st.warning("No numeric columns available for correlation analysis.")
            return

        corr_matrix = numeric_df.corr()
        st.write(corr_matrix)

        if st.checkbox("Show heatmap"):
            # Imported here because the module never imports plotly; the
            # original code referenced an undefined name `px`.
            try:
                import plotly.express as px
            except ImportError:
                st.error("Plotly is required to display the heatmap.")
                return
            fig = px.imshow(corr_matrix, color_continuous_scale='RdBu_r')
            st.plotly_chart(fig)

    def _show_hypothesis_test(self, df):
        """Let the user pick a hypothesis test and dispatch to it."""
        test_type = st.selectbox("Select test type", ["T-Test", "ANOVA", "Chi-Square"])

        if test_type == "T-Test":
            self._show_t_test(df)
        elif test_type == "ANOVA":
            self._show_anova(df)
        elif test_type == "Chi-Square":
            self._show_chi_square(df)

    def _show_t_test(self, df):
        """Run an independent two-sample t-test on two numeric columns."""
        numeric_cols = self._numeric_columns(df)
        if not numeric_cols:
            st.warning("A T-Test requires numeric columns, but none were found.")
            return

        col1 = st.selectbox("Select first column", numeric_cols)
        col2 = st.selectbox("Select second column", numeric_cols)

        # Missing values would otherwise propagate and yield NaN statistics.
        sample1 = df[col1].dropna()
        sample2 = df[col2].dropna()
        if sample1.empty or sample2.empty:
            st.warning("Both columns need at least one non-missing value.")
            return

        t_stat, p_value = stats.ttest_ind(sample1, sample2)
        st.write(f"T-statistic: {t_stat}")
        st.write(f"P-value: {p_value}")

    def _show_anova(self, df):
        """Run a one-way ANOVA of a numeric column across the groups of another column."""
        numeric_cols = self._numeric_columns(df)
        if not numeric_cols:
            st.warning("ANOVA requires a numeric value column, but none were found.")
            return

        grouping_col = st.selectbox("Select grouping column", df.columns)
        value_col = st.selectbox("Select value column", numeric_cols)
        if grouping_col == value_col:
            st.warning("Grouping and value columns must be different.")
            return

        # Drop rows with a missing group label or value before grouping.
        data = df[[grouping_col, value_col]].dropna()
        groups = [values for _, values in data.groupby(grouping_col)[value_col]]
        if len(groups) < 2:
            # stats.f_oneway raises when given fewer than two samples.
            st.warning("ANOVA requires at least two groups.")
            return

        f_stat, p_value = stats.f_oneway(*groups)
        st.write(f"F-statistic: {f_stat}")
        st.write(f"P-value: {p_value}")

    def _show_chi_square(self, df):
        """Run a chi-square test of independence on the cross-tabulation of two columns."""
        col1 = st.selectbox("Select first column", df.columns)
        col2 = st.selectbox("Select second column", df.columns)
        contingency_table = pd.crosstab(df[col1], df[col2])
        if contingency_table.size == 0:
            st.warning("The contingency table is empty; cannot run the test.")
            return

        try:
            chi2, p_value, _dof, _expected = stats.chi2_contingency(contingency_table)
        except ValueError as exc:
            # Raised e.g. when an expected frequency is zero.
            st.error(f"Chi-square test could not be computed: {exc}")
            return

        st.write(f"Chi-square statistic: {chi2}")
        st.write(f"P-value: {p_value}")

    def _show_custom_query(self, df):
        """Filter *df* with a user-supplied ``DataFrame.query`` expression."""
        query = st.text_input("Enter a custom query (e.g., 'column_name > 5')")
        if not query:
            return

        # SECURITY NOTE(review): DataFrame.query evaluates the user-supplied
        # expression; pandas does not treat this as a sandbox. Only expose
        # this to trusted users, or validate the expression first.
        try:
            result = df.query(query)
        except Exception as e:  # UI boundary: surface any query error to the user
            st.error(f"Error in query: {e}")
        else:
            st.write(result)