Spaces:
Sleeping
Sleeping
Taylor Kirk
committed on
Commit
·
4edde41
1
Parent(s):
168cd0b
Fixing errors
Browse files
tabs/main_page_tabs/dataset_overview.py
CHANGED
|
@@ -11,16 +11,9 @@ import pandas as pd
|
|
| 11 |
import os
|
| 12 |
from pathlib import Path
|
| 13 |
from utils.icons import lucide_icon
|
| 14 |
-
from utils.load_data import
|
| 15 |
|
| 16 |
-
|
| 17 |
-
current = Path(__file__).resolve()
|
| 18 |
-
for parent in current.parents:
|
| 19 |
-
if (parent / "review_data").exists():
|
| 20 |
-
return parent / "review_data"
|
| 21 |
-
return None
|
| 22 |
-
|
| 23 |
-
DATA_DIR = find_data_dir()
|
| 24 |
|
| 25 |
def render():
|
| 26 |
st.sidebar.header('Data')
|
|
|
|
| 11 |
import os
|
| 12 |
from pathlib import Path
|
| 13 |
from utils.icons import lucide_icon
|
| 14 |
+
from utils.load_data import get_data_directory
|
| 15 |
|
| 16 |
+
DATA_DIR = get_data_directory()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
def render():
|
| 19 |
st.sidebar.header('Data')
|
tabs/predictive_model_tabs/pred_model_two.py
CHANGED
|
@@ -8,7 +8,11 @@ def render():
|
|
| 8 |
model = load_model() # Using the cached function
|
| 9 |
df = load_demo_data()
|
| 10 |
model_columns = ['lemma_title', 'lemma_text', 'images', 'Review Length', 'Title Length']
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
y = df['vote']
|
| 13 |
st.session_state.demo_probs = model.predict_proba(X)
|
| 14 |
|
|
|
|
| 8 |
model = load_model() # Using the cached function
|
| 9 |
df = load_demo_data()
|
| 10 |
model_columns = ['lemma_title', 'lemma_text', 'images', 'Review Length', 'Title Length']
|
| 11 |
+
|
| 12 |
+
X = df[model_columns].copy()
|
| 13 |
+
X['lemma_title'] = X['lemma_title'].fillna("")
|
| 14 |
+
X['lemma_text'] = X['lemma_text'].fillna("")
|
| 15 |
+
|
| 16 |
y = df['vote']
|
| 17 |
st.session_state.demo_probs = model.predict_proba(X)
|
| 18 |
|
utils/load_data.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import pandas as pd
|
| 2 |
import streamlit as st
|
| 3 |
from pathlib import Path
|
|
|
|
| 4 |
import os
|
| 5 |
|
| 6 |
@st.cache_data(show_spinner="Loading data...⏳")
|
|
@@ -21,4 +22,14 @@ def load_dataset(path: str | Path, category: str | None=None):
|
|
| 21 |
else:
|
| 22 |
df = pd.read_parquet(path)
|
| 23 |
|
| 24 |
-
return df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import pandas as pd
|
| 2 |
import streamlit as st
|
| 3 |
from pathlib import Path
|
| 4 |
+
from huggingface_hub import snapshot_download
|
| 5 |
import os
|
| 6 |
|
| 7 |
@st.cache_data(show_spinner="Loading data...⏳")
|
|
|
|
| 22 |
else:
|
| 23 |
df = pd.read_parquet(path)
|
| 24 |
|
| 25 |
+
return df
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
@st.cache_resource
|
| 29 |
+
def get_data_directory():
|
| 30 |
+
# This downloads the whole review_data folder from your Dataset repo
|
| 31 |
+
data_path = snapshot_download(
|
| 32 |
+
repo_id="tkbarb10/ads505-review-data",
|
| 33 |
+
repo_type="dataset"
|
| 34 |
+
)
|
| 35 |
+
return Path(data_path)
|
utils/topically.py
CHANGED
|
@@ -10,6 +10,7 @@ from sklearn.decomposition import NMF
|
|
| 10 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 11 |
from sklearn.pipeline import make_pipeline, Pipeline
|
| 12 |
from utils.build_plotly import _build_topic_figure
|
|
|
|
| 13 |
|
| 14 |
import plotly.graph_objects as go # type: ignore
|
| 15 |
|
|
@@ -22,17 +23,6 @@ from utils.remove_html import remove_html_tags
|
|
| 22 |
# ROOT = Path(__file__).resolve().parents[1]
|
| 23 |
# DEFAULT_DATA_DIR = ROOT / "review_data"
|
| 24 |
|
| 25 |
-
from huggingface_hub import snapshot_download
|
| 26 |
-
|
| 27 |
-
@st.cache_resource
|
| 28 |
-
def get_data_directory():
|
| 29 |
-
# This downloads the whole review_data folder from your Dataset repo
|
| 30 |
-
data_path = snapshot_download(
|
| 31 |
-
repo_id="tkbarb10/ads505-review-data",
|
| 32 |
-
repo_type="dataset"
|
| 33 |
-
)
|
| 34 |
-
return Path(data_path) / "review_data"
|
| 35 |
-
|
| 36 |
DEFAULT_DATA_DIR = get_data_directory()
|
| 37 |
|
| 38 |
COLOR_WHEEL = {
|
|
|
|
| 10 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 11 |
from sklearn.pipeline import make_pipeline, Pipeline
|
| 12 |
from utils.build_plotly import _build_topic_figure
|
| 13 |
+
from load_data import get_data_directory
|
| 14 |
|
| 15 |
import plotly.graph_objects as go # type: ignore
|
| 16 |
|
|
|
|
| 23 |
# ROOT = Path(__file__).resolve().parents[1]
|
| 24 |
# DEFAULT_DATA_DIR = ROOT / "review_data"
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
DEFAULT_DATA_DIR = get_data_directory()
|
| 27 |
|
| 28 |
COLOR_WHEEL = {
|