Taylor Kirk committed on
Commit
4edde41
·
1 Parent(s): 168cd0b

Fixing errors

Browse files
tabs/main_page_tabs/dataset_overview.py CHANGED
@@ -11,16 +11,9 @@ import pandas as pd
11
  import os
12
  from pathlib import Path
13
  from utils.icons import lucide_icon
14
- from utils.load_data import load_dataset
15
 
16
def find_data_dir():
    """Locate the ``review_data`` directory by walking up from this file.

    Checks each ancestor of the current module's resolved path and
    returns the first ``<ancestor>/review_data`` that exists on disk,
    or ``None`` when no ancestor contains one.
    """
    for ancestor in Path(__file__).resolve().parents:
        candidate = ancestor / "review_data"
        if candidate.exists():
            return candidate
    return None

DATA_DIR = find_data_dir()
24
 
25
  def render():
26
  st.sidebar.header('Data')
 
11
  import os
12
  from pathlib import Path
13
  from utils.icons import lucide_icon
14
+ from utils.load_data import get_data_directory
15
 
16
+ DATA_DIR = get_data_directory()
 
 
 
 
 
 
 
17
 
18
  def render():
19
  st.sidebar.header('Data')
tabs/predictive_model_tabs/pred_model_two.py CHANGED
@@ -8,7 +8,11 @@ def render():
8
  model = load_model() # Using the cached function
9
  df = load_demo_data()
10
  model_columns = ['lemma_title', 'lemma_text', 'images', 'Review Length', 'Title Length']
11
- X = df[model_columns]
 
 
 
 
12
  y = df['vote']
13
  st.session_state.demo_probs = model.predict_proba(X)
14
 
 
8
  model = load_model() # Using the cached function
9
  df = load_demo_data()
10
  model_columns = ['lemma_title', 'lemma_text', 'images', 'Review Length', 'Title Length']
11
+
12
+ X = df[model_columns].copy()
13
+ X['lemma_title'] = X['lemma_title'].fillna("")
14
+ X['lemma_text'] = X['lemma_text'].fillna("")
15
+
16
  y = df['vote']
17
  st.session_state.demo_probs = model.predict_proba(X)
18
 
utils/load_data.py CHANGED
@@ -1,6 +1,7 @@
1
  import pandas as pd
2
  import streamlit as st
3
  from pathlib import Path
 
4
  import os
5
 
6
  @st.cache_data(show_spinner="Loading data...⏳")
@@ -21,4 +22,14 @@ def load_dataset(path: str | Path, category: str | None=None):
21
  else:
22
  df = pd.read_parquet(path)
23
 
24
- return df
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
  import streamlit as st
3
  from pathlib import Path
4
+ from huggingface_hub import snapshot_download
5
  import os
6
 
7
  @st.cache_data(show_spinner="Loading data...⏳")
 
22
  else:
23
  df = pd.read_parquet(path)
24
 
25
+ return df
26
+
27
+
28
@st.cache_resource
def get_data_directory():
    """Fetch the review-data Dataset repo and return its local root.

    Downloads (or reuses the cached copy of) the full
    ``tkbarb10/ads505-review-data`` dataset snapshot from the
    Hugging Face Hub. ``st.cache_resource`` ensures the download
    happens at most once per Streamlit session.
    """
    # NOTE(review): an earlier copy of this helper returned
    # Path(...) / "review_data" — confirm callers expect the snapshot
    # root rather than that subfolder.
    snapshot_root = snapshot_download(
        repo_type="dataset",
        repo_id="tkbarb10/ads505-review-data",
    )
    return Path(snapshot_root)
utils/topically.py CHANGED
@@ -10,6 +10,7 @@ from sklearn.decomposition import NMF
10
  from sklearn.feature_extraction.text import TfidfVectorizer
11
  from sklearn.pipeline import make_pipeline, Pipeline
12
  from utils.build_plotly import _build_topic_figure
 
13
 
14
  import plotly.graph_objects as go # type: ignore
15
 
@@ -22,17 +23,6 @@ from utils.remove_html import remove_html_tags
22
  # ROOT = Path(__file__).resolve().parents[1]
23
  # DEFAULT_DATA_DIR = ROOT / "review_data"
24
 
25
- from huggingface_hub import snapshot_download
26
-
27
- @st.cache_resource
28
- def get_data_directory():
29
- # This downloads the whole review_data folder from your Dataset repo
30
- data_path = snapshot_download(
31
- repo_id="tkbarb10/ads505-review-data",
32
- repo_type="dataset"
33
- )
34
- return Path(data_path) / "review_data"
35
-
36
  DEFAULT_DATA_DIR = get_data_directory()
37
 
38
  COLOR_WHEEL = {
 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline, Pipeline

from utils.build_plotly import _build_topic_figure
# Fixed: a bare `from load_data import ...` is not importable when the
# app runs from the repo root (this file lives in the `utils` package);
# use the package-qualified path like the sibling imports above.
from utils.load_data import get_data_directory

import plotly.graph_objects as go  # type: ignore
16
 
 
23
  # ROOT = Path(__file__).resolve().parents[1]
24
  # DEFAULT_DATA_DIR = ROOT / "review_data"
25
 
 
 
 
 
 
 
 
 
 
 
 
26
  DEFAULT_DATA_DIR = get_data_directory()
27
 
28
  COLOR_WHEEL = {