Spaces:

ziem-io
/

whisky-wheel

Running on CPU Upgrade

App Files Files

ziem-io committed on Dec 19, 2025

Commit

9a2984d

1 Parent(s): b81d9a3

Update: Refactor code

Browse files

Files changed (2) hide show

app.py +50 -37
lib/bert_regressor_utils.py +26 -24

app.py CHANGED Viewed

@@ -115,72 +115,81 @@ def _translate_en(text: str, target_lang: str = "EN-GB"):
     result = deepl_client.translate_text(text, target_lang=target_lang)
     return result.text
 ### Do actual prediction #########################################################
 @spaces.GPU(duration=10)  # Sekunden GPU-Zeit pro Call
 def predict(review_raw: str, do_cleanup: bool):
     review_raw = (review_raw or "").strip()
     is_translated = False
     html_info_out = ""
-    # Abort if no text if given
     if not review_raw:
-        # immer drei Outputs zurückgeben
         return "Please enter a review.", "", {}
-    # Check for lang of text
     review_is_eng, review_lang_prob = _is_eng(review_raw)
-    # Abort if text is not english
     if not review_is_eng:
         review_raw = _translate_en(review_raw)
-        html_info_out += f"""<strong style='margin-bottom: 0.5em'>Your text has been automatically translated:</strong>
-        <p>{review_raw}</p>
-        """
         is_translated = True
     prediction_flavours = {}
     prediction_flavours_list = [0, 0, 0, 0, 0, 0, 0, 0]
-    # Do actual predictions if is english and whisky note
     t_start_flavours = time.time()
-    # ✅ Default-Werte, damit spätere Nutzung immer safe ist
     review_clean = review_raw
     cleanup_meta = []
-    has_review = True  # ohne Cleanup gehen wir davon aus: kompletten Text klassifizieren
-     # ✅ Cleanup nur wenn Checkbox aktiv
     if do_cleanup:
-        review_clean, cleanup_meta, has_review = cleanup_tasting_note(
             review_raw,
             model_cleanup,
             tokenizer_cleanup,
             device
         )
-        html_info_out += (
-            "<strong style='margin-bottom: 0.5em; display:block;'>"
-            "BETA: Your text has been cleaned up:</strong>"
-            "<p>"
-        )
-        for s in cleanup_meta:
-            sent = html.escape(s.get("sentence", ""))
-            if s.get("is_note"):
-                html_info_out += f"{sent} "
-            else:
-                html_info_out += f"<span style='text-decoration: line-through; color: gray;'>{sent}</span> "
-        html_info_out += "</p>"
-        if not has_review:
             html_info_out += "<strong>No tasting notes detected.</strong>"
-    # ✅ Nur vorhersagen, wenn wir Tasting Notes haben (oder Cleanup aus ist)
-    if has_review:
         prediction_flavours = predict_flavours(
             review_clean,
             model_classify,
@@ -189,18 +198,21 @@ def predict(review_raw: str, do_cleanup: bool):
         )
         prediction_flavours_list = list(prediction_flavours.values())
     t_end_flavours = time.time()
     html_wheel_out = build_svg_with_values(prediction_flavours_list)
     json_out = {
         "result": dict(prediction_flavours.items()),
-        "range": { "min": 0, "max": 4 },
         "review": {
             "raw": review_raw,
             "clean": review_clean,
             "clean_meta": cleanup_meta,
-            "has_review": has_review,
         },
         "models": {
             "cleanup": MODEL_FILE_CLEANUP,
@@ -211,6 +223,7 @@ def predict(review_raw: str, do_cleanup: bool):
         "duration": round((t_end_flavours - t_start_flavours), 3),
     }
     return html_info_out, html_wheel_out, json_out
 ##################################################################################

     result = deepl_client.translate_text(text, target_lang=target_lang)
     return result.text
def _render_cleanup_html(cleanup_meta):
    """
    Render cleanup metadata as an HTML snippet.

    Each entry of ``cleanup_meta`` is a dict. Its ``"sentence"`` text
    (empty string when the key is missing) is HTML-escaped before output.
    Entries with a truthy ``"is_note"`` value are emitted as plain text;
    all other entries are wrapped in a grey, struck-through ``<span>``.

    Returns:
        A string made of the heading followed by a single ``<p>`` element
        containing every sentence, each followed by one space.
    """
    # Collect fragments and join once. A single accumulator also avoids the
    # earlier mix-up between two differently named result variables, which
    # raised UnboundLocalError on every call.
    parts = [
        "<strong style='display:block'>Your text has been cleaned up:</strong><p>"
    ]
    for entry in cleanup_meta:
        sent = html.escape(entry.get("sentence", ""))
        if entry.get("is_note"):
            parts.append(f"{sent} ")
        else:
            parts.append(
                "<span style='text-decoration: line-through; color: gray;'>"
                f"{sent}</span> "
            )
    parts.append("</p>")
    return "".join(parts)
 ### Do actual prediction #########################################################
 @spaces.GPU(duration=10)  # Sekunden GPU-Zeit pro Call
 def predict(review_raw: str, do_cleanup: bool):
+    # Normalize input (handle None and trim whitespace)
     review_raw = (review_raw or "").strip()
     is_translated = False
     html_info_out = ""
+    # Abort early if no text is provided
     if not review_raw:
         return "Please enter a review.", "", {}
+    # Detect language of the input text
     review_is_eng, review_lang_prob = _is_eng(review_raw)
+    # Automatically translate non-English text
     if not review_is_eng:
         review_raw = _translate_en(review_raw)
+        html_info_out += (
+            "<strong style='display:block'>Your text has been automatically translated:</strong>"
+            f"<p>{html.escape(review_raw)}</p>"
+        )
         is_translated = True
+    # Initialize prediction outputs
     prediction_flavours = {}
     prediction_flavours_list = [0, 0, 0, 0, 0, 0, 0, 0]
+    # Start timing the model inference
     t_start_flavours = time.time()
+    # Default values to ensure all variables are always defined
+    # Without cleanup enabled, the full text is treated as a tasting note
     review_clean = review_raw
     cleanup_meta = []
+    review_status = "review_only"
+    # Apply cleanup only if the checkbox is enabled
     if do_cleanup:
+        review_clean, cleanup_meta, review_status = cleanup_tasting_note(
             review_raw,
             model_cleanup,
             tokenizer_cleanup,
             device
         )
+        # Render cleanup visualization only if the text was actually modified
+        if review_status != "review_only":
+            html_info_out += _render_cleanup_html(cleanup_meta)
+        elif review_status == "noise_only":
             html_info_out += "<strong>No tasting notes detected.</strong>"
+    # Run flavour prediction only if review content is present
+    if (not do_cleanup) or (review_status in ("review_only", "mixed")):
         prediction_flavours = predict_flavours(
             review_clean,
             model_classify,
         )
         prediction_flavours_list = list(prediction_flavours.values())
+    # Stop timing inference
     t_end_flavours = time.time()
+    # Build the flavour wheel SVG
     html_wheel_out = build_svg_with_values(prediction_flavours_list)
+    # Prepare structured JSON output
     json_out = {
         "result": dict(prediction_flavours.items()),
+        "range": {"min": 0, "max": 4},
         "review": {
             "raw": review_raw,
             "clean": review_clean,
             "clean_meta": cleanup_meta,
+            "status": review_status
         },
         "models": {
             "cleanup": MODEL_FILE_CLEANUP,
         "duration": round((t_end_flavours - t_start_flavours), 3),
     }
+    # Return HTML info, flavour wheel, and JSON output
     return html_info_out, html_wheel_out, json_out
 ##################################################################################

lib/bert_regressor_utils.py CHANGED Viewed

@@ -271,45 +271,47 @@ def text_to_sentences(text):
 ###################################################################################
 def cleanup_tasting_note(text, model, tokenizer, device, threshold=0.5):
-    # Initialize an empty list to store sentences that are identified as tasting notes
     good_sentences = []
     scored_sentences = []
     sentences = text_to_sentences(text)
-    # Iterate through each sentence detected in the processed document
-    for sentence in sentences:
-        # Leere Schnipsel überspringen
         if not sentence:
             continue
-        # AI Filter section (Your Guardrail model)
-        # Predict if the current sentence is a review using the loaded model and tokenizer
         result = predict_is_review(sentence, model, tokenizer, device)
-        # Extract the probability score from the result and round it to 3 decimal places
         score = round(result["probability"], 3)
-        # If valid, append the clean sentence text to the list
         scored_sentences.append({
-            'is_note': score > threshold,
-            'score': score,
-            'sentence': sentence
-        })
-        # Check if the probability is greater than 50% (threshold for being a tasting note)
-        if score > threshold:
-            # If valid, append the clean sentence text to the list
             good_sentences.append(sentence)
     new_text = " ".join(good_sentences)
-    # True, wenn mindestens ein Satz als Tasting Note erkannt wurde
-    has_review = len(good_sentences) > 0
-    # Join all valid sentences into a single string separated by spaces and return it
-    return new_text, scored_sentences, has_review
 ###################################################################################

 ###################################################################################
 def cleanup_tasting_note(text, model, tokenizer, device, threshold=0.5):
     """
     Split ``text`` into sentences, score each one, and keep the tasting notes.

     Every non-empty sentence returned by ``text_to_sentences`` is passed to
     ``predict_is_review``; the ``"probability"`` of the result is rounded to
     three decimals and the sentence counts as a note when that rounded score
     is greater than ``threshold``.

     Returns:
         A tuple ``(new_text, scored_sentences, review_status)`` where
         ``new_text`` is the note sentences joined by single spaces,
         ``scored_sentences`` is a list of dicts with the keys ``"is_note"``,
         ``"score"`` and ``"sentence"``, and ``review_status`` is one of
         ``"mixed"``, ``"review_only"`` or ``"noise_only"``.
     """
     scored_sentences = []
     for sentence in text_to_sentences(text):
         if not sentence:
             continue
         prediction = predict_is_review(sentence, model, tokenizer, device)
         score = round(prediction["probability"], 3)
         scored_sentences.append({
             "is_note": score > threshold,
             "score": score,
             "sentence": sentence
         })

     good_sentences = [
         item["sentence"] for item in scored_sentences if item["is_note"]
     ]
     new_text = " ".join(good_sentences)

     # Derive the status from what was seen: notes and noise, notes only, or
     # no notes at all (which also covers input without any sentences).
     has_review = bool(good_sentences)
     has_noise = len(good_sentences) < len(scored_sentences)
     if has_review:
         review_status = "mixed" if has_noise else "review_only"
     else:
         review_status = "noise_only"
     return new_text, scored_sentences, review_status
 ###################################################################################