| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
| |
|
|
| |
| |
| |
|
|
| |
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
|
|
|
|
| import streamlit as st |
| import nltk |
| import spacy |
| import benepar |
| from nltk import Tree |
| from graphviz import Digraph |
|
|
| |
# Directory where NLTK resources are stored and looked up.
nltk_data_path = "/tmp/nltk_data"

# This script is re-executed top to bottom on every Streamlit interaction,
# while the imported ``nltk`` module (and its search-path list) persists.
# Register the directory only once so the list does not grow on each rerun.
if nltk_data_path not in nltk.data.path:
    nltk.data.path.append(nltk_data_path)

# Fetch the Punkt tokenizer data only when it is not already installed, so
# that reruns do not reach out to the NLTK download index every time.
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download("punkt", download_dir=nltk_data_path)
|
|
| |
# show_spinner=False keeps the cached loader from drawing anything, so the
# st.set_page_config call further down is still the first element-producing
# Streamlit command of the script.
@st.cache_resource(show_spinner=False)
def _load_pipeline():
    """Return the spaCy English pipeline with the benepar parser attached.

    The pipeline is built once and then reused: without caching, every
    Streamlit rerun (i.e. every user interaction) would reload
    ``en_core_web_sm`` and call ``benepar.download`` again.

    Returns:
        The loaded spaCy ``Language`` object whose pipe names include
        ``"benepar"``.
    """
    pipeline = spacy.load("en_core_web_sm")
    if "benepar" not in pipeline.pipe_names:
        # Make sure the model files exist before registering the component.
        benepar.download("benepar_en3")
        pipeline.add_pipe("benepar", config={"model": "benepar_en3"})
    return pipeline


# Shared pipeline used by the rest of the script.
nlp = _load_pipeline()
|
|
| |
# Static page chrome: browser-tab settings, the sidebar blurb, and the page
# header with its introduction.
# NOTE(review): the non-ASCII characters in the literals below look like
# mis-decoded emoji/punctuation; they are reproduced unchanged here. Confirm
# the intended characters against the original file.
_PAGE_SETTINGS = {
    "page_title": "๐ Syntax Parser Comparison Tool",
    "page_icon": "๐",
    "layout": "wide",
}

_SIDEBAR_BLURB = """
Compare **Dependency Parsing**, **Constituency Parsing**,
and a simulated **Abstract Syntax Representation (ASR)**.
"""

_INTRO_MARKDOWN = """
This tool demonstrates **three parsing styles** side-by-side:
1. **Dependency Parsing** โ Shows headโdependent word relationships.
2. **Constituency Parsing** โ Displays hierarchical phrase structures.
3. **Abstract Syntax Representation (ASR)** โ Merges phrase structure with dependency info.
"""

st.set_page_config(**_PAGE_SETTINGS)

# Sidebar: title, short description, divider, and a usage hint.
_sidebar = st.sidebar
_sidebar.title("โน๏ธ About This Tool")
_sidebar.write(_SIDEBAR_BLURB)
_sidebar.markdown("---")
_sidebar.info("๐ก Enter a sentence in the input box to see all 3 parses.")

# Main header followed by the overview of the three views.
st.title("๐ Syntax Parser Comparison Tool")
st.markdown(_INTRO_MARKDOWN)
|
|
| |
# Dependency labels that get a highlighted line in the ASR column.
# spaCy's English models label direct objects "dobj"; "obj" is kept as well
# so pipelines that emit that label still match.
_HIGHLIGHTED_DEPS = frozenset({"nsubj", "dobj", "obj", "det", "ROOT"})

# NOTE(review): the non-ASCII characters in the UI strings of this block look
# like mis-decoded emoji/arrows; they are reproduced unchanged. Confirm the
# intended characters against the original file.


def _build_dependency_graph(tokens):
    """Return a top-to-bottom Graphviz digraph of head -> dependent arcs.

    Args:
        tokens: Iterable of spaCy tokens (here, one sentence span).

    Returns:
        A ``Digraph`` with one node per token, labelled with the token text
        and its dependency label, and one edge from each token's head to the
        token. A token that is its own head gets no incoming edge.
    """
    graph = Digraph()
    graph.attr(rankdir="TB")
    for tok in tokens:
        # Key nodes by token index rather than surface text: keying by text
        # merges repeated words (two "the"s) into a single node, and text
        # containing ":" would be read by Graphviz as a node:port reference.
        node_id = str(tok.i)
        graph.node(node_id, f"{tok.text}\n({tok.dep_})")
        if tok.head.i != tok.i:
            graph.edge(str(tok.head.i), node_id)
    return graph


def _render_parses(sent):
    """Draw the three parse views for one sentence in side-by-side columns.

    Args:
        sent: A spaCy sentence span produced by the benepar-enabled pipeline
            (``sent._.parse_string`` is read for the constituency view).
    """
    dep_col, const_col, asr_col = st.columns(3)

    # Column 1: dependency graph plus the raw (token, label, head) tuples.
    with dep_col:
        st.subheader("๐ Dependency Parsing")
        st.graphviz_chart(_build_dependency_graph(sent))
        with st.expander("Raw Dependency Tuples"):
            st.code(" ".join(f"({t.text}, {t.dep_}, {t.head.text})" for t in sent))

    # Column 2: bracketed constituency string and its pretty-printed tree.
    with const_col:
        st.subheader("๐ณ Constituency Parsing")
        tree_str = sent._.parse_string
        with st.expander("Tree String"):
            st.text(tree_str)
        st.code(Tree.fromstring(tree_str).pformat(), language="text")

    # Column 3: one highlighted line per key relation, plus the full encoding.
    with asr_col:
        st.subheader("๐งฉ Simulated ASR Output")
        st.markdown("Combines **dependency heads**, **POS tags**, and **phrase info**.")
        highlighted = [
            f"**[{t.text}]** - {t.dep_} โ {t.head.text} ({t.pos_})"
            for t in sent
            if t.dep_ in _HIGHLIGHTED_DEPS
        ]
        # Two trailing spaces before the newline form a Markdown hard line
        # break; a bare "\n" would be folded into one run-on paragraph.
        st.write("  \n".join(highlighted))
        with st.expander("ASR Encoded String"):
            st.code(
                " ".join(f"[{t.text}: {t.dep_} โ {t.head.text}]({t.pos_})" for t in sent)
            )


sentence = st.text_input("โ๏ธ Enter a sentence:", "John eats an apple.")

# Whitespace-only input carries nothing to parse, so treat it like empty input.
if sentence.strip():
    doc = nlp(sentence.strip())
    sentences = list(doc.sents)
    if sentences:
        # Only the first sentence is visualised; say so instead of silently
        # dropping the rest.
        sent = sentences[0]
        if len(sentences) > 1:
            st.info("Multiple sentences detected; only the first one is shown.")
        _render_parses(sent)
    else:
        st.warning("No sentence could be detected in the input.")
|
|