| import gradio as gr |
| import pandas as pd |
| import re |
|
|
| |
def check_spacing_around_dollar(df):
    """Report cells in which a ``$`` token is not flanked by spaces.

    Every cell is converted with ``str()`` and scanned with the pattern
    below. For each match, the character immediately before the match and
    the character immediately after it must both be a literal space;
    otherwise one message is appended for that match.

    Args:
        df: The pandas DataFrame to scan. Row labels must support ``+ 1``
            because the message uses ``label + 1`` as the row number.

    Returns:
        A list of message strings, one per offending match (a cell with
        several offending matches is therefore listed several times).
    """
    # NOTE(review): the pattern can itself consume one whitespace character on
    # either side of the '$' token, so the neighbours inspected below lie
    # beyond that whitespace. Confirm this is the intended spacing rule.
    pattern = re.compile(r'(\s?\$\S*?\s?|\s?\S*?\$)')
    errors = []
    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start, end = match.span()
                # A match touching either end of the text has no neighbour on
                # that side. Indexing blindly would wrap around to the last
                # character (index -1) or raise IndexError, so a missing
                # neighbour is represented as '' and counts as "not a space".
                before = text[start - 1] if start > 0 else ''
                after = text[end] if end < len(text) else ''
                if not (before == ' ' and after == ' '):
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
    return errors
|
|
| |
def check_spacing_between_dollars(df):
    """Report ``$...`` runs that are not preceded by a space.

    Every cell is converted with ``str()`` and scanned for a ``$`` followed
    by one or more non-whitespace characters, up to (but not including) the
    next ``$``. A match is reported when neither the character after it nor
    the character before it is a literal space.

    Args:
        df: The pandas DataFrame to scan. Row labels must support ``+ 1``
            because the message uses ``label + 1`` as the row number.

    Returns:
        A list of message strings, one per offending match.
    """
    pattern = re.compile(r'\$\S+?(?=\$)')
    errors = []
    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start, end = match.span()
                # The lookahead guarantees the character at `end` is '$', so
                # in practice the outcome is decided by the character before
                # the match. The bounds check is kept for symmetry.
                after = text[end] if end < len(text) else ''
                # A match at index 0 has no preceding character. Without this
                # guard, text[-1] would read the last character of the cell.
                before = text[start - 1] if start > 0 else ''
                if after != ' ' and before != ' ':
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
    return errors
|
|
| |
def check_numbers_surrounded_by_dollar(df):
    """Report digit runs that are not directly wrapped in ``$`` on both sides.

    Every cell is converted with ``str()`` and scanned for whole-word digit
    runs (``\\b\\d+\\b``). A run is accepted only when the character right
    before it and the character right after it are both ``$``.

    Args:
        df: The pandas DataFrame to scan. Row labels must support ``+ 1``
            because the message uses ``label + 1`` as the row number.

    Returns:
        A list of message strings, one per offending digit run.
    """
    pattern = re.compile(r'\b\d+\b')
    errors = []
    for i, row in df.iterrows():
        for col in df.columns:
            text = str(row[col])
            for match in pattern.finditer(text):
                start, end = match.span()
                # A number at the very start or end of the cell has no
                # neighbour on that side, so it cannot be wrapped in '$'.
                # Guarding the lookups avoids text[-1] wrapping to the last
                # character and text[len(text)] raising IndexError.
                before = text[start - 1] if start > 0 else ''
                after = text[end] if end < len(text) else ''
                if not (before == '$' and after == '$'):
                    errors.append(f"行 {i+1} 列 '{col}': '{text}'")
    return errors
|
|
| |
def process_file(file):
    """Load an uploaded CSV/XLSX file and run the three format checks on it.

    Args:
        file: Either an object exposing the file path as ``.name`` or the
            path itself (for example a plain string).

    Returns:
        A dict mapping each check's label to its list of error messages, or
        the string "只支持 CSV 和 XLSX 檔案" when the path does not end in
        ``.csv`` or ``.xlsx`` (compared case-insensitively).
    """
    # Fall back to the argument itself when it has no ``.name`` attribute so
    # that plain path strings are accepted too.
    path = getattr(file, "name", file)
    # Compare the suffix case-insensitively so "DATA.CSV" is not rejected.
    lowered = str(path).lower()

    if lowered.endswith('.csv'):
        df = pd.read_csv(path)
    elif lowered.endswith('.xlsx'):
        df = pd.read_excel(path)
    else:
        return "只支持 CSV 和 XLSX 檔案"

    errors1 = check_spacing_around_dollar(df)
    errors2 = check_spacing_between_dollars(df)
    errors3 = check_numbers_surrounded_by_dollar(df)

    return {
        "第一個檢查": errors1,
        "第二個檢查": errors2,
        "第三個檢查": errors3,
    }
|
|
| |
# Gradio front end: a single file upload goes in, and whatever process_file
# returns is rendered by the JSON output component.
iface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="上傳 CSV 或 XLSX 檔案"),
    outputs=gr.JSON(label="檢查結果"),
    title="校對系統",
    description="這個系統會檢查 CSV 或 XLSX 檔案中的格式錯誤,包括 $ 符號和數字的空格錯誤。",
)


# Launch the interface only when this file is executed as a script, so that
# importing the module does not start it.
if __name__ == "__main__":
    iface.launch()
|
|