| from fastapi import FastAPI |
| from pydantic import BaseModel |
| import tensorflow as tf |
| from transformers import AutoTokenizer, TFAutoModel |
| from bs4 import BeautifulSoup |
| import hazm |
| import time |
|
|
# ParsBERT (Persian BERT) checkpoint; tokenizer and TF model weights are
# fetched from the Hugging Face hub on first run, then cached locally.
model_name="HooshvareLab/bert-base-parsbert-uncased"
tokenizer=AutoTokenizer.from_pretrained(model_name)
model=TFAutoModel.from_pretrained(model_name)


# hazm utilities for Persian text processing.
# NOTE(review): sent_tokenizer is never used in this file — presumably kept
# for future sentence-level splitting; confirm before removing.
sent_tokenizer=hazm.SentenceTokenizer()
# Normalizer unifies Persian/Arabic character variants and spacing
# before tokenization.
normalizer=hazm.Normalizer()




# FastAPI application exposing the embedding endpoint below.
app=FastAPI()
|
|
class Input(BaseModel):
    """Request body for POST /get_vectors: a batch of raw (possibly HTML) texts."""

    # `list[str]` makes pydantic validate/coerce every element to a string.
    # The original bare `list` disabled item validation, letting arbitrary
    # objects through to crash the tokenizer downstream.
    texts: list[str]
|
|
@app.post("/get_vectors")
def get_vecs(data: Input):
    """Embed a batch of (possibly HTML) Persian texts with ParsBERT.

    Pipeline: strip HTML -> hazm-normalize -> tokenize (padded/truncated to
    512) -> mask-aware mean-pool the last hidden state.

    Returns:
        dict with "vectors" (one embedding list per input text) and
        "duration" (request wall-clock seconds).
    """
    start = time.perf_counter()  # monotonic clock, immune to system-time jumps
    texts = data.texts

    # Guard: tokenizer/model fail on an empty batch.
    if not texts:
        return {"vectors": [], "duration": time.perf_counter() - start}

    # Strip HTML markup. Naming the parser explicitly avoids bs4's
    # "no parser specified" warning and makes output deterministic
    # across environments (bs4 otherwise picks the "best" installed parser).
    texts = [BeautifulSoup(t, "html.parser").get_text() for t in texts]
    texts = [normalizer.normalize(t) for t in texts]

    tokens = tokenizer(texts, return_tensors="tf", padding=True, truncation=True, max_length=512)
    outputs = model(**tokens)

    # Mean-pool over *real* tokens only. The original reduce_mean over axis=1
    # also averaged the [PAD] positions, skewing embeddings for short texts
    # inside a padded batch.
    hidden = outputs.last_hidden_state
    mask = tf.cast(tf.expand_dims(tokens["attention_mask"], -1), hidden.dtype)
    summed = tf.reduce_sum(hidden * mask, axis=1)
    counts = tf.maximum(tf.reduce_sum(mask, axis=1), 1e-9)  # avoid div-by-zero
    sentence_embedding = summed / counts

    vecs = sentence_embedding.numpy().tolist()
    return {"vectors": vecs, "duration": time.perf_counter() - start}