File size: 3,094 Bytes
3ed9870
 
 
 
 
 
 
 
8428754
3ed9870
 
 
8428754
3ed9870
 
 
 
 
 
 
 
 
 
 
8428754
3ed9870
 
 
8428754
 
 
ccc2666
8428754
3ed9870
 
4839ba0
3ed9870
 
7a437ab
3ed9870
8428754
3ed9870
8428754
 
 
 
 
 
 
 
 
3ed9870
 
 
 
 
 
8428754
3ed9870
8428754
 
 
7a437ab
8428754
 
 
 
461d454
8428754
3ed9870
 
 
 
8428754
3ed9870
8428754
3ed9870
 
8428754
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
import numpy as np
import json
import requests
import gradio as gr
import logging

# Initialize the tokenizer and model
# Loaded once at import time so every request reuses the same weights.
# NOTE(review): 'MultiTokenizer_ep10' / 'MultiModel_ep10' look like local
# checkpoint directories rather than Hub model ids — confirm they are
# shipped alongside this script in the deployment.
bert_tokenizer = BertTokenizer.from_pretrained('MultiTokenizer_ep10')
bert_model = TFBertForSequenceClassification.from_pretrained('MultiModel_ep10')

# Function to send results to API
# def send_results_to_api(data, result_url):
#     headers = {'Content-Type':'application/json'}
#     response = requests.post(result_url, json = data, headers=headers)
#     if response.status_code == 200:
#         return response.json()
#     else:
#         return {'error':f"failed to send result to API: {response.status_code}"}

def predict_text(params):
    """Classify a batch of texts with the fine-tuned BERT model.

    Parameters
    ----------
    params : str
        JSON-encoded object. Expected keys:
        - "urls": required list of strings to classify.
        - "normalfileID": optional list of file ids aligned with "urls";
          missing or short lists are padded with None.

    Returns
    -------
    str or dict
        On success, a JSON string of the form ``{"solutions": [...]}`` where
        each solution carries the text, predicted label list, qcUser and
        normalfileID. On bad input, a plain dict with an "error" key
        (preserves the original error-reporting contract).
    """
    try:
        params = json.loads(params)
    except json.JSONDecodeError as e:
        logging.error(f"Invalid JSON input: {e.msg} at line {e.lineno} column {e.colno}")
        return {"error": f"Invalid JSON input: {e.msg} at line {e.lineno} column {e.colno}"}

    # A valid JSON payload can still be a list/string/number; .get would
    # raise AttributeError on those, so reject them explicitly.
    if not isinstance(params, dict):
        return {"error": "Invalid JSON input: expected a JSON object"}

    texts = params.get("urls", [])
    if not texts:
        # Guard before any further work. The texts actually arrive under
        # the "urls" key, so the error message names that key (the old
        # message referred to a non-existent 'texts' parameter).
        return {"error": "Missing required parameters: 'urls'"}

    # Align file ids with texts. Pad with None so zip() never silently
    # drops texts when normalfileID is shorter than urls.
    file_ids = list(params.get("normalfileID") or [])
    file_ids = file_ids[:len(texts)] + [None] * (len(texts) - len(file_ids))

    # Predictions whose top softmax score falls below this threshold are
    # remapped to the synthetic 'None' class (index 7).
    confidence_threshold = 0.85

    # Model output index -> human-readable label; built once, not per text.
    label = {0: 'BUSINESS', 1: 'COMEDY', 2: 'CRIME', 3: 'FOOD & DRINK',
             4: 'POLITICS', 5: 'SPORTS', 6: 'TRAVEL', 7: 'None'}

    solutions = []
    for text, file_id in zip(texts, file_ids):
        encoding = bert_tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=128,
            return_token_type_ids=True,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='tf'
        )

        # Feed tensors by name. A positional list maps to the Keras inputs
        # in declaration order (input_ids, attention_mask, token_type_ids),
        # so the previous [input_ids, token_type_ids, attention_mask] order
        # fed token_type_ids into the attention-mask slot.
        pred = bert_model.predict({
            'input_ids': encoding['input_ids'],
            'attention_mask': encoding['attention_mask'],
            'token_type_ids': encoding['token_type_ids'],
        })
        logits = pred.logits
        softmax_scores = tf.nn.softmax(logits, axis=1).numpy()[0]
        pred_label = int(tf.argmax(logits, axis=1).numpy()[0])

        # Confidence of the winning class; low-confidence answers -> 'None'.
        confidence = softmax_scores[pred_label]
        logging.debug("prediction confidence: %s", confidence)
        if confidence < confidence_threshold:
            pred_label = 7  # Set to 'None' class

        solutions.append({
            'text': text,
            'answer': [label[pred_label]],
            "qcUser": None,
            "normalfileID": file_id,
        })

    # result_url = f"{api}/{job_id}"
    # send_results_to_api(solutions, result_url)
    return json.dumps({"solutions": solutions})

# Gradio UI wiring: a free-text box for the JSON payload in, parsed JSON out.
inputt = gr.Textbox(
    label="Parameters in Json Format... Eg. {'texts':['text1', 'text2']}"
)
outputt = gr.JSON()

# Build the interface around predict_text and start the local server.
application = gr.Interface(
    fn=predict_text,
    inputs=inputt,
    outputs=outputt,
    title='Multi Text Classification with API Integration..',
)
application.launch()