import llama_cpp
import llama_cpp.llama_tokenizer
from llama_cpp import Llama
import gradio as gr
from loguru import logger
import psutil
# from ctransformers import AutoModelForCausalLM, AutoTokenizer  # only needed for the ctransformers fallback below
prompt_template = """[INST] <<SYS>>
You are a helpful assistant for a crowdfunding platform called GiveSendGo. Your goal is to gather essential information for a campaign and generate a title and a sample pitch of at least 1000 words for the campaign.
<</SYS>>
{question} [/INST]
"""
| model_loc = "models/llama-2-13b-chat.Q5_K_M.gguf" | |
| model_loc = "TheBloke/Llama-2-13B-chat-GGUF" | |
| llama = AutoModelForCausalLM.from_pretrained( | |
| model_loc, | |
| model_type="llama", | |
| context_length=4096, | |
| max_new_tokens=2048, | |
| hf=True | |
| # threads=cpu_count, | |
| ) | |
| # llama = llama_cpp.Llama.from_pretrained( | |
| # #repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF", | |
| # #filename="*q8_0.gguf", | |
| # mode_path=model_loc, | |
| # model_type="llama", | |
| # context_length=4096, | |
| # max_new_tokens=2048, | |
| # filename="llama-2-13b-chat.Q5_K_M.gguf", | |
| # tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"), | |
| # verbose=False | |
| # ) | |
| # llama = Llama( | |
| # model_path=model_loc, | |
| # max_tokens=4096, | |
| # n_ctx=4096, | |
| # verbose=False, | |
| # ) | |
# Derive a stop string from the prompt template (second-to-last non-empty line).
_ = [elm for elm in prompt_template.splitlines() if elm.strip()]
stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
logger.debug(f"{stop_string=}")
| model = "gpt-3.5-turbo" | |
def predict(message, history):
    """Stream a chat completion for the Gradio ChatInterface."""
    prompt = prompt_template.format(question=message)
    messages = [{"role": "system", "content": prompt}]
    for user_message, assistant_message in history:
        messages.append({"role": "user", "content": user_message})
        messages.append({"role": "assistant", "content": assistant_message})
    messages.append({"role": "user", "content": message})

    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        response_format={
            "type": "json_object",
            "schema": {
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    # "description": {"type": "string"},
                    "sample_pitch": {"type": "string"},
                    "amount": {"type": "string"},
                    "location": {"type": "string"},
                },
                "required": ["title", "sample_pitch", "amount", "location"],  # description
            },
        },
        temperature=0.7,
        stream=True,
    )

    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            yield text
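# Note on response_format: llama-cpp-python turns the JSON schema above into a grammar
# that constrains decoding, so the streamed text should accumulate into a single JSON
# object with the "title", "sample_pitch", "amount" and "location" keys.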
def generate(message):
    """Single-shot (non-streaming) completion used by the API endpoint."""
    try:
        prompt = prompt_template.format(question=message)
        messages = [
            {"role": "system", "content": prompt},
            {"role": "user", "content": message},
        ]
        response = llama.create_chat_completion_openai_v1(
            model=model,
            messages=messages,
            response_format={
                "type": "json_object",
                "schema": {
                    "type": "object",
                    "properties": {
                        "title": {"type": "string"},
                        # "description": {"type": "string"},
                        "sample_pitch": {"type": "string"},
                        "amount": {"type": "string"},
                        "location": {"type": "string"},
                    },
                    "required": ["title", "sample_pitch", "amount", "location"],  # description
                },
            },
            temperature=0.7,
            stream=False,
        )
        logger.debug(f"{response}")
        # With stream=False the full completion is in message.content (not delta).
        return response.choices[0].message.content
    except Exception as exc:
        logger.error(exc)
        return f"{exc=}"
def predict_api(message):
    logger.debug(f"{message=}")
    text = generate(message)
    logger.debug(f"text::{text=}")
    return f"json: {text=}"
| js = """function () { | |
| gradioURL = window.location.href | |
| if (!gradioURL.endsWith('?__theme=dark')) { | |
| window.location.replace(gradioURL + '?__theme=dark'); | |
| } | |
| }""" | |
| css = """ | |
| footer { | |
| visibility: hidden; | |
| } | |
| full-height { | |
| height: 100%; | |
| } | |
| """ | |
with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css) as demo:
    gr.ChatInterface(predict, examples=["What is the capital of France?", "Who was the first person on the moon?"])
    with gr.Accordion("For Chat/Translation API", open=False, visible=False):
        input_text = gr.Text()
        api_btn = gr.Button("Go", variant="primary")
        out_text = gr.Text()
    api_btn.click(
        predict_api,
        input_text,
        out_text,
        api_name="api",
    )

if __name__ == "__main__":
    demo.queue().launch(debug=True, share=True)
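# Usage sketch (assumption: the Space is reachable at the local URL shown below): the
# hidden "api" endpoint wired to predict_api can be called with gradio_client, e.g.
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict("Help me fund a community garden", api_name="/api")
#   print(result)  # the "json: text=..." string built by predict_api()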