# Hugging Face Spaces app — the captured Space page reported "Runtime error"
# at scrape time; the application source follows below.
import torch
import gradio as gr
from PIL import Image
import whisper
from transformers import AutoProcessor, AutoModelForImageTextToText

# Load the Janus-Pro 1B multimodal (image+text) model and its processor.
# trust_remote_code=True executes Python shipped with the checkpoint; acceptable
# here only because the repo id is pinned to a known organization.
# NOTE: weights are downloaded at import time, so app startup is slow on first run.
processor = AutoProcessor.from_pretrained("deepseek-community/Janus-Pro-1B", trust_remote_code=True)
model = AutoModelForImageTextToText.from_pretrained("deepseek-community/Janus-Pro-1B", trust_remote_code=True)

# Prefer GPU when available; the model is moved there once, globally.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Whisper "base" model handles speech-to-text for the Audio input mode.
whisper_model = whisper.load_model("base")
def build_instruction(user_text):
    """Embed the raw user idea into the fixed prompt-engineering instruction."""
    template = (
        "You are a professional AI prompt engineer. "
        "Convert the input into a highly detailed AI generation prompt. "
        "Include: Subject, Environment, Summary. "
        "Input: {}\nReturn only the final prompt."
    )
    return template.format(user_text)
def text_to_prompt(user_text):
    """Turn a plain-text idea into a detailed generation prompt via Janus-Pro."""
    full_instruction = build_instruction(user_text)
    model_inputs = processor(text=full_instruction, return_tensors="pt").to(device)
    # Record the prompt length so only newly generated tokens are decoded,
    # not the echoed instruction.
    prompt_len = model_inputs.input_ids.shape[1]
    generated = model.generate(**model_inputs, max_new_tokens=200)
    new_tokens = generated[0][prompt_len:]
    return processor.decode(new_tokens, skip_special_tokens=True).strip()
def image_text_to_prompt(image_path, user_text):
    """Generate a detailed prompt from an uploaded image plus optional text.

    Args:
        image_path: Filesystem path to the uploaded image.
        user_text: Optional guidance; a generic description request is used
            when empty.

    Returns:
        The generated prompt string.

    Raises:
        ValueError: If no image was provided (previously this surfaced as a
            cryptic ``Image.open(None)`` failure in the UI).
    """
    if not image_path:
        raise ValueError("Please upload an image first.")
    if not user_text:
        user_text = "Describe this image in detail."
    # Force RGB so grayscale/RGBA/palette uploads match what the processor expects.
    image = Image.open(image_path).convert("RGB")
    instruction = build_instruction(user_text)
    inputs = processor(images=[image], text=instruction, return_tensors="pt").to(device)
    input_len = inputs.input_ids.shape[1]
    output = model.generate(**inputs, max_new_tokens=200)
    # Decode only the newly generated continuation, skipping the echoed prompt.
    return processor.decode(output[0][input_len:], skip_special_tokens=True).strip()
def audio_to_prompt(audio_path):
    """Transcribe speech with Whisper, then convert the transcript to a prompt.

    Args:
        audio_path: Filesystem path to the uploaded/recorded audio file.

    Returns:
        The generated prompt string.

    Raises:
        ValueError: If no audio was provided (previously whisper failed with
            an opaque internal error on a ``None`` path).
    """
    if not audio_path:
        raise ValueError("Please upload or record audio first.")
    transcription = whisper_model.transcribe(audio_path)
    return text_to_prompt(transcription["text"])
def generate_prompt_ui(input_type, text, image, audio):
    """Gradio callback: dispatch the selected input type to its generator.

    Args:
        input_type: One of "Text", "Image + Text", "Audio" (from the Radio widget).
        text: Free-text idea (used by the first two modes).
        image: Image file path or None.
        audio: Audio file path or None.

    Returns:
        The generated prompt, or an "Error: ..." message — this is the UI
        boundary, so all exceptions are deliberately caught and reported
        rather than crashing the app.
    """
    try:
        if input_type == "Text":
            return text_to_prompt(text)
        elif input_type == "Image + Text":
            return image_text_to_prompt(image, text)
        elif input_type == "Audio":
            return audio_to_prompt(audio)
        # Previously this fell through and returned None, leaving the output
        # box silently blank; the Radio widget should make this unreachable.
        return f"Error: unknown input type {input_type!r}"
    except Exception as e:  # top-level UI boundary: report, don't crash
        return f"Error: {str(e)}"
# Gradio UI setup: one input selector plus one widget per modality; only the
# widgets relevant to the current selection are visible (see toggle below).
with gr.Blocks() as app:
    gr.Markdown("# 🧠 Janus-Pro Prompt Generator")
    input_type = gr.Radio(["Text", "Image + Text", "Audio"], label="Select Input Type", value="Text")
    text_input = gr.Textbox(label="Enter your idea")
    # Image/audio widgets start hidden because the default mode is "Text".
    image_input = gr.Image(type="filepath", label="Upload Image", visible=False)
    audio_input = gr.Audio(type="filepath", label="Upload Audio", visible=False)
    output = gr.Textbox(label="Generated Prompt")
    btn = gr.Button("Generate 🚀")

    def toggle(choice):
        # Visibility per mode: text box for "Text" and "Image + Text",
        # image uploader only for "Image + Text", audio only for "Audio".
        # Tuple order must match the outputs list of input_type.change below.
        return (
            gr.update(visible=(choice != "Audio")),
            gr.update(visible=(choice == "Image + Text")),
            gr.update(visible=(choice == "Audio"))
        )

    input_type.change(toggle, input_type, [text_input, image_input, audio_input])
    btn.click(generate_prompt_ui, [input_type, text_input, image_input, audio_input], output)

# NOTE(review): original indentation was lost in extraction; launch() is placed
# after the Blocks context here (valid either inside or outside the `with`).
app.launch()