| {#- Resolve the system prompt: keep the caller-supplied system_prompt when it is defined, otherwise fall back to the built-in instruction string below, then emit it. #} |
| {%- set system_prompt = system_prompt | default(" Transform the text provided by various speakers into speech output, utilizing the distinct voice of each respective speaker. |
| ") -%} |
| {{ system_prompt -}} |
| {#- Audio marker tokens used further down. Each keeps a caller-supplied value when it is defined and otherwise defaults to the vision_start / vision_end / vision_pad token literal given here. #} |
| {%- set audio_bos_token = audio_bos_token | default("<|vision_start|>") %} |
| {%- set audio_eos_token = audio_eos_token | default("<|vision_end|>") %} |
| {%- set audio_diffusion_token = audio_diffusion_token | default("<|vision_pad|>") %} |
| {#- Pass 1: collect, in first-seen order, every speaker (message role) that has at least one content item of type 'audio'. Speakers are kept in a list so that the membership test is an exact match; with a comma-joined string the 'in' test is a substring test, which treats role "1" as already present once "10" has been recorded and mis-splits roles that contain a comma. The namespace attribute is rebound (list + list) because a plain set inside a for loop does not survive the loop scope. Roles whose string form is empty are not collected. #} |
| {%- set ns = namespace(speakers_with_audio=[]) %} |
| {%- for message in messages %} |
| {%- set role = message['role'] %} |
| {%- set content = message['content'] %} |
| {%- set has_audio = content | selectattr('type', 'equalto', 'audio') | list | length > 0 %} |
| {%- if has_audio and (role | string) and role not in ns.speakers_with_audio %} |
| {%- set ns.speakers_with_audio = ns.speakers_with_audio + [role] %} |
| {%- endif %} |
| {%- endfor %} |
| |
| {#- Emit the voice section only when at least one speaker supplied audio: a header line, then one line per collected speaker carrying the audio BOS, diffusion and EOS tokens. #} |
| {%- if ns.speakers_with_audio %} |
| {{ " Voice input: |
| " }} |
| {%- for speaker in ns.speakers_with_audio %} |
| Speaker {{ speaker }}:{{ audio_bos_token }}{{ audio_diffusion_token }}{{ audio_eos_token }}{{ " |
| " }} |
| {%- endfor %} |
| {%- endif %} |
| Text input:{{ " |
| " }} |
| |
| {#- Pass 2: one "Speaker <role>: <text>" line per text item, in message order. Content given as a plain string is rendered as a single text line; previously the type filter iterated the characters of the string, matched nothing, and the message was silently left out. Content given as a list goes through the same 'text' type filter as before. After the loop the "Speech output:" header is emitted, followed by audio_bos_token. #} |
| {%- for message in messages %} |
| {%- set role = message['role'] %} |
| {%- set content = message['content'] %} |
| {%- if content is string %} |
| Speaker {{ role }}: {{ content }}{{ " |
| " }} |
| {%- else %} |
| {%- set text_items = content | selectattr('type', 'equalto', 'text') | list %} |
| {%- for item in text_items %} |
| Speaker {{ role }}: {{ item['text'] }}{{ " |
| " }} |
| {%- endfor %} |
| {%- endif %} |
| {%- endfor %} |
| Speech output:{{ " |
| " }}{{ audio_bos_token }} |