| #ifndef __DIFFUSION_MODEL_H__ |
| #define __DIFFUSION_MODEL_H__ |
|
|
| #include "flux.hpp" |
| #include "mmdit.hpp" |
| #include "unet.hpp" |
|
|
// Abstract interface shared by the diffusion-model wrappers declared in this
// header (UNetModel, MMDiTModel, FluxModel). Every operation is pure virtual;
// each concrete wrapper forwards it to the runner object it owns.
struct DiffusionModel {
    // Virtual so that destroying a concrete model through a DiffusionModel
    // pointer runs the derived destructor (and with it the destructor of the
    // runner member). Without it, such a delete is undefined behaviour.
    virtual ~DiffusionModel() = default;

    // Runs the wrapped model once.
    //
    // The parameter list is the union of what the wrappers in this header
    // forward to their runners; a wrapper ignores the arguments its runner
    // does not take.
    //
    //   n_threads        - forwarded to the runner as its thread count.
    //   x, timesteps,
    //   context, y       - forwarded by all three wrappers.
    //   c_concat         - forwarded only by UNetModel.
    //   guidance         - forwarded only by FluxModel.
    //   num_video_frames,
    //   controls,
    //   control_strength - forwarded only by UNetModel.
    //   output,
    //   output_ctx       - forwarded by all three wrappers; presumably where
    //                      the result tensor is written/allocated (confirm
    //                      against the runner implementations).
    //   skip_layers      - forwarded by MMDiTModel and FluxModel.
    //
    // NOTE: overriders repeat these default arguments. Default arguments are
    // selected by the static type of the call, so keep them in sync.
    virtual void compute(int n_threads,
                         struct ggml_tensor* x,
                         struct ggml_tensor* timesteps,
                         struct ggml_tensor* context,
                         struct ggml_tensor* c_concat,
                         struct ggml_tensor* y,
                         struct ggml_tensor* guidance,
                         int num_video_frames                      = -1,
                         std::vector<struct ggml_tensor*> controls = {},
                         float control_strength                    = 0.f,
                         struct ggml_tensor** output               = nullptr,
                         struct ggml_context* output_ctx           = nullptr,
                         std::vector<int> skip_layers              = std::vector<int>()) = 0;

    // Parameter-buffer and compute-buffer lifecycle, delegated to the runner.
    virtual void alloc_params_buffer() = 0;
    virtual void free_params_buffer()  = 0;
    virtual void free_compute_buffer() = 0;

    // Adds the model's parameter tensors to `tensors`, keyed by name.
    virtual void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) = 0;

    // Size reported by the runner for its parameter buffer.
    virtual size_t get_params_buffer_size() = 0;

    // Channel count the model expects for its `adm` input (see each wrapper).
    virtual int64_t get_adm_in_channels() = 0;
};
|
|
| struct UNetModel : public DiffusionModel { |
| UNetModelRunner unet; |
|
|
| UNetModel(ggml_backend_t backend, |
| std::map<std::string, enum ggml_type>& tensor_types, |
| SDVersion version = VERSION_SD1, |
| bool flash_attn = false) |
| : unet(backend, tensor_types, "model.diffusion_model", version, flash_attn) { |
| } |
|
|
| void alloc_params_buffer() { |
| unet.alloc_params_buffer(); |
| } |
|
|
| void free_params_buffer() { |
| unet.free_params_buffer(); |
| } |
|
|
| void free_compute_buffer() { |
| unet.free_compute_buffer(); |
| } |
|
|
| void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) { |
| unet.get_param_tensors(tensors, "model.diffusion_model"); |
| } |
|
|
| size_t get_params_buffer_size() { |
| return unet.get_params_buffer_size(); |
| } |
|
|
| int64_t get_adm_in_channels() { |
| return unet.unet.adm_in_channels; |
| } |
|
|
| void compute(int n_threads, |
| struct ggml_tensor* x, |
| struct ggml_tensor* timesteps, |
| struct ggml_tensor* context, |
| struct ggml_tensor* c_concat, |
| struct ggml_tensor* y, |
| struct ggml_tensor* guidance, |
| int num_video_frames = -1, |
| std::vector<struct ggml_tensor*> controls = {}, |
| float control_strength = 0.f, |
| struct ggml_tensor** output = NULL, |
| struct ggml_context* output_ctx = NULL, |
| std::vector<int> skip_layers = std::vector<int>()) { |
| (void)skip_layers; |
| return unet.compute(n_threads, x, timesteps, context, c_concat, y, num_video_frames, controls, control_strength, output, output_ctx); |
| } |
| }; |
|
|
| struct MMDiTModel : public DiffusionModel { |
| MMDiTRunner mmdit; |
|
|
| MMDiTModel(ggml_backend_t backend, |
| std::map<std::string, enum ggml_type>& tensor_types) |
| : mmdit(backend, tensor_types, "model.diffusion_model") { |
| } |
|
|
| void alloc_params_buffer() { |
| mmdit.alloc_params_buffer(); |
| } |
|
|
| void free_params_buffer() { |
| mmdit.free_params_buffer(); |
| } |
|
|
| void free_compute_buffer() { |
| mmdit.free_compute_buffer(); |
| } |
|
|
| void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) { |
| mmdit.get_param_tensors(tensors, "model.diffusion_model"); |
| } |
|
|
| size_t get_params_buffer_size() { |
| return mmdit.get_params_buffer_size(); |
| } |
|
|
| int64_t get_adm_in_channels() { |
| return 768 + 1280; |
| } |
|
|
| void compute(int n_threads, |
| struct ggml_tensor* x, |
| struct ggml_tensor* timesteps, |
| struct ggml_tensor* context, |
| struct ggml_tensor* c_concat, |
| struct ggml_tensor* y, |
| struct ggml_tensor* guidance, |
| int num_video_frames = -1, |
| std::vector<struct ggml_tensor*> controls = {}, |
| float control_strength = 0.f, |
| struct ggml_tensor** output = NULL, |
| struct ggml_context* output_ctx = NULL, |
| std::vector<int> skip_layers = std::vector<int>()) { |
| return mmdit.compute(n_threads, x, timesteps, context, y, output, output_ctx, skip_layers); |
| } |
| }; |
|
|
| struct FluxModel : public DiffusionModel { |
| Flux::FluxRunner flux; |
|
|
| FluxModel(ggml_backend_t backend, |
| std::map<std::string, enum ggml_type>& tensor_types, |
| bool flash_attn = false) |
| : flux(backend, tensor_types, "model.diffusion_model", flash_attn) { |
| } |
|
|
| void alloc_params_buffer() { |
| flux.alloc_params_buffer(); |
| } |
|
|
| void free_params_buffer() { |
| flux.free_params_buffer(); |
| } |
|
|
| void free_compute_buffer() { |
| flux.free_compute_buffer(); |
| } |
|
|
| void get_param_tensors(std::map<std::string, struct ggml_tensor*>& tensors) { |
| flux.get_param_tensors(tensors, "model.diffusion_model"); |
| } |
|
|
| size_t get_params_buffer_size() { |
| return flux.get_params_buffer_size(); |
| } |
|
|
| int64_t get_adm_in_channels() { |
| return 768; |
| } |
|
|
| void compute(int n_threads, |
| struct ggml_tensor* x, |
| struct ggml_tensor* timesteps, |
| struct ggml_tensor* context, |
| struct ggml_tensor* c_concat, |
| struct ggml_tensor* y, |
| struct ggml_tensor* guidance, |
| int num_video_frames = -1, |
| std::vector<struct ggml_tensor*> controls = {}, |
| float control_strength = 0.f, |
| struct ggml_tensor** output = NULL, |
| struct ggml_context* output_ctx = NULL, |
| std::vector<int> skip_layers = std::vector<int>()) { |
| return flux.compute(n_threads, x, timesteps, context, y, guidance, output, output_ctx, skip_layers); |
| } |
| }; |
|
|
| #endif |
|
|