| #include <stdio.h> |
| #include <string.h> |
| #include <time.h> |
| #include <iostream> |
| #include <random> |
| #include <string> |
| #include <vector> |
|
|
| #include <inttypes.h> |
| #include <cinttypes> |
| #include <algorithm> |
|
|
| #include "model_adapter.h" |
|
|
| #include "flux.hpp" |
| #include "stable-diffusion.cpp" |
| #include "util.cpp" |
| #include "upscaler.cpp" |
| #include "model.cpp" |
| #include "zip.c" |
|
|
| #include "otherarch/utils.h" |
|
|
| |
| #include "stable-diffusion.h" |
|
|
| |
| #include "stb_image.h" |
|
|
| #define STB_IMAGE_WRITE_IMPLEMENTATION |
| #define STB_IMAGE_WRITE_STATIC |
| #include "stb_image_write.h" |
|
|
| |
| #include "stb_image_resize.h" |
|
|
// Operating modes for the image-generation front end.
// Enumerators are numbered consecutively from zero, so MODE_COUNT equals the
// number of real modes. Within this file only TXT2IMG and IMG2IMG are selected
// (see sdtype_generate); IMG2VID and CONVERT are declared but not referenced here.
enum SDMode {
    TXT2IMG    = 0,  // generate from a text prompt only
    IMG2IMG    = 1,  // generate from a prompt plus an initial image
    IMG2VID    = 2,
    CONVERT    = 3,
    MODE_COUNT = 4   // sentinel: number of modes above
};
|
|
| struct SDParams { |
| int n_threads = -1; |
| SDMode mode = TXT2IMG; |
| std::string model_path; |
| std::string clip_l_path; |
| std::string clip_g_path; |
| std::string t5xxl_path; |
| std::string diffusion_model_path; |
| std::string vae_path; |
| std::string taesd_path; |
| std::string esrgan_path; |
| std::string controlnet_path; |
| std::string embeddings_path; |
| std::string stacked_id_embeddings_path; |
| std::string input_id_images_path; |
| sd_type_t wtype = SD_TYPE_COUNT; |
| std::string lora_model_dir; |
| std::string output_path = "output.png"; |
| std::string input_path; |
| std::string control_image_path; |
|
|
| std::string prompt; |
| std::string negative_prompt; |
| float min_cfg = 1.0f; |
| float cfg_scale = 7.0f; |
| float guidance = 3.5f; |
| float style_ratio = 20.f; |
| int clip_skip = -1; |
| int width = 512; |
| int height = 512; |
| int batch_count = 1; |
|
|
| int video_frames = 6; |
| int motion_bucket_id = 127; |
| int fps = 6; |
| float augmentation_level = 0.f; |
|
|
| sample_method_t sample_method = EULER_A; |
| schedule_t schedule = DEFAULT; |
| int sample_steps = 20; |
| float strength = 0.75f; |
| float control_strength = 0.9f; |
| rng_type_t rng_type = CUDA_RNG; |
| int64_t seed = 42; |
| bool verbose = false; |
| bool vae_tiling = false; |
| bool control_net_cpu = false; |
| bool normalize_input = false; |
| bool clip_on_cpu = false; |
| bool vae_on_cpu = false; |
| bool diffusion_flash_attn = false; |
| bool canny_preprocess = false; |
| bool color = false; |
| int upscale_repeats = 1; |
|
|
| std::vector<int> skip_layers = {7, 8, 9}; |
| float slg_scale = 0.; |
| float skip_layer_start = 0.01; |
| float skip_layer_end = 0.2; |
| }; |
|
|
| |
| int total_img_gens = 0; |
|
|
| |
| static SDParams * sd_params = nullptr; |
| static sd_ctx_t * sd_ctx = nullptr; |
| static int sddebugmode = 0; |
| static std::string recent_data = ""; |
|
|
| static std::string sdplatformenv, sddeviceenv, sdvulkandeviceenv; |
| static bool notiling = false; |
| static bool sd_is_quiet = false; |
|
|
| bool sdtype_load_model(const sd_load_model_inputs inputs) { |
| sd_is_quiet = inputs.quiet; |
| set_sd_quiet(sd_is_quiet); |
| executable_path = inputs.executable_path; |
| std::string taesdpath = ""; |
| std::string lorafilename = inputs.lora_filename; |
| std::string vaefilename = inputs.vae_filename; |
| std::string t5xxl_filename = inputs.t5xxl_filename; |
| std::string clipl_filename = inputs.clipl_filename; |
| std::string clipg_filename = inputs.clipg_filename; |
| notiling = inputs.notile; |
| printf("\nImageGen Init - Load Model: %s\n",inputs.model_filename); |
| if(lorafilename!="") |
| { |
| printf("With LoRA: %s at %f power\n",lorafilename.c_str(),inputs.lora_multiplier); |
| } |
| if(inputs.taesd) |
| { |
| taesdpath = executable_path + "taesd.embd"; |
| printf("With TAE SD VAE: %s\n",taesdpath.c_str()); |
| } |
| else if(vaefilename!="") |
| { |
| printf("With Custom VAE: %s\n",vaefilename.c_str()); |
| } |
| if(t5xxl_filename!="") |
| { |
| printf("With Custom T5-XXL Model: %s\n",t5xxl_filename.c_str()); |
| } |
| if(clipl_filename!="") |
| { |
| printf("With Custom Clip-L Model: %s\n",clipl_filename.c_str()); |
| } |
| if(clipg_filename!="") |
| { |
| printf("With Custom Clip-G Model: %s\n",clipg_filename.c_str()); |
| } |
|
|
| |
| int cl_parseinfo = inputs.clblast_info; |
| std::string usingclblast = "GGML_OPENCL_CONFIGURED="+std::to_string(cl_parseinfo>0?1:0); |
| putenv((char*)usingclblast.c_str()); |
| cl_parseinfo = cl_parseinfo%100; |
| int platform = cl_parseinfo/10; |
| int devices = cl_parseinfo%10; |
| sdplatformenv = "GGML_OPENCL_PLATFORM="+std::to_string(platform); |
| sddeviceenv = "GGML_OPENCL_DEVICE="+std::to_string(devices); |
| putenv((char*)sdplatformenv.c_str()); |
| putenv((char*)sddeviceenv.c_str()); |
| std::string vulkan_info_raw = inputs.vulkan_info; |
| std::string vulkan_info_str = ""; |
| for (size_t i = 0; i < vulkan_info_raw.length(); ++i) { |
| vulkan_info_str += vulkan_info_raw[i]; |
| if (i < vulkan_info_raw.length() - 1) { |
| vulkan_info_str += ","; |
| } |
| } |
| if(vulkan_info_str!="") |
| { |
| sdvulkandeviceenv = "GGML_VK_VISIBLE_DEVICES="+vulkan_info_str; |
| putenv((char*)sdvulkandeviceenv.c_str()); |
| } |
|
|
| sd_params = new SDParams(); |
| sd_params->model_path = inputs.model_filename; |
| sd_params->wtype = (inputs.quant==0?SD_TYPE_COUNT:SD_TYPE_Q4_0); |
| sd_params->n_threads = inputs.threads; |
| sd_params->input_path = ""; |
| sd_params->batch_count = 1; |
| sd_params->vae_path = vaefilename; |
| sd_params->taesd_path = taesdpath; |
| sd_params->t5xxl_path = t5xxl_filename; |
| sd_params->clip_l_path = clipl_filename; |
| sd_params->clip_g_path = clipg_filename; |
| |
| bool endswithgguf = (sd_params->model_path.rfind(".gguf") == sd_params->model_path.size() - 5); |
| if(sd_params->clip_l_path!="" && sd_params->t5xxl_path!="" && endswithgguf) |
| { |
| printf("\nSwap to Diffusion Model Path:%s",sd_params->model_path.c_str()); |
| sd_params->diffusion_model_path = sd_params->model_path; |
| sd_params->model_path = ""; |
| } |
|
|
| sddebugmode = inputs.debugmode; |
|
|
| set_sd_log_level(sddebugmode); |
|
|
| bool vae_decode_only = false; |
| bool free_param = false; |
| if(inputs.debugmode==1) |
| { |
| printf("\nMODEL:%s\nVAE:%s\nTAESD:%s\nCNET:%s\nLORA:%s\nEMBD:%s\nVAE_DEC:%d\nVAE_TILE:%d\nFREE_PARAM:%d\nTHREADS:%d\nWTYPE:%d\nRNGTYPE:%d\nSCHED:%d\nCNETCPU:%d\n\n", |
| sd_params->model_path.c_str(), |
| sd_params->vae_path.c_str(), |
| sd_params->taesd_path.c_str(), |
| sd_params->controlnet_path.c_str(), |
| sd_params->lora_model_dir.c_str(), |
| sd_params->embeddings_path.c_str(), |
| vae_decode_only, |
| sd_params->vae_tiling, |
| free_param, |
| sd_params->n_threads, |
| sd_params->wtype, |
| sd_params->rng_type, |
| sd_params->schedule, |
| sd_params->control_net_cpu); |
| } |
|
|
| sd_ctx = new_sd_ctx(sd_params->model_path.c_str(), |
| sd_params->clip_l_path.c_str(), |
| sd_params->clip_g_path.c_str(), |
| sd_params->t5xxl_path.c_str(), |
| sd_params->diffusion_model_path.c_str(), |
| sd_params->vae_path.c_str(), |
| sd_params->taesd_path.c_str(), |
| sd_params->controlnet_path.c_str(), |
| sd_params->lora_model_dir.c_str(), |
| sd_params->embeddings_path.c_str(), |
| sd_params->stacked_id_embeddings_path.c_str(), |
| vae_decode_only, |
| sd_params->vae_tiling, |
| free_param, |
| sd_params->n_threads, |
| sd_params->wtype, |
| sd_params->rng_type, |
| sd_params->schedule, |
| sd_params->clip_on_cpu, |
| sd_params->control_net_cpu, |
| sd_params->vae_on_cpu, |
| sd_params->diffusion_flash_attn); |
|
|
| if (sd_ctx == NULL) { |
| printf("\nError: KCPP SD Failed to create context!\nIf using Flux/SD3.5, make sure you have ALL files required (e.g. VAE, T5, Clip...) or baked in!\n"); |
| return false; |
| } |
|
|
| if(lorafilename!="" && inputs.lora_multiplier>0) |
| { |
| printf("\nApply LoRA...\n"); |
| |
| sd_ctx->sd->apply_lora_from_file(lorafilename,inputs.lora_multiplier); |
| } |
|
|
| return true; |
|
|
| } |
|
|
/**
 * Sanitise a user prompt before it is handed to the image backend.
 *
 * Keeps only 7-bit ASCII bytes (0x00-0x7F) and caps the result at 800 bytes.
 *
 * The earlier filter also tested `ch >= 0xC2 && ch <= 0xF4` on a plain `char`.
 * Where `char` is signed that test can never be true, so the result was already
 * ASCII-only; where `char` is unsigned it let UTF-8 lead bytes through while
 * still dropping their continuation bytes, producing malformed text. Filtering
 * on the unsigned byte value gives the same, well-formed result on every platform.
 *
 * @param input  Raw prompt text.
 * @return The filtered prompt, at most 800 bytes long.
 */
std::string clean_input_prompt(const std::string& input) {
    constexpr std::size_t kMaxPromptBytes = 800;

    std::string result;
    result.reserve(input.size() < kMaxPromptBytes ? input.size() : kMaxPromptBytes);
    for (const char ch : input) {
        if (static_cast<unsigned char>(ch) > 0x7F) {
            continue;  // non-ASCII byte: drop it
        }
        result.push_back(ch);
        if (result.size() == kMaxPromptBytes) {
            break;  // same outcome as filtering everything and truncating afterwards
        }
    }
    return result;
}
|
|
// Display names for samplers, indexed by the numeric value of a sample_method_t
// (see get_image_params). NOTE(review): the order is presumed to mirror the
// sample_method_t enumerators declared by the backend - confirm when that enum changes.
static const char* sample_method_str[] = {
    "euler_a",    // 0
    "euler",      // 1
    "heun",       // 2
    "dpm2",       // 3
    "dpm++2s_a",  // 4
    "dpm++2m",    // 5
    "dpm++2mv2",  // 6
    "ipndm",      // 7
    "ipndm_v",    // 8
    "lcm",        // 9
};
|
|
// Display names for random-number generators, indexed by the numeric value of
// an rng_type_t (see get_image_params). NOTE(review): presumed to follow the
// backend's rng_type_t enumerator order - confirm.
static const char* rng_type_to_str[] = {
    "std_default",  // 0
    "cuda",         // 1
};
|
|
| static std::string get_image_params(const SDParams& params, int64_t seed) { |
| std::string parameter_string = params.prompt + "\n"; |
| if (params.negative_prompt.size() != 0) { |
| parameter_string += "Negative prompt: " + params.negative_prompt + "\n"; |
| } |
| parameter_string += "Steps: " + std::to_string(params.sample_steps) + ", "; |
| parameter_string += "CFG scale: " + std::to_string(params.cfg_scale) + ", "; |
| if (params.slg_scale != 0 && params.skip_layers.size() != 0) { |
| parameter_string += "SLG scale: " + std::to_string(params.cfg_scale) + ", "; |
| parameter_string += "Skip layers: ["; |
| for (const auto& layer : params.skip_layers) { |
| parameter_string += std::to_string(layer) + ", "; |
| } |
| parameter_string += "], "; |
| parameter_string += "Skip layer start: " + std::to_string(params.skip_layer_start) + ", "; |
| parameter_string += "Skip layer end: " + std::to_string(params.skip_layer_end) + ", "; |
| } |
| parameter_string += "Guidance: " + std::to_string(params.guidance) + ", "; |
| parameter_string += "Seed: " + std::to_string(seed) + ", "; |
| parameter_string += "Size: " + std::to_string(params.width) + "x" + std::to_string(params.height) + ", "; |
| parameter_string += "Model: " + sd_basename(params.model_path) + ", "; |
| parameter_string += "RNG: " + std::string(rng_type_to_str[params.rng_type]) + ", "; |
| parameter_string += "Sampler: " + std::string(sample_method_str[params.sample_method]); |
| if (params.schedule == KARRAS) { |
| parameter_string += " karras"; |
| } |
| parameter_string += ", "; |
| parameter_string += "Version: KoboldCpp"; |
| return parameter_string; |
| } |
|
|
| sd_generation_outputs sdtype_generate(const sd_generation_inputs inputs) |
| { |
| sd_generation_outputs output; |
|
|
| if(sd_ctx == nullptr || sd_params == nullptr) |
| { |
| printf("\nWarning: KCPP image generation not initialized!\n"); |
| output.data = ""; |
| output.status = 0; |
| return output; |
| } |
| uint8_t * input_image_buffer = NULL; |
| sd_image_t * results; |
| sd_image_t* control_image = NULL; |
|
|
| |
| std::string cleanprompt = clean_input_prompt(inputs.prompt); |
| std::string cleannegprompt = clean_input_prompt(inputs.negative_prompt); |
| std::string img2img_data = std::string(inputs.init_images); |
| std::string sampler = inputs.sample_method; |
|
|
| sd_params->prompt = cleanprompt; |
| sd_params->negative_prompt = cleannegprompt; |
| sd_params->cfg_scale = inputs.cfg_scale; |
| sd_params->sample_steps = inputs.sample_steps; |
| sd_params->seed = inputs.seed; |
| sd_params->width = inputs.width; |
| sd_params->height = inputs.height; |
| sd_params->strength = inputs.denoising_strength; |
| sd_params->clip_skip = inputs.clip_skip; |
| sd_params->mode = (img2img_data==""?SDMode::TXT2IMG:SDMode::IMG2IMG); |
|
|
| |
| int biggestdim = (sd_params->width>sd_params->height?sd_params->width:sd_params->height); |
| auto loadedsdver = get_loaded_sd_version(sd_ctx); |
| if(loadedsdver==SDVersion::VERSION_FLUX) |
| { |
| sd_params->cfg_scale = 1; |
| if(sampler=="euler a"||sampler=="k_euler_a"||sampler=="euler_a") |
| { |
| sampler = "euler"; |
| } |
| } |
| int reslimit = (loadedsdver==SDVersion::VERSION_SD1 || loadedsdver==SDVersion::VERSION_SD2)?832:1024; |
| if(biggestdim > reslimit) |
| { |
| float scaler = (float)biggestdim / (float)reslimit; |
| int newwidth = (int)((float)sd_params->width / scaler); |
| int newheight = (int)((float)sd_params->height / scaler); |
| newwidth = newwidth - (newwidth%64); |
| newheight = newheight - (newheight%64); |
| sd_params->width = newwidth; |
| sd_params->height = newheight; |
| } |
| bool dotile = (sd_params->width>768 || sd_params->height>768) && !notiling; |
| set_sd_vae_tiling(sd_ctx,dotile); |
|
|
| |
| sd_image_t input_image = {0,0,0,nullptr}; |
| std::vector<uint8_t> image_buffer; |
| int nx, ny, nc; |
| int img2imgW = sd_params->width; |
| int img2imgH = sd_params->height; |
| int img2imgC = 3; |
| std::vector<uint8_t> resized_image_buf(img2imgW * img2imgH * img2imgC); |
|
|
| std::string ts = get_timestamp_str(); |
| if(!sd_is_quiet) |
| { |
| printf("\n[%s] Generating Image (%d steps)\n",ts.c_str(),inputs.sample_steps); |
| }else{ |
| printf("\n[%s] Generating (%d st.)\n",ts.c_str(),inputs.sample_steps); |
| } |
|
|
| fflush(stdout); |
|
|
| if(sampler=="euler a"||sampler=="k_euler_a"||sampler=="euler_a") |
| { |
| sd_params->sample_method = sample_method_t::EULER_A; |
| } |
| else if(sampler=="euler"||sampler=="k_euler") |
| { |
| sd_params->sample_method = sample_method_t::EULER; |
| } |
| else if(sampler=="heun"||sampler=="k_heun") |
| { |
| sd_params->sample_method = sample_method_t::HEUN; |
| } |
| else if(sampler=="dpm2"||sampler=="k_dpm_2") |
| { |
| sd_params->sample_method = sample_method_t::DPM2; |
| } |
| else if(sampler=="lcm"||sampler=="k_lcm") |
| { |
| sd_params->sample_method = sample_method_t::LCM; |
| } |
| else if(sampler=="dpm++ 2m karras" || sampler=="dpm++ 2m" || sampler=="k_dpmpp_2m") |
| { |
| sd_params->sample_method = sample_method_t::DPMPP2M; |
| } |
| else |
| { |
| sd_params->sample_method = sample_method_t::EULER_A; |
| } |
|
|
| if (sd_params->mode == TXT2IMG) { |
|
|
| if(!sd_is_quiet && sddebugmode==1) |
| { |
| printf("\nTXT2IMG PROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%p\nCSTR:%f\n\n", |
| sd_params->prompt.c_str(), |
| sd_params->negative_prompt.c_str(), |
| sd_params->clip_skip, |
| sd_params->cfg_scale, |
| sd_params->width, |
| sd_params->height, |
| sd_params->sample_method, |
| sd_params->sample_steps, |
| (int)sd_params->seed, |
| sd_params->batch_count, |
| control_image, |
| sd_params->control_strength); |
| } |
|
|
|
|
| results = txt2img(sd_ctx, |
| sd_params->prompt.c_str(), |
| sd_params->negative_prompt.c_str(), |
| sd_params->clip_skip, |
| sd_params->cfg_scale, |
| sd_params->guidance, |
| sd_params->width, |
| sd_params->height, |
| sd_params->sample_method, |
| sd_params->sample_steps, |
| sd_params->seed, |
| sd_params->batch_count, |
| control_image, |
| sd_params->control_strength, |
| sd_params->style_ratio, |
| sd_params->normalize_input, |
| sd_params->input_id_images_path.c_str(), |
| sd_params->skip_layers.data(), |
| sd_params->skip_layers.size(), |
| sd_params->slg_scale, |
| sd_params->skip_layer_start, |
| sd_params->skip_layer_end); |
| } else { |
|
|
| if (sd_params->width <= 0 || sd_params->width % 64 != 0 || sd_params->height <= 0 || sd_params->height % 64 != 0) { |
| printf("\nKCPP SD: bad request image dimensions!\n"); |
| output.data = ""; |
| output.status = 0; |
| return output; |
| } |
|
|
| image_buffer = kcpp_base64_decode(img2img_data); |
|
|
| if(input_image_buffer!=nullptr) |
| { |
| stbi_image_free(input_image_buffer); |
| input_image_buffer = nullptr; |
| } |
|
|
| input_image_buffer = stbi_load_from_memory(image_buffer.data(), image_buffer.size(), &nx, &ny, &nc, 3); |
|
|
| if (nx < 64 || ny < 64 || nx > 1024 || ny > 1024 || nc!= 3) { |
| printf("\nKCPP SD: bad input image dimensions %d x %d!\n",nx,ny); |
| output.data = ""; |
| output.status = 0; |
| return output; |
| } |
| if (!input_image_buffer) { |
| printf("\nKCPP SD: load image from memory failed!\n"); |
| output.data = ""; |
| output.status = 0; |
| return output; |
| } |
|
|
| |
| int resok = stbir_resize_uint8(input_image_buffer, nx, ny, 0, resized_image_buf.data(), img2imgW, img2imgH, 0, img2imgC); |
| if (!resok) { |
| printf("\nKCPP SD: resize image failed!\n"); |
| output.data = ""; |
| output.status = 0; |
| return output; |
| } |
|
|
| input_image.width = img2imgW; |
| input_image.height = img2imgH; |
| input_image.channel = img2imgC; |
| input_image.data = resized_image_buf.data(); |
|
|
| if(!sd_is_quiet && sddebugmode==1) |
| { |
| printf("\nIMG2IMG PROMPT:%s\nNPROMPT:%s\nCLPSKP:%d\nCFGSCLE:%f\nW:%d\nH:%d\nSM:%d\nSTEP:%d\nSEED:%d\nBATCH:%d\nCIMG:%p\nSTR:%f\n\n", |
| sd_params->prompt.c_str(), |
| sd_params->negative_prompt.c_str(), |
| sd_params->clip_skip, |
| sd_params->cfg_scale, |
| sd_params->width, |
| sd_params->height, |
| sd_params->sample_method, |
| sd_params->sample_steps, |
| (int)sd_params->seed, |
| sd_params->batch_count, |
| control_image, |
| sd_params->strength); |
| } |
|
|
| results = img2img(sd_ctx, |
| input_image, |
| sd_params->prompt.c_str(), |
| sd_params->negative_prompt.c_str(), |
| sd_params->clip_skip, |
| sd_params->cfg_scale, |
| sd_params->guidance, |
| sd_params->width, |
| sd_params->height, |
| sd_params->sample_method, |
| sd_params->sample_steps, |
| sd_params->strength, |
| sd_params->seed, |
| sd_params->batch_count, |
| control_image, |
| sd_params->control_strength, |
| sd_params->style_ratio, |
| sd_params->normalize_input, |
| sd_params->input_id_images_path.c_str(), |
| sd_params->skip_layers.data(), |
| sd_params->skip_layers.size(), |
| sd_params->slg_scale, |
| sd_params->skip_layer_start, |
| sd_params->skip_layer_end); |
| } |
|
|
| if (results == NULL) { |
| printf("\nKCPP SD generate failed!\n"); |
| output.data = ""; |
| output.status = 0; |
| return output; |
| } |
|
|
|
|
| for (int i = 0; i < sd_params->batch_count; i++) { |
| if (results[i].data == NULL) { |
| continue; |
| } |
|
|
| int out_data_len; |
| unsigned char * png = stbi_write_png_to_mem(results[i].data, 0, results[i].width, results[i].height, results[i].channel, &out_data_len, get_image_params(*sd_params, sd_params->seed + i).c_str()); |
| if (png != NULL) |
| { |
| recent_data = kcpp_base64_encode(png,out_data_len); |
| free(png); |
| } |
|
|
| free(results[i].data); |
| results[i].data = NULL; |
| } |
|
|
| free(results); |
| output.data = recent_data.c_str(); |
| output.status = 1; |
| total_img_gens += 1; |
| return output; |
| } |
|
|