| |
| |
| |
| |
|
|
| import gc |
| import torch |
| from visualization_utils import show_images |
|
|
| def _add_object( |
| pipe, |
| prompts, |
| seed_src, |
| seed_obj, |
| extended_scale, |
| source_latents, |
| structure_transfer_step, |
| subject_token, |
| blend_steps, |
| show_attention=False, |
| localization_model="attention_points_sam", |
| is_img_src=False, |
| img_src_latents=None, |
| use_offset=False, |
| display_output=False, |
| ): |
| gc.collect() |
| torch.cuda.empty_cache() |
|
|
| out = pipe( |
| prompt=prompts, |
| guidance_scale=3.5 if (not is_img_src) else [1,3.5], |
| height=1024, |
| width=1024, |
| max_sequence_length=512, |
| num_inference_steps=30, |
| seed=[seed_src, seed_obj], |
| |
| |
| extended_scale=extended_scale, |
| extended_steps_multi=10, |
| extended_steps_single=20, |
| |
| |
| source_latents=source_latents, |
| structure_transfer_step=structure_transfer_step, |
| |
| |
| subject_token=subject_token, |
| localization_model=localization_model, |
| blend_steps=blend_steps, |
| show_attention=show_attention, |
| |
| |
| is_img_src=is_img_src, |
| img_src_latents=img_src_latents, |
| use_offset=use_offset, |
|
|
| |
| tqdm_desc="Running Addit: Generating Edited Image", |
| ) |
|
|
| if display_output: |
| show_images(out.images) |
|
|
| return out.images |
|
|
def add_object_generated(
    pipe,
    prompt_source,
    prompt_object,
    subject_token,
    seed_src,
    seed_obj,
    show_attention=False,
    extended_scale=1.05,
    structure_transfer_step=2,
    blend_steps=None,
    localization_model="attention_points_sam",
    display_output=False
):
    """Generate a source image from a prompt, then run the editing pass on it.

    Two pipeline invocations happen in sequence:

    1. ``pipe`` is called with ``prompt_source`` alone and
       ``output_type="both"``; the second element of its result is kept as
       the source latents.
    2. ``_add_object`` is called with both prompts and those latents.

    Args:
        pipe: Callable pipeline used for both passes.
        prompt_source: Prompt for the source image; also the first prompt of
            the editing pass.
        prompt_object: Second prompt of the editing pass.
        subject_token: Forwarded to ``_add_object``.
        seed_src: Seed for the source generation; also forwarded to
            ``_add_object``.
        seed_obj: Forwarded to ``_add_object``.
        show_attention: Forwarded to ``_add_object``.
        extended_scale: Forwarded to ``_add_object``.
        structure_transfer_step: Forwarded to ``_add_object``.
        blend_steps: Forwarded to ``_add_object``. ``None`` (the default)
            means ``[15]``; a fresh list is built on every call so the
            default can never be shared or mutated across calls.
        localization_model: Forwarded to ``_add_object``.
        display_output: Forwarded to ``_add_object``.

    Returns:
        A ``(src_image, edited_image)`` tuple unpacked from the result of
        ``_add_object``.
    """
    # Resolve the sentinel here instead of using a mutable default argument.
    if blend_steps is None:
        blend_steps = [15]

    gc.collect()
    torch.cuda.empty_cache()

    # Pass 1: generate the source image. Only the latents are needed further
    # on; the decoded image from this pass is intentionally discarded because
    # the editing pass returns its own source image.
    print('Generating source image...')
    _, source_latents = pipe(
        prompt=[prompt_source],
        guidance_scale=3.5,
        height=1024,
        width=1024,
        max_sequence_length=512,
        num_inference_steps=30,
        seed=[seed_src],
        output_type="both",
        tqdm_desc="Generating Source Image",
    )

    # Pass 2: run the editing pass on top of the source latents.
    print('Running Addit...')
    src_image, edited_image = _add_object(
        pipe=pipe,
        prompts=[prompt_source, prompt_object],
        subject_token=subject_token,
        seed_src=seed_src,
        seed_obj=seed_obj,
        source_latents=source_latents,
        structure_transfer_step=structure_transfer_step,
        extended_scale=extended_scale,
        blend_steps=blend_steps,
        show_attention=show_attention,
        localization_model=localization_model,
        display_output=display_output
    )

    return src_image, edited_image
|
|
def add_object_real(
    pipe,
    source_image,
    prompt_source,
    prompt_object,
    subject_token,
    seed_src,
    seed_obj,
    localization_model="attention_points_sam",
    extended_scale=1.05,
    structure_transfer_step=4,
    blend_steps=None,
    use_offset=False,
    show_attention=False,
    use_inversion=False,
    display_output=False
):
    """Run the editing pass on an existing image instead of a generated one.

    Steps performed:

    1. ``pipe.call_img2img`` is run on ``source_image`` with
       ``strength=0.1`` and ``output_type="latent"``; the ``.images`` of its
       result are used as the source latents.
    2. If ``use_inversion`` is truthy, ``pipe.call_invert`` is run on those
       latents; the first element of every returned entry is collected and
       the resulting list is reversed.
    3. ``_add_object`` is called with ``is_img_src=True``.

    Both pipeline helpers use a generator seeded with ``0`` on
    ``pipe.device``, independent of ``seed_src`` / ``seed_obj``.

    Args:
        pipe: Pipeline exposing ``call_img2img``, ``call_invert``, ``device``
            and being callable itself.
        source_image: Image passed to ``pipe.call_img2img``.
        prompt_source: Prompt for encoding/inversion; also the first prompt
            of the editing pass.
        prompt_object: Second prompt of the editing pass.
        subject_token: Forwarded to ``_add_object``.
        seed_src: Forwarded to ``_add_object``.
        seed_obj: Forwarded to ``_add_object``.
        localization_model: Forwarded to ``_add_object``.
        extended_scale: Forwarded to ``_add_object``.
        structure_transfer_step: Forwarded to ``_add_object``.
        blend_steps: Forwarded to ``_add_object``. ``None`` (the default)
            means ``[20]``; a fresh list is built on every call so the
            default can never be shared or mutated across calls.
        use_offset: Forwarded to ``_add_object``.
        show_attention: Forwarded to ``_add_object``.
        use_inversion: When truthy, run the inversion step and forward its
            latents as ``img_src_latents``; otherwise ``None`` is forwarded.
        display_output: Forwarded to ``_add_object``.

    Returns:
        A ``(src_image, edited_image)`` tuple unpacked from the result of
        ``_add_object``.
    """
    # Resolve the sentinel here instead of using a mutable default argument.
    if blend_steps is None:
        blend_steps = [20]

    print('Noising-Denoising Original Image')
    gc.collect()
    torch.cuda.empty_cache()

    # Step 1: lightly noise and denoise the input to obtain source latents.
    source_latents = pipe.call_img2img(
        prompt=prompt_source,
        image=source_image,
        num_inference_steps=30,
        strength=0.1,
        guidance_scale=3.5,
        output_type="latent",
        generator=torch.Generator(device=pipe.device).manual_seed(0),
        tqdm_desc="Encoding Source Image",
    ).images

    # Step 2 (optional): invert the latents.
    img_src_latents = None
    if use_inversion:
        print('Inverting Image')
        gc.collect()
        torch.cuda.empty_cache()

        latents_list = pipe.call_invert(
            prompt=prompt_source,
            image=source_latents,
            num_inference_steps=30,
            guidance_scale=1,
            fixed_point_iterations=2,
            generator=torch.Generator(device=pipe.device).manual_seed(0),
            tqdm_desc="Inverting Source Image",
        )
        # Keep the first element of each entry and reverse the order.
        # NOTE(review): presumably this turns the inversion trajectory into
        # denoising order -- confirm against pipe.call_invert.
        img_src_latents = [x[0] for x in latents_list][::-1]

    print('Running Addit')
    gc.collect()
    torch.cuda.empty_cache()

    # Step 3: run the editing pass, flagged as operating on an image source.
    src_image, edited_image = _add_object(
        pipe,
        prompts=[prompt_source, prompt_object],
        seed_src=seed_src,
        seed_obj=seed_obj,
        extended_scale=extended_scale,
        source_latents=source_latents,
        structure_transfer_step=structure_transfer_step,
        subject_token=subject_token,
        blend_steps=blend_steps,
        show_attention=show_attention,
        localization_model=localization_model,
        is_img_src=True,
        img_src_latents=img_src_latents,
        use_offset=use_offset,
        display_output=display_output,
    )

    return src_image, edited_image
|
|