| | """ |
| | Generate a large batch of image samples from a model and save them as a large |
| | numpy array. This can be used to produce samples for FID evaluation. |
| | """ |
| |
|
| | import argparse |
| | import json |
| | import sys |
| | import os |
| |
|
| | sys.path.append('.') |
| |
|
| | from pdb import set_trace as st |
| | import imageio |
| | import numpy as np |
| | import torch as th |
| | import torch.distributed as dist |
| |
|
| | from guided_diffusion import dist_util, logger |
| | from guided_diffusion.script_util import ( |
| | NUM_CLASSES, |
| | model_and_diffusion_defaults, |
| | create_model_and_diffusion, |
| | add_dict_to_argparser, |
| | args_to_dict, |
| | continuous_diffusion_defaults, |
| | control_net_defaults, |
| | ) |
| |
|
# Global backend switches, applied once at import time: make sure cuDNN is
# enabled and allow TF32 in both cuDNN kernels and CUDA matmuls.
th.backends.cudnn.enabled = True
th.backends.cudnn.allow_tf32 = True
th.backends.cuda.matmul.allow_tf32 = True
| |
|
| | from pathlib import Path |
| |
|
| | from tqdm import tqdm, trange |
| | import dnnlib |
| | from nsr.train_util_diffusion import TrainLoop3DDiffusion as TrainLoop |
| | from guided_diffusion.continuous_diffusion import make_diffusion as make_sde_diffusion |
| | import nsr |
| | import nsr.lsgm |
| | from nsr.script_util import create_3DAE_model, encoder_and_nsr_defaults, loss_defaults, AE_with_Diffusion, rendering_options_defaults, eg3d_options_default, dataset_defaults |
| |
|
| | from datasets.shapenet import load_eval_data |
| | from torch.utils.data import Subset |
| | from datasets.eg3d_dataset import init_dataset_kwargs |
| |
|
# Fixed seed shared by every RNG seeded in main().
SEED = 0


def _resolve_train_loop(trainer_name):
    """Return the trainer class registered under *trainer_name*.

    Args:
        trainer_name: value of ``args.trainer_name``.

    Returns:
        The class used to drive sampling.

    Raises:
        KeyError: if *trainer_name* is not a supported trainer. The message
            lists the accepted names (the original lookup raised a bare
            ``KeyError`` carrying only the offending key).
    """
    registry = {
        'vpsde_crossattn':
        nsr.lsgm.TrainLoop3DDiffusionLSGM_crossattn,
        'vpsde_crossattn_objv':
        nsr.crossattn_cldm_objv.TrainLoop3DDiffusionLSGM_crossattn,
    }
    if trainer_name not in registry:
        raise KeyError(
            f"unsupported trainer_name {trainer_name!r}; "
            f"expected one of {sorted(registry)}")
    return registry[trainer_name]


def main(args):
    """Build the denoiser and the 3D auto-encoder, then run sampling.

    Args:
        args: parsed command-line namespace. It is mutated in place:
            ``img_size`` and ``image_size`` are overwritten from
            ``image_size_encoder`` and, when ``sr_training`` is set,
            ``sr_kwargs`` is attached.

    Raises:
        KeyError: if ``args.trainer_name`` is not a supported trainer.

    Side effects:
        Initialises the distributed backend and the logger, seeds the torch
        and NumPy RNGs, and (on rank 0) creates ``<log dir>/FID_Cals`` and
        writes ``<args.logdir>/args.json``.
    """
    dist_util.setup_dist(args)
    logger.configure(dir=args.logdir)

    th.cuda.empty_cache()

    # Seed the CPU generator as well as the CUDA ones: the original code only
    # seeded CUDA and NumPy, leaving CPU-side torch randomness unseeded.
    th.manual_seed(SEED)
    th.cuda.manual_seed_all(SEED)
    np.random.seed(SEED)

    # ---- denoiser ---------------------------------------------------------
    logger.log("creating model and diffusion...")
    args.img_size = [args.image_size_encoder]
    args.image_size = args.image_size_encoder

    denoise_model, diffusion = create_model_and_diffusion(
        **args_to_dict(args, model_and_diffusion_defaults().keys()))

    if 'cldm' in args.trainer_name:
        # For cldm trainers the factory returns (denoiser, control net).
        assert isinstance(denoise_model, tuple)
        denoise_model, controlNet = denoise_model

        controlNet.to(dist_util.dev())
        # NOTE(review): the control net is put in train() mode while the
        # denoiser and auto-encoder use eval(); confirm this is intended
        # for a sampling script.
        controlNet.train()
    else:
        controlNet = None

    opts = eg3d_options_default()
    if args.sr_training:
        args.sr_kwargs = dnnlib.EasyDict(
            channel_base=opts.cbase,
            channel_max=opts.cmax,
            fused_modconv_default='inference_only',
            use_noise=True,
        )

    denoise_model.to(dist_util.dev())
    if args.use_fp16:
        denoise_model.convert_to_fp16()
    denoise_model.eval()

    # ---- 3D auto-encoder --------------------------------------------------
    logger.log("creating 3DAE...")
    auto_encoder = create_3DAE_model(
        **args_to_dict(args, encoder_and_nsr_defaults().keys()))

    auto_encoder.to(dist_util.dev())
    auto_encoder.eval()

    # ---- dataset ----------------------------------------------------------
    logger.log("create dataset")

    # NOTE(review): none of these loaders is used below (data/eval_data are
    # passed as None); the imports are kept so import behaviour is unchanged.
    if args.objv_dataset:
        from datasets.g_buffer_objaverse import load_data, load_eval_data, load_memory_data, load_wds_data  # noqa: F401
    else:
        from datasets.shapenet import load_data, load_eval_data, load_memory_data  # noqa: F401

    # ---- trainer wrapper --------------------------------------------------
    train_loop_cls = _resolve_train_loop(args.trainer_name)

    if 'vpsde' in args.trainer_name:
        sde_diffusion = make_sde_diffusion(
            dnnlib.EasyDict(
                args_to_dict(args,
                             continuous_diffusion_defaults().keys())))
        logger.log('create VPSDE diffusion.')
    else:
        sde_diffusion = None

    auto_encoder.decoder.rendering_kwargs = args.rendering_kwargs

    # No loss and no data loaders are supplied; only the trainer's eval_*
    # methods are called below.
    train_loop = train_loop_cls(rec_model=auto_encoder,
                                denoise_model=denoise_model,
                                control_model=controlNet,
                                diffusion=diffusion,
                                sde_diffusion=sde_diffusion,
                                loss_class=None,
                                data=None,
                                eval_data=None,
                                **vars(args))

    # ---- sampling ---------------------------------------------------------
    logger.log("sampling...")
    dist_util.synchronize()

    # Only rank 0 touches the filesystem.
    if dist_util.get_rank() == 0:
        (Path(logger.get_dir()) / 'FID_Cals').mkdir(exist_ok=True,
                                                    parents=True)

        with open(os.path.join(args.logdir, 'args.json'), 'w') as f:
            json.dump(vars(args), f, indent=2)

    # Presumably the pre-saved evaluation camera poses (judging by the file
    # name); loaded directly onto this rank's device.
    camera = th.load('assets/objv_eval_pose.pt',
                     map_location=dist_util.dev())[:]

    if args.create_controlnet or 'crossattn' in args.trainer_name:
        train_loop.eval_cldm(
            prompt=args.prompt,
            unconditional_guidance_scale=args.unconditional_guidance_scale,
            use_ddim=args.use_ddim,
            save_img=args.save_img,
            use_train_trajectory=args.use_train_trajectory,
            camera=camera,
            num_instances=args.num_instances,
            num_samples=args.num_samples,
            export_mesh=args.export_mesh,
        )
    else:
        train_loop.eval_ddpm_sample(
            train_loop.rec_model,
            save_img=args.save_img,
            use_train_trajectory=args.use_train_trajectory,
            export_mesh=args.export_mesh,
        )

    # Wait for every rank before reporting completion.
    dist.barrier()
    logger.log("sampling complete")
| |
|
| |
|
def create_argparser():
    """Assemble the command-line parser for this script.

    The script-specific defaults below are merged with the defaults returned
    by the model, encoder, loss, diffusion, control-net and dataset helper
    functions. The helpers are applied afterwards and in a fixed order, so on
    a key collision the helper value (and, among helpers, the later one) wins.

    Returns:
        argparse.ArgumentParser: parser populated from the merged defaults
        via ``add_dict_to_argparser``.
    """
    script_defaults = {
        "image_size_encoder": 224,
        "triplane_scaling_divider": 1.0,
        "diffusion_input_size": -1,
        # NOTE(review): main() only accepts 'vpsde_crossattn' and
        # 'vpsde_crossattn_objv'; unless a default provider below overrides
        # this value it has to be set on the command line.
        "trainer_name": 'adm',
        "use_amp": False,
        # sampling
        "clip_denoised": False,
        "num_samples": 10,
        "num_instances": 10,
        "use_ddim": False,
        "ddpm_model_path": "",
        "cldm_model_path": "",
        "rec_model_path": "",
        # logging / data
        "logdir": "/mnt/lustre/yslan/logs/nips23/",
        "data_dir": "",
        "eval_data_dir": "",
        "eval_batch_size": 1,
        "num_workers": 1,
        # training-loop options (also forwarded to the trainer class)
        "overfitting": False,
        "image_size": 128,
        "iterations": 150000,
        "schedule_sampler": "uniform",
        "anneal_lr": False,
        "lr": 5e-5,
        "weight_decay": 0.0,
        "lr_anneal_steps": 0,
        "batch_size": 1,
        "microbatch": -1,
        "ema_rate": "0.9999",
        "log_interval": 50,
        "eval_interval": 2500,
        "save_interval": 10000,
        "resume_checkpoint": "",
        "resume_cldm_checkpoint": "",
        "resume_checkpoint_EG3D": "",
        "use_fp16": False,
        "fp16_scale_growth": 1e-3,
        "load_submodule_name": '',
        "ignore_resume_opt": False,
        "freeze_ae": False,
        "denoised_ae": True,
        # conditioning / output
        "prompt": "a red chair",
        "interval": 1,
        "save_img": False,
        "use_train_trajectory": False,
        "unconditional_guidance_scale": 1.0,
        "use_eos_feature": False,
        "export_mesh": False,
        "cond_key": 'caption',
    }

    # Order matters: each provider may override keys set before it.
    default_providers = (
        model_and_diffusion_defaults,
        encoder_and_nsr_defaults,
        loss_defaults,
        continuous_diffusion_defaults,
        control_net_defaults,
        dataset_defaults,
    )
    for provide in default_providers:
        script_defaults.update(provide())

    cli = argparse.ArgumentParser()
    add_dict_to_argparser(cli, script_defaults)
    return cli
| |
|
| |
|
if __name__ == "__main__":
    # Request the most detailed torch.distributed debug output for this run.
    os.environ.update({"TORCH_DISTRIBUTED_DEBUG": "DETAIL"})

    args = create_argparser().parse_args()

    # Values derived from the runtime environment, not from the command
    # line. LOCAL_RANK must be present in the environment (expected to be
    # provided by the distributed launcher); a missing variable raises
    # KeyError here.
    args.local_rank = int(os.environ["LOCAL_RANK"])
    args.gpus = th.cuda.device_count()

    # Computed after the two fields above so the helper sees the full
    # namespace.
    args.rendering_kwargs = rendering_options_defaults(args)

    main(args)
| |
|