Fooocus has excellent inference efficiency: its SDXL pipeline runs comfortably on 11 GB of VRAM, while webui's SDXL needs at least 12 GB. We therefore plan to migrate from webui to Fooocus, which calls for a close reading of its code, especially the optimizations it makes relative to webui. At the framework level, however, the two are not in the same class: webui ships a hook-based plugin system, while Fooocus, aiming at a Midjourney-style product, deliberately does not go the third-party-plugin route.
On AutoDL: python launch.py --listen --port 6006
entry_with_update.py
- # launch
- python entry_with_update.py --listen
-
- launch.py ->
- prepare_environment()->
- ini_comfy_args()->
- - args_manager.py -> args = comfy_cli.args ->backend/headless/comfy/cli_args.py
- download_models()->
-
- webui.py ->
- - run_button.click().then(fn=generate_clicked)->
- -- modules/async_worker.py->workers()->threading.Thread(target=worker).start() # queue + worker thread, sketched below
- -- handler()-> # receives the task and configures its parameters
- -- prompt_processing/vary/upscale/inpaint/controlnet/
- -- imgs = pipeline.process_diffusion(...)->
-
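A minimal sketch of the queue-plus-worker pattern behind modules/async_worker.py, assuming a simple shared task list (field names, polling interval, and the handler body here are illustrative, not the exact Fooocus API):

```python
import threading
import time

async_tasks = []  # shared queue consumed by the daemon worker thread

def handler(task):
    # in Fooocus this is where prompt processing, vary/upscale/inpaint,
    # controlnet setup and pipeline.process_diffusion(...) happen
    task["results"] = f"processed: {task['prompt']}"
    task["done"] = True

def worker():
    while True:
        if async_tasks:
            handler(async_tasks.pop(0))
        time.sleep(0.01)

threading.Thread(target=worker, daemon=True).start()

def generate_clicked(prompt):
    # gradio's run_button.click() callback: enqueue, then poll until done
    task = {"prompt": prompt, "done": False}
    async_tasks.append(task)
    while not task["done"]:
        time.sleep(0.01)
    return task["results"]

print(generate_clicked("a cat"))  # processed: a cat
```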
modules/default_pipeline.py -> process_diffusion() is the main pipeline; the webui counterparts are the two core interfaces StableDiffusionProcessingTxt2Img and StableDiffusionProcessingImg2Img.
- if latent is None:
- empty_latent = core.generate_empty_latent(width,height,1)
- else:
- empty_latent = latent
-
- sampled_latent = core.ksampler(final_unet,final_refiner,positive_cond,negative_cond,empty_latent,steps,denoise,callback,cfg_scale,sampler_name,scheduler_name,switch)
- decoded_latent = core.decode_vae(vae,sampled_latent,...)
- images = core.pytorch_to_numpy(decoded_latent)
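The last step is plain tensor-to-image conversion; a hedged sketch of what core.pytorch_to_numpy amounts to, assuming the decoded batch is float, channel-last, in [0, 1]:

```python
import numpy as np

def pytorch_to_numpy(x):
    # per image: move to CPU, scale to [0, 255], clip, cast to uint8
    return [np.clip(255. * y.cpu().numpy(), 0, 255).astype(np.uint8) for y in x]
```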
Default entry point: refresh_everything()
- refresh_everything()->
-
- refresh_refiner_model(refiner_model_name)
- refresh_base_model(base_model_name)
- refresh_loras(loras)
-
- prepare_text_encoder(True)
core.py -> generate_empty_latent()
- opEmptyLatentImage.generate(width,height,batch_size)[0]
-
- - backend/headless/nodes.py -> EmptyLatentImage.generate()
- - latent = torch.zeros([bs,4,height//8,width//8]) -> {'samples':latent}
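So the "empty latent" is literally a zero tensor with 4 channels at 1/8 of the pixel resolution; e.g. a 1024x1024 SDXL generation starts from a [1, 4, 128, 128] tensor:

```python
import torch

latent = torch.zeros([1, 4, 1024 // 8, 1024 // 8])  # bs=1
print(latent.shape)  # torch.Size([1, 4, 128, 128])
```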
core.py->ksampler()->backend/headless/comfy/sample.py
- core->ksampler()
-
- latent_image = latent['samples']
- noise = comfy.sample.prepare_noise(latent_image,seed,...) # deterministic per-seed noise, sketched below
-
- samples = comfy.sample.sample(model,noise,steps,cfg,sampler_name,scheduler,positive,negative,latent_image,...)
- - backend/headless/comfy/sample.py->sample()
- - real_model,positive_copy,negative_copy,noise_mask,models=prepare_sampling(model,noise.shape,positive...)
- - sampler = comfy.samplers.KSampler(...)
- - samples = sampler.sample(noise,positive_copy,negative_copy,cfg,latent_image...)
- -- sampler = sampler_class(self.sampler)
- -- sample(self.model,noise,positive,...)
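prepare_noise is what makes a seed reproducible: the initial noise is drawn from a CPU generator seeded with the user seed, so the same seed gives the same starting point on any device. A simplified sketch (the real function also supports noise_inds for batches):

```python
import torch

def prepare_noise(latent_image, seed):
    # deterministic per-seed noise, generated on CPU so results
    # do not depend on the GPU's RNG state
    generator = torch.manual_seed(seed)
    return torch.randn(latent_image.size(), dtype=latent_image.dtype,
                       layout=latent_image.layout, generator=generator,
                       device="cpu")
```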
sampler_class()->backend/headless/comfy/samplers.py
- sampler_class(name)->
- sampler = ksampler(name)->class KSAMPLER(Sampler)
sample -> modules/sample_hijack.py hijacks sample() from backend/headless/comfy/samplers.py (the patching mechanism is sketched after this trace)
- sample_hijack(model,noise,positive,negative,cfg,device,sampler,sigmas,model_options,latent_image,denoise_mask,callback,...) ->
-
- positive = positive[:]
- negative = negative[:]
-
- model_wrap = wrap_model(model)
- - model_denoise = CFGNoisePredictor(model)
- - model_wrap = k_diffusion_external.CompVisDenoiser(model_denoise)
-
- calculate_start_end_timesteps(model_wrap,negative)
- calculate_start_end_timesteps(model_wrap,positive)
- for c in positive/negative:
- create_cond_with_same_area_if_none(negative/positive,c)
- pre_run_control(model_wrap,negative+positive) # ControlNet pre-run hooks
-
- latent_image = model.process_latent_in(latent_image)
-
- samples = samplers.sample(model_wrap,sigmas,extra_args,...)
- model.process_latent_out(samples)
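The hijack mechanism itself is ordinary monkey-patching: sample_hijack.py defines a full replacement and overwrites the attribute on the comfy module, so every caller (including core.ksampler) transparently runs the Fooocus version. A self-contained toy of the mechanism, with a stand-in module and illustrative names:

```python
import types

# stand-in for backend/headless/comfy/samplers.py
comfy_samplers = types.SimpleNamespace(sample=lambda *a, **kw: "original sample()")

def sample_hijacked(*args, **kwargs):
    # the real replacement re-implements the body traced above (cond prep,
    # wrap_model, pre_run_control, samplers.sample, latent in/out) and adds
    # the base->refiner switch
    return "hijacked sample()"

comfy_samplers.sample = sample_hijacked  # later callers now hit the hijack
print(comfy_samplers.sample())           # hijacked sample()
```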
backend/headless/comfy/samplers.py
- class KSAMPLER->sample(model_wrap,sigmas,extra_args,callback,...)
- model_k = KSamplerX0Inpaint(model_wrap)
-
- if sampler_name == "dpm_fast":
- samples = k_diffusion_sampling.sample_dpm_fast(model_k,noise,...)
- elif sampler_name == "dpm_adaptive":
- samples = k_diffusion_sampling.sample_dpm_adaptive(model_k,noise,...)
- else:
- samples = getattr(k_diffusion_sampling,"sample_{}".format(sampler_name))(model_k,noise,...)
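The else branch is pure string dispatch: any function named sample_<name> in k_diffusion/sampling.py becomes selectable by sampler name. A toy version of the same getattr lookup:

```python
import types

# stand-in for backend/headless/comfy/k_diffusion/sampling.py
k_diffusion_sampling = types.SimpleNamespace(
    sample_euler=lambda model_k, noise: "ran euler",
    sample_dpmpp_2m_sde=lambda model_k, noise: "ran dpmpp_2m_sde",
)

def run(sampler_name, model_k, noise):
    return getattr(k_diffusion_sampling, "sample_{}".format(sampler_name))(model_k, noise)

print(run("dpmpp_2m_sde", None, None))  # ran dpmpp_2m_sde
```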
backend/headless/comfy/k_diffusion/sampling.py
- sample_dpmpp_2m_sde_gpu(model,x,sigmas,extra_args,callback,...)
- noise_sampler = BrownianTreeNoiseSampler(x,sigma_min,..) if noise_sampler is None else noise_sampler
- sample_dpmpp_2m_sde(model,x,...)
sample_dpmpp_2m_sde
- for i in trange(len(sigmas)-1):
- denoised = model(x,sigmas[i]*s_in,**extra_args)
- if callback is not None:
- callback({'x':x,'i':i,'sigma':sigmas[i],'sigma_hat':sigmas[i],'denoised':denoised})
- if sigmas[i+1] == 0:
- x = denoised
- else:
- # DPM-Solver++(2M) SDE (single-step form sketched after the loop)
- t, s = -sigmas[i].log(), -sigmas[i + 1].log()
- h = s - t
- eta_h = eta * h
- x = sigmas[i + 1] / sigmas[i] * (-eta_h).exp() * x + (-h - eta_h).expm1().neg() * denoised
-
- if old_denoised is not None:
- r = h_last / h
- if solver_type == 'heun':
- x = x + ((-h - eta_h).expm1().neg() / (-h - eta_h) + 1) * (1 / r) * (denoised - old_denoised)
- elif solver_type == 'midpoint':
- x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - old_denoised)
- if eta:
- x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise
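Pulling the update out of the loop makes the exponential-integrator structure easier to see. One step in isolation, under the simplifying assumptions of the first iteration (old_denoised is None, so no multistep correction) and with the trailing noise injection omitted:

```python
import torch

def dpmpp_2m_sde_step(x, denoised, sigma_cur, sigma_next, eta=1.0):
    t, s = -sigma_cur.log(), -sigma_next.log()  # "time" runs in -log(sigma)
    h = s - t                                   # step size in log-sigma space
    eta_h = eta * h
    # decay x toward the model's denoised prediction
    return (sigma_next / sigma_cur) * (-eta_h).exp() * x \
           + (-h - eta_h).expm1().neg() * denoised

x = torch.randn(1, 4, 128, 128)
out = dpmpp_2m_sde_step(x, torch.zeros_like(x),
                        torch.tensor(14.61), torch.tensor(10.0))
print(out.shape)  # torch.Size([1, 4, 128, 128])
```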
backend/headless/nodes.py: nodes are just classes (a minimal node in this convention is sketched after the mapping below)
- NODE_CLASS_MAPPINGS = {
- "KSampler": KSampler,
- "CheckpointLoaderSimple": CheckpointLoaderSimple,
- "CLIPTextEncode": CLIPTextEncode,
- "CLIPSetLastLayer": CLIPSetLastLayer,
- "VAEDecode": VAEDecode,
- "VAEEncode": VAEEncode,
- "VAEEncodeForInpaint": VAEEncodeForInpaint,
- "VAELoader": VAELoader,
- "EmptyLatentImage": EmptyLatentImage,
- "LatentUpscale": LatentUpscale,
- "LatentUpscaleBy": LatentUpscaleBy,
- "LatentFromBatch": LatentFromBatch,
- "RepeatLatentBatch": RepeatLatentBatch,
- "SaveImage": SaveImage,
- "PreviewImage": PreviewImage,
- "LoadImage": LoadImage,
- "LoadImageMask": LoadImageMask,
- "ImageScale": ImageScale,
- "ImageScaleBy": ImageScaleBy,
- "ImageInvert": ImageInvert,
- "ImageBatch": ImageBatch,
- "ImagePadForOutpaint": ImagePadForOutpaint,
- "EmptyImage": EmptyImage,
- "ConditioningAverage": ConditioningAverage,
- "ConditioningCombine": ConditioningCombine,
- "ConditioningConcat": ConditioningConcat,
- "ConditioningSetArea": ConditioningSetArea,
- "ConditioningSetAreaPercentage": ConditioningSetAreaPercentage,
- "ConditioningSetMask": ConditioningSetMask,
- "KSamplerAdvanced": KSamplerAdvanced,
- "SetLatentNoiseMask": SetLatentNoiseMask,
- "LatentComposite": LatentComposite,
- "LatentBlend": LatentBlend,
- "LatentRotate": LatentRotate,
- "LatentFlip": LatentFlip,
- "LatentCrop": LatentCrop,
- "LoraLoader": LoraLoader,
- "CLIPLoader": CLIPLoader,
- "UNETLoader": UNETLoader,
- "DualCLIPLoader": DualCLIPLoader,
- "CLIPVisionEncode": CLIPVisionEncode,
- "StyleModelApply": StyleModelApply,
- "unCLIPConditioning": unCLIPConditioning,
- "ControlNetApply": ControlNetApply,
- "ControlNetApplyAdvanced": ControlNetApplyAdvanced,
- "ControlNetLoader": ControlNetLoader,
- "DiffControlNetLoader": DiffControlNetLoader,
- "StyleModelLoader": StyleModelLoader,
- "CLIPVisionLoader": CLIPVisionLoader,
- "VAEDecodeTiled": VAEDecodeTiled,
- "VAEEncodeTiled": VAEEncodeTiled,
- "unCLIPCheckpointLoader": unCLIPCheckpointLoader,
- "GLIGENLoader": GLIGENLoader,
- "GLIGENTextBoxApply": GLIGENTextBoxApply,
-
- "CheckpointLoader": CheckpointLoader,
- "DiffusersLoader": DiffusersLoader,
-
- "LoadLatent": LoadLatent,
- "SaveLatent": SaveLatent,
-
- "ConditioningZeroOut": ConditioningZeroOut,
- "ConditioningSetTimestepRange": ConditioningSetTimestepRange,
- }
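Each value in this mapping is a plain class exposing the node contract (INPUT_TYPES / RETURN_TYPES / FUNCTION / CATEGORY), and the graph executor instantiates nodes by their string key. A minimal node in the same convention, paraphrasing EmptyLatentImage (defaults and bounds here are illustrative):

```python
import torch

class EmptyLatentImage:
    @classmethod
    def INPUT_TYPES(cls):
        return {"required": {
            "width": ("INT", {"default": 512, "min": 64, "max": 8192, "step": 8}),
            "height": ("INT", {"default": 512, "min": 64, "max": 8192, "step": 8}),
            "batch_size": ("INT", {"default": 1, "min": 1, "max": 64}),
        }}
    RETURN_TYPES = ("LATENT",)
    FUNCTION = "generate"  # name of the method the executor invokes
    CATEGORY = "latent"

    def generate(self, width, height, batch_size=1):
        latent = torch.zeros([batch_size, 4, height // 8, width // 8])
        return ({"samples": latent},)

# lookup-by-string, the same way the executor uses NODE_CLASS_MAPPINGS
node = {"EmptyLatentImage": EmptyLatentImage}["EmptyLatentImage"]()
print(getattr(node, node.FUNCTION)(1024, 1024)[0]["samples"].shape)
```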