智谱CogVideoX视频开源大模型 - ToB企服应用市场:ToB评测及商务社交产业平台

# Fetch the CogVideo source tree into /workspace.
# The clone is chained with && so it is skipped when /workspace cannot be
# entered, instead of cloning into whatever directory the shell happens to be in.
cd /workspace/ &&
  git clone https://github.com/THUDM/CogVideo.git

复制代码

# Install the Python dependencies: the repository-level requirements first,
# then the ones under sat/, then omegaconf.
# Each pip -r call is gated on its cd so it never reads a requirements.txt
# from the wrong directory.
cd /workspace/CogVideo/ &&
  pip install -r requirements.txt
cd sat &&
  pip install -r requirements.txt
pip install omegaconf

复制代码

# Download the CogVideoX-5b-I2V repository from ModelScope into ./THUDM.
# -p keeps the step re-runnable when THUDM already exists. The && chain stops
# the clone when cd fails (wrong target directory) or when `git lfs install`
# fails (the clone would then contain LFS pointer files instead of weights).
mkdir -p THUDM &&
  cd THUDM &&
  git lfs install &&
  git clone https://www.modelscope.cn/ZhipuAI/CogVideoX-5b-I2V.git

复制代码

# Go up one directory, enter inference/, and run the demo script with no arguments.
# NOTE(review): this assumes the shell is still inside the THUDM directory
# created in the previous step and that its parent contains inference/ — confirm.
# NOTE(review): cli_demo.py may require command-line arguments — verify against the repo.
cd ..
cd inference
python cli_demo.py

复制代码

# Install the Docker engine.
# podman-docker is deliberately NOT installed: it ships a `docker` command that
# forwards to Podman and cannot be installed alongside docker.io. The following
# steps (`systemctl start docker`, `docker run --gpus=all`) rely on the Docker
# service provided by docker.io.
apt install docker.io

复制代码

# Enable the docker unit at boot and start it immediately in a single call.
systemctl enable --now docker

复制代码

# Launch the CogVideo image as a detached, interactive container named
# "cogvideo", publishing container port 7878 on host port 7878 and exposing
# all GPUs to it.
docker run \
  --interactive --tty --detach \
  --name cogvideo \
  --publish 7878:7878 \
  --gpus all \
  registry.cn-hangzhou.aliyuncs.com/guoshiyin/cogvideo:v3

复制代码

# Python packages needed by the quantized inference script (cli.py).
pip install modelscope
pip install torch
pip install accelerate
pip install sentencepiece
pip install --upgrade opencv-python transformers
# diffusers is installed from a pinned commit (the feature was still in a PR
# when this was written).
pip install git+https://github.com/huggingface/diffusers.git@878f609aa5ce4a78fea0f048726889debde1d7e8#egg=diffusers # Still in PR
# cli.py imports quantize_ and int8_weight_only from torchao.quantization;
# without torchao the script fails at import time.
pip install torchao

复制代码

# Create cli.py inside /workspace, which is where the final step runs
# `python cli.py` from.
# mkdir -p tolerates an already-existing /workspace, and the explicit cd makes
# sure the file is created there rather than in the current directory.
mkdir -p /workspace/ &&
  cd /workspace/ &&
  touch cli.py &&
  vi cli.py

复制代码

# Image-to-video inference with CogVideoX-5b-I2V and int8 weight-only quantization.
# PytorchAO (torchao) has to be installed from the GitHub source together with
# PyTorch Nightly; that is only required until the next release.
import torch
from diffusers import (
    AutoencoderKLCogVideoX,
    CogVideoXImageToVideoPipeline,
    CogVideoXTransformer3DModel,
)
from diffusers.utils import export_to_video, load_image
from transformers import T5EncoderModel
from torchao.quantization import int8_weight_only, quantize_

MODEL_ID = "THUDM/CogVideoX-5b-I2V"
DTYPE = torch.bfloat16

# Load every sub-model in bfloat16 and apply int8 weight-only quantization to
# it right after loading (quantize_ is called for its effect on the module).
text_encoder = T5EncoderModel.from_pretrained(
    MODEL_ID, subfolder="text_encoder", torch_dtype=DTYPE
)
quantize_(text_encoder, int8_weight_only())

transformer = CogVideoXTransformer3DModel.from_pretrained(
    MODEL_ID, subfolder="transformer", torch_dtype=DTYPE
)
quantize_(transformer, int8_weight_only())

vae = AutoencoderKLCogVideoX.from_pretrained(
    MODEL_ID, subfolder="vae", torch_dtype=DTYPE
)
quantize_(vae, int8_weight_only())

# Assemble the pipeline around the already-quantized components.
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    text_encoder=text_encoder,
    transformer=transformer,
    vae=vae,
    torch_dtype=DTYPE,
)

# Memory-saving switches: model CPU offload plus tiled and sliced VAE.
pipe.enable_model_cpu_offload()
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()

prompt = "A little girl is riding a bicycle at high speed. Focused, detailed, realistic."
image = load_image(image="input.jpg")

# Fixed seed on a CUDA generator so repeated runs use the same seed.
seeded_generator = torch.Generator(device="cuda").manual_seed(42)

result = pipe(
    prompt=prompt,
    image=image,
    num_videos_per_prompt=1,
    num_inference_steps=50,
    num_frames=49,
    guidance_scale=6,
    generator=seeded_generator,
)
video = result.frames[0]

# Write the first (and only requested) video to disk at 8 frames per second.
export_to_video(video, "output.mp4", fps=8)

复制代码

# Run the inference script from /workspace. cli.py loads "input.jpg" and writes
# "output.mp4" through relative paths, so the working directory matters.
# python is gated on the cd so it is never started from the wrong directory.
cd /workspace/ &&
  python cli.py

复制代码