import os
import time

import gradio as gr
from huggingface_hub import snapshot_download

ProjectDir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
CheckpointsDir = os.path.join(ProjectDir, "checkpoints")


def download_model():
    """Download the MuseV checkpoints from the Hugging Face Hub if absent."""
    if not os.path.exists(CheckpointsDir):
        print("Checkpoints not found, start downloading...")
        tic = time.time()
        snapshot_download(
            repo_id="TMElyralab/MuseV",
            local_dir=CheckpointsDir,
            max_workers=8,
        )
        toc = time.time()
        print(f"download cost {toc - tic} seconds")
    else:
        print("Model already downloaded.")


download_model()  # for huggingface deployment.

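# A minimal sketch of a narrower download, assuming only part of the repo is
# needed: snapshot_download also accepts allow_patterns. The pattern below is
# purely illustrative, not a confirmed MuseV repo layout:
#
#     snapshot_download(
#         repo_id="TMElyralab/MuseV",
#         local_dir=CheckpointsDir,
#         allow_patterns=["t2i/*"],
#     )
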
# Imported only after download_model(), presumably so any checkpoints these
# modules expect are already in place when they initialize.
from gradio_video2video import online_v2v_inference
from gradio_text2video import online_t2v_inference


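# Note (assuming gr.Image's default numpy output): the callback receives the
# image as an array in (height, width, channels) order, which is why
# update_shape below unpacks h before w.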
def update_shape(image):
    """Return (width, height) of the uploaded image, or the 512x768 default."""
    if image is not None:
        h, w, _ = image.shape
    else:
        h, w = 768, 512
    return w, h


class ConcatenateBlock(gr.blocks.Block):
    """Accumulates selected options into a comma-separated string."""

    def __init__(self, options):
        super().__init__()
        self.options = options
        self.current_string = ""

    def update_string(self, new_choice):
        if new_choice and new_choice not in self.current_string.split(", "):
            if self.current_string == "":
                self.current_string = new_choice
            else:
                self.current_string += ", " + new_choice
        return self.current_string


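# Example of the accumulation behavior (duplicates are ignored):
#
#     block = ConcatenateBlock(["pose", "canny"])
#     block.update_string("pose")   # -> "pose"
#     block.update_string("canny")  # -> "pose, canny"
#     block.update_string("pose")   # -> "pose, canny"
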
def process_input(new_choice):
    return concatenate_block.update_string(new_choice), ""


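# A sketch of how process_input could be wired to the control_options list
# defined below (not used by the demo; component names are illustrative):
#
#     choice = gr.Dropdown(choices=control_options, label="Add control")
#     combined = gr.Textbox(label="Selected controls")
#     choice.change(fn=process_input, inputs=choice, outputs=[combined, choice])
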
control_options = [
    "pose",
    "pose_body",
    "pose_hand",
    "pose_face",
    "pose_hand_body",
    "pose_hand_face",
    "dwpose",
    "dwpose_face",
    "dwpose_hand",
    "dwpose_body",
    "dwpose_body_hand",
    "canny",
    "tile",
    "hed",
    "hed_scribble",
    "depth",
    "pidi",
    "normal_bae",
    "lineart",
    "lineart_anime",
    "zoe",
    "sam",
    "mobile_sam",
    "leres",
    "content",
    "face_detector",
]
concatenate_block = ConcatenateBlock(control_options)

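# The names in control_options are ControlNet-style condition extractors: the
# pose/dwpose entries are human keypoint detectors, while canny, hed, depth,
# lineart and the rest are classic image-condition preprocessors.
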
css = """#input_img {max-width: 1024px !important} #output_vid {max-width: 1024px; max-height: 576px}"""

with gr.Blocks(css=css) as demo:
    gr.Markdown(
        "<div align='center'> <h1> MuseV: Infinite-length and High Fidelity Virtual Human Video Generation with Visual Conditioned Parallel Denoising</h1> \
        <h2 style='font-weight: 450; font-size: 1rem; margin: 0rem'>\
        </br>\
        Zhiqiang Xia <sup>*</sup>,\
        Zhaokang Chen<sup>*</sup>,\
        Bin Wu<sup>†</sup>,\
        Chao Li,\
        Kwok-Wai Hung,\
        Chao Zhan,\
        Yingjie He,\
        Wenjiang Zhou\
        (<sup>*</sup>Equal Contribution, <sup>†</sup>Corresponding Author, benbinwu@tencent.com)\
        </br>\
        Lyra Lab, Tencent Music Entertainment\
        </h2> \
        <a style='font-size:18px;color: #000000' href='https://github.com/TMElyralab/MuseV'>[Github Repo]</a>\
        <a style='font-size:18px;color: #000000' href=''> [ArXiv(Coming Soon)] </a>\
        <a style='font-size:18px;color: #000000' href=''> [Project Page(Coming Soon)] </a> \
        <a style='font-size:18px;color: #000000'>If MuseV is useful, please help star the repo, which is important to open-source projects. Thanks!</a> </div>"
    )
    with gr.Tab("Text to Video"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt")
                image = gr.Image(label="VisionCondImage")
                gr.Markdown("seed=-1 means a different random seed is used on each run")
                seed = gr.Number(label="Seed", value=-1)
                video_length = gr.Number(label="Video Length", value=12)
                fps = gr.Number(label="Generate Video FPS", value=6)
                gr.Markdown(
                    (
                        "If W&H is -1, the reference image's size is used. The target video size is $(W, H)*img\_edge\_ratio$. \n"
                        "The smaller the image size, the larger the motion amplitude and the lower the video quality.\n"
                        "The larger the W&H, the smaller the motion amplitude and the higher the video quality."
                    )
                )
                with gr.Row():
                    w = gr.Number(label="Width", value=-1)
                    h = gr.Number(label="Height", value=-1)
                    img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
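                # Worked example of the sizing rule above (assuming it applies
                # as stated): with W=-1, H=-1, a 512x768 reference image, and
                # img_edge_ratio=0.5, the target video is rendered at 256x384.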
            btn1 = gr.Button("Generate")
            out = gr.Video()
        with gr.Row():
            board = gr.Dataframe(
                value=[["", "", ""]] * 3,
                interactive=False,
                type="array",
                label="Demo Video",
            )

        # image.change(fn=update_shape, inputs=[image], outputs=[w, h])

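        # gr.Button.click passes the listed components' values to fn
        # positionally, so the order of `inputs` must match the parameter
        # order of online_t2v_inference.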
        btn1.click(
            fn=online_t2v_inference,
            inputs=[prompt, image, seed, fps, w, h, video_length, img_edge_ratio],
            outputs=out,
        )

    with gr.Tab("Video to Video"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt")
                gr.Markdown(
                    (
                        "The pose of the VisionCondImage should be the same as the pose of the first frame of the video. "
                        "It is better to generate a target first frame whose pose matches the video's first frame with a text2image tool, such as MJ or SDXL."
                    )
                )
                image = gr.Image(label="VisionCondImage")
                video = gr.Video(label="ReferVideo")
                # radio = gr.inputs.Radio(, label="Select an option")
                # ctr_button = gr.inputs.Button(label="Add ControlNet List")
                # output_text = gr.outputs.Textbox()
                processor = gr.Textbox(
                    label=f"Control Condition. The gradio demo currently supports only dwpose_body_hand; the command line supports any combination of {control_options}",
                    value="dwpose_body_hand",
                )
                gr.Markdown("seed=-1 means a different random seed is used on each run")
                seed = gr.Number(label="Seed", value=-1)
                video_length = gr.Number(label="Video Length", value=12)
                fps = gr.Number(label="Generate Video FPS", value=6)
                gr.Markdown(
                    (
                        "If W&H is -1, the reference image's size is used. The target video size is $(W, H)*img\_edge\_ratio$. \n"
                        "The smaller the image size, the larger the motion amplitude and the lower the video quality. \n"
                        "The larger the W&H, the smaller the motion amplitude and the higher the video quality. "
                    )
                )
                with gr.Row():
                    w = gr.Number(label="Width", value=-1)
                    h = gr.Number(label="Height", value=-1)
                    img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
            btn2 = gr.Button("Generate")
            out1 = gr.Video()
        # image.change(fn=update_shape, inputs=[image], outputs=[w, h])

        btn2.click(
            fn=online_v2v_inference,
            inputs=[
                prompt,
                image,
                video,
                processor,
                seed,
                fps,
                w,
                h,
                video_length,
                img_edge_ratio,
            ],
            outputs=out1,
        )


# Set the IP and port
ip_address = "0.0.0.0"  # Replace with your desired IP address
port_number = 7860  # Replace with your desired port number

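# demo.queue() enables request queuing so long-running generations are not cut
# off by HTTP timeouts; server_name="0.0.0.0" binds all interfaces, which
# hosted deployments such as Hugging Face Spaces expect.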
demo.queue().launch(
    share=False, debug=True, server_name=ip_address, server_port=port_number
)