from mllm.dataset.process_function import PlainBoxFormatter
from mllm.dataset.builder import prepare_interactive
from mllm.models.builder.build_shikra import load_pretrained_shikra
from mllm.dataset.utils.transform import expand2square, box_xyxy_expand2square
# Configure the transformers library's logging for this demo run.
# `log_level` stays a module-level name so later code can reuse it.
log_level = logging.DEBUG
_hf_logging = transformers.logging
_hf_logging.set_verbosity(log_level)
# Call order is kept from the original: default handler first, then the
# explicit format.
_hf_logging.enable_default_handler()
_hf_logging.enable_explicit_format()
# prompt for coco
# Command-line options for the local Shikra demo. Every option has a
# default, so the parser accepts an empty argument list.
parser = argparse.ArgumentParser(prog="Shikra Local Demo")
parser.add_argument(
    "--model_path",
    default="xxx/shikra-merge",
    help="Path to the model",
)
parser.add_argument(
    "--load_in_8bit",
    action="store_true",
    help="Load model in 8-bit precision",
)
parser.add_argument(
    "--image_path",
    default="xxx/shikra-main/mllm/demo/assets/ball.jpg",
    help="Path to the image file",
)
parser.add_argument(
    "--text",
    default=(
        "What do you see in this image? Please mention the objects and "
        "their locations using the format [x1,y1,x2,y2]."
    ),
    help="Text prompt",
)