2024-08-10 23:04:27 +02:00
|
|
|
import os
|
|
|
|
import torch
|
|
|
|
|
|
|
|
from base64 import b64encode
|
|
|
|
from cotracker.utils.visualizer import Visualizer, read_video_from_path
|
2024-08-12 22:37:51 +02:00
|
|
|
import numpy as np
|
|
|
|
from PIL import Image
|
|
|
|
import time
|
2024-08-10 23:04:27 +02:00
|
|
|
|
2024-08-12 22:37:51 +02:00
|
|
|
# Select the compute device for the whole script.
# GPU index 3 stays the preferred card (as originally hard-coded), but a CUDA
# host with fewer than four GPUs would otherwise get an invalid device and
# fail on the first `.to(device)`; fall back to GPU 0 in that case, and to the
# CPU when CUDA is not available at all.
if torch.cuda.is_available():
    gpu_index = 3 if torch.cuda.device_count() > 3 else 0
    device = torch.device(f'cuda:{gpu_index}')
else:
    device = torch.device('cpu')
|
2024-08-10 23:04:27 +02:00
|
|
|
|
2024-08-12 22:37:51 +02:00
|
|
|
# --- Stage: read the clip from disk and turn it into a float tensor ---------
# The stage is wall-clock timed from before the first log line until the
# tensor conversion has finished.
start_time = time.time()
print(f'Using device: {device}')
print('start loading video')

video = read_video_from_path('./assets/F1_shorts.mp4')
print(f'video shape: {video.shape}')

# Convert step by step: wrap the array as a tensor, move axis 3 to position 1
# (presumably channels-last -> channels-first; confirm against the reader's
# output layout), then add a leading axis and cast to float32.
# The tensor is intentionally NOT moved to `device` at this point; a variant
# that appended `.to(device)` here was disabled.
video = torch.from_numpy(video)
video = video.permute(0, 3, 1, 2)
video = video[None].float()

end_time = time.time()
print(f'video shape after permute: {video.shape}')
print("Load video Time taken: {:.2f} seconds".format(end_time - start_time))
|
2024-08-10 23:04:27 +02:00
|
|
|
|
|
|
|
from cotracker.predictor import CoTrackerPredictor
|
|
|
|
|
2024-08-11 13:42:03 +02:00
|
|
|
|
2024-08-10 23:04:27 +02:00
|
|
|
# Build the CoTracker predictor from the local checkpoint.
# `os.path.join` with a single argument returns that argument unchanged, so
# the path is passed directly.
model = CoTrackerPredictor(checkpoint='./checkpoints/cotracker2.pth')

# The resized clip and the segmentation mask are both moved to `device`
# before inference; the predictor has to live on the same device or the
# forward pass fails with a device-mismatch error on CUDA.
# NOTE(review): relies on CoTrackerPredictor being a torch.nn.Module (true for
# upstream CoTracker) — confirm if a local fork is in use.
model = model.to(device)
|
|
|
|
|
|
|
|
# pred_tracks, pred_visibility = model(video, grid_size=30)
|
|
|
|
|
|
|
|
# vis = Visualizer(save_dir='./videos', pad_value=100)
|
|
|
|
# vis.visualize(video=video, tracks=pred_tracks, visibility=pred_visibility, filename='teaser');
|
|
|
|
|
|
|
|
# Value later forwarded to the tracker's `grid_query_frame` argument.
grid_query_frame = 20

import torch.nn.functional as F

# --- Stage: resize the clip and move the resized copy to `device` -----------
# Target size for the last two axes of each frame, presumably
# (height, width) — confirm. An earlier, disabled variant used [200, 360].
interp_size = (720, 1280)

# `video[0]` drops the leading axis so `F.interpolate` receives a 4-D tensor;
# the axis is restored with `[None]` before the transfer to `device`.
video_interp = F.interpolate(video[0], size=list(interp_size), mode="bilinear")
video_interp = video_interp[None].to(device)
print(f'video_interp shape: {video_interp.shape}')
|
2024-08-10 23:04:27 +02:00
|
|
|
|
|
|
|
# --- Stage: prepare the segmentation mask and run the tracker ---------------
# Mask preparation and inference are timed together.
start_time = time.time()

input_mask = './assets/F1_mask.png'

# PIL's `Image.resize` takes its size as (width, height), so the two entries
# of `interp_size` are swapped before it is used for the mask.
interp_size = (interp_size[1], interp_size[0])

# Open the mask inside a context manager so the file handle is released as
# soon as the resized copy exists.
with Image.open(input_mask) as mask_image:
    segm_mask = mask_image.resize(interp_size, Image.BILINEAR)

# Fix: convert the *resized* image. The previous code re-opened the file here,
# which threw the resize away and handed the tracker a mask at the original
# file resolution instead of the clip's resolution.
segm_mask = np.array(segm_mask)
segm_mask = torch.tensor(segm_mask).to(device)
# NOTE(review): upstream CoTracker's demo passes the mask as a 4-D tensor
# (built with `[None, None]`); here the array is passed with whatever rank the
# PNG decodes to — confirm the predictor accepts that.

pred_tracks, pred_visibility = model(
    video_interp,
    grid_query_frame=grid_query_frame,
    backward_tracking=True,
    segm_mask=segm_mask,
)

end_time = time.time()

print("Time taken: {:.2f} seconds".format(end_time - start_time))
|
|
|
|
|
2024-08-12 22:37:51 +02:00
|
|
|
# --- Stage: construct the visualizer (timed on its own) ---------------------
start_time = time.time()
print('start visualizing')

# Output goes under ./videos; padding, line width and drawing mode are fixed
# for this run.
vis = Visualizer(save_dir='./videos', pad_value=20, linewidth=1, mode='optical_flow')
print('vis initialized')

end_time = time.time()
print("Time taken: {:.2f} seconds".format(end_time - start_time))
|
|
|
|
# --- Stage: render the predicted tracks over the resized clip (timed) -------
start_time = time.time()
print('start visualize')

# The resized clip is drawn on (not the full-resolution `video`), together
# with the tracks and visibility returned by the tracker.
vis.visualize(
    video=video_interp,
    tracks=pred_tracks,
    visibility=pred_visibility,
    filename='dense2',
)
print('done')

end_time = time.time()
print("Time taken: {:.2f} seconds".format(end_time - start_time))
|