I am using opencv-python
to read images, for each image I have a list of 3d bounding boxes having xyz_location, xyz_scale, and xyz_rotation (euler angles) in lidar coordinates and the provided transformation matrices are extrinsic_matrix
from (lidar to camera coords) and intrinsic_matrix
(from camera coords to pixel coords).
I needed to create a way to overlay/draw the bounding boxes on top of the image and then add image to open3d.visualization.Visualizer. For that I created the following function:
def __add_bbox__(self, label_dict: dict): if 'camera_bbox' not in label_dict: return camera_bbox_dict = label_dict['camera_bbox'] center = camera_bbox_dict['xyz_center'] w, l, h = camera_bbox_dict['wlh_extent'] rotation_matrix = camera_bbox_dict['xyz_rotation_matrix'] color = camera_bbox_dict['rgb_bbox_color'] # define 3D bounding box x_corners = [l / 2, l / 2, -l / 2, -l / 2, l / 2, l / 2, -l / 2, -l / 2] y_corners = [0, 0, 0, 0, -h, -h, -h, -h] z_corners = [w / 2, -w / 2, -w / 2, w / 2, w / 2, -w / 2, -w / 2, w / 2] # rotate and translate 3D bounding box corners_3d = np.dot(rotation_matrix, np.vstack([x_corners, y_corners, z_corners])) # moving the center to object center corners_3d[0, :] = corners_3d[0, :] + center[0] corners_3d[1, :] = corners_3d[1, :] + center[1] corners_3d[2, :] = corners_3d[2, :] + center[2] # if any corner is behind camera, return if np.any(corners_3d[2, :] < 0.1): return # project 3D bounding box to 2D image corners_2d = label_dict['calib']['P2'].reshape(3, 4) @ nx3_to_nx4(corners_3d.T).T corners_2d = corners_2d.T # 3x8 -> 8x3 corners_2d = corners_2d[:, 0:2] / corners_2d[:, 2:3] corners_2d = corners_2d[:, 0:2].astype(np.int32) # draw 2D bounding box img_np = np.asarray(self.img) for k in range(0, 4): i, j = k, (k + 1) % 4 cv2.line(img_np, (corners_2d[i, 0], corners_2d[i, 1]), (corners_2d[j, 0], corners_2d[j, 1]), color, self.cfg.visualization.camera.bbox_line_width) i, j = k + 4, (k + 1) % 4 + 4 cv2.line(img_np, (corners_2d[i, 0], corners_2d[i, 1]), (corners_2d[j, 0], corners_2d[j, 1]), color, self.cfg.visualization.camera.bbox_line_width) i, j = k, k + 4 cv2.line(img_np, (corners_2d[i, 0], corners_2d[i, 1]), (corners_2d[j, 0], corners_2d[j, 1]), color, self.cfg.visualization.camera.bbox_line_width) self.img = o3d.geometry.Image(img_np) self.__add_geometry__('image', self.img, False)
whereas the __add_geometry__
function simply removes the previous open3d.geometry.Image and add the new one.
I have a calibration file reader function named Hanlder
as following:
def Handler(label_path: str, calib_path: str): output = [] # read calib calib_file_name = os.path.basename(calib_path).split('.')[0] calib_path = calib_path.replace(calib_file_name, 'front') # front camera calib calib_exists = os.path.exists(calib_path) if calib_exists: with open(calib_path, 'r') as f: calib = json.load(f) extrinsic_matrix = np.reshape(calib['extrinsic'], [4,4]) intrinsic_matrix = np.reshape(calib['intrinsic'], [3,3]) # read label file if os.path.exists(label_path) == False: return output with open(label_path, 'r') as f: lbls = json.load(f) for item in lbls: annotator = item['annotator'] if 'annotator' in item else 'Unknown' obj_id = int(item['obj_id']) obj_type = item['obj_type'] psr = item['psr'] psr_position_xyz = [float(psr['position']['x']), float(psr['position']['y']), float(psr['position']['z'])] psr_rotation_xyz = [float(psr['rotation']['x']), float(psr['rotation']['y']), float(psr['rotation']['z'])] psr_scale_xyz = [float(psr['scale']['x']), float(psr['scale']['y']), float(psr['scale']['z'])] label = dict() label['annotator'] = annotator label['id'] = obj_id label['type'] = obj_type label['psr'] = psr if calib_exists: label['calib'] = calib label['calib']['P2'] = nx3_to_nx4(intrinsic_matrix) lidar_xyz_center = np.array(psr_position_xyz, dtype=np.float32) lidar_wlh_extent = np.array(psr_scale_xyz, dtype=np.float32) lidar_rotation_matrix = o3d.geometry.OrientedBoundingBox.get_rotation_matrix_from_xyz(psr_rotation_xyz) if obj_type in colors: lidar_bbox_color = [i / 255.0 for i in colors[obj_type]] else: lidar_bbox_color = [0, 0, 0] label['lidar_bbox'] = {'xyz_center': lidar_xyz_center, 'wlh_extent': lidar_wlh_extent, 'xyz_rotation_matrix': lidar_rotation_matrix, 'rgb_bbox_color': lidar_bbox_color} if calib_exists: R_x = np.array([ [1, 0, 0], [0, math.cos(psr_rotation_xyz[0]), -math.sin(psr_rotation_xyz[0])], [0, math.sin(psr_rotation_xyz[0]), math.cos(psr_rotation_xyz[0])] ]) #Calculate rotation about y axis R_y = np.array([ [math.cos(psr_rotation_xyz[1]), 0, math.sin(psr_rotation_xyz[1])], [0, 1, 0], [-math.sin(psr_rotation_xyz[1]), 0, math.cos(psr_rotation_xyz[1])] ]) #Calculate rotation about z axis R_z = np.array([ [math.cos(psr_rotation_xyz[2]), -math.sin(psr_rotation_xyz[2]), 0], [math.sin(psr_rotation_xyz[2]), math.cos(psr_rotation_xyz[2]), 0], [0, 0, 1]]) camera_rotation_matrix = np.matmul(R_x, np.matmul(R_y, R_z)) camera_translation_matrix = lidar_xyz_center.reshape([-1,1]) rotation_and_translation_matrix = np.concatenate([camera_rotation_matrix, camera_translation_matrix], axis=-1) rotation_and_translation_matrix = np.concatenate([rotation_and_translation_matrix, np.array([0,0,0,1]).reshape([1,-1])], axis=0) origin_point = np.array([0, 0, 0, 1]) camera_xyz_center = np.matmul(extrinsic_matrix, np.matmul(rotation_and_translation_matrix, origin_point)) camera_xyz_center = camera_xyz_center[0:3] if obj_type in colors: camera_bbox_color = colors[obj_type] else: camera_bbox_color = [0, 0, 0] label['camera_bbox'] = {'xyz_center': camera_xyz_center, 'wlh_extent': lidar_wlh_extent, 'xyz_rotation_matrix': lidar_rotation_matrix, 'rgb_bbox_color': camera_bbox_color} output.append(label) return output
The Hanlder
creates a list of label_dict
and for each label_dict
I call __add_bbox__
function.
This setup draws bounding boxes but they seems off, example images shown below:
This is how it should look (ignore coloring and one face filled, just focus the bounds):
, I know for sure that transformation matrices are correct (they same label and calib file works in official github implementation here https://github.com/naurril/SUSTechPOINTS/blob/dev-auto-annotate/tools/visualize-camera.py.