Quantcast
Channel: Active questions tagged python - Stack Overflow
Viewing all articles
Browse latest Browse all 23160

Problem with creating dataset for visual object tracker

$
0
0

I wanted to train ET-Track (a nice video object tracker), which is based on Ocean (another video object tracker).

The problem is that the authors did not share any training code before. Now we have the code (adapted from Ocean's code), but we have a big problem with it: we get a random shift when trying to crop images for our template image. This is an example:

crop results

As you can see, the search2 image, which the model will train on, has a shift in it. I can't figure out why I have this shift. Would you help me, please?

        # Unpack one training sample from the dataset and visualize the crops.
        (template, search, out_label, reg_label, reg_weight, bbox) = dataset[0]
        x1, y1, x2, y2 = map(int, bbox)
        # CHW -> HWC uint8 so OpenCV can draw on / display the search image.
        search = search.transpose((1, 2, 0)).astype(np.uint8)
        search = cv2.rectangle(search, (x1, y1), (x2, y2), (200, 100, 150), 1)
        # Make the single-channel label/weight maps 3-channel and resize them
        # to the search image resolution so they can be overlaid/compared.
        reg_weight = cv2.cvtColor(reg_weight.astype(np.uint8), cv2.COLOR_GRAY2RGB)
        reg_weight = cv2.resize(reg_weight, (search.shape[1], search.shape[0]))
        out_label = cv2.cvtColor(out_label.astype(np.uint8) * 255, cv2.COLOR_GRAY2RGB)
        out_label = cv2.resize(out_label, (search.shape[1], search.shape[0]))
        x1, y1, x2, y2 = map(int, bbox)
        # Mask the search image with the regression weights and draw the bbox:
        # this is the "search2" view that shows the unexpected shift.
        search2 = cv2.rectangle(search * reg_weight, (x1, y1), (x2, y2), (200, 100, 150))
        cv2.imshow("search2", search2)
        cv2.imshow("search", search)
        cv2.imshow("out_label", out_label)

This is how these outputs are created.

And this is my dataset's __getitem__:

        if self.random_data:
            # choose 2 random images for search and template
            template, search = self._get_pairs(index)
        template_image = cv2.imread(template[0].as_posix())
        search_image = cv2.imread(search[0].as_posix())
        # change bboxes format and pick the first one
        template_target_bbox = self.yolo2ocean(template[1], template_image)
        search_target_bbox = self.yolo2ocean(search[1], search_image)
        # SiamFC-style crops centered on the target box.
        _, template_image = crop_like_SiamFC(
            template_image,
            bbox=template_target_bbox,
            exemplar_size=self.template_size,
            instance_size=self.search_size,
        )
        _, search_image = crop_like_SiamFC(
            search_image,
            bbox=search_target_bbox,
            exemplar_size=self.template_size,
            instance_size=self.search_size + self.search_margin,
        )
        # Box coordinates re-expressed relative to the cropped images.
        template_box = self._toBBox(template_image, template_target_bbox)
        search_box = self._toBBox(search_image, search_target_bbox)
        template, _, _ = self._augmentation(
            template_image, template_box, self.template_size
        )
        search, bbox, dag_param = self._augmentation(
            search_image, search_box, self.search_size, search=True
        )
        # No augment: I have turned off all of them!
        # from PIL image to numpy
        template = np.array(template)
        search = np.array(search)
        # Classification label (shifted by the augmentation offset) and
        # regression label/weight for the search crop.
        out_label = self._dynamic_label([self.size, self.size], dag_param.shift)
        reg_label, reg_weight = self.reg_label(bbox)

I think self.reg_label is the actual problem, but I don't know why. This is the function:

def reg_label(self, bbox):
    """
    Generate the per-grid-point regression label.

    :param bbox: [x1, y1, x2, y2] ground-truth box in search-image coordinates
    :return: (reg_label, inds_nonzero) where
             reg_label is [size, size, 4] distances [l, t, r, b] from each
             grid point to the four box edges, and
             inds_nonzero is a [size, size] float mask that is 1.0 only where
             the grid point lies strictly inside the box.
    """
    x1, y1, x2, y2 = bbox
    # Signed distance from every grid point to each edge of the box.
    l = self.grid_to_search_x - x1  # [size, size]
    t = self.grid_to_search_y - y1
    r = x2 - self.grid_to_search_x
    b = y2 - self.grid_to_search_y
    l, t, r, b = map(lambda v: np.expand_dims(v, axis=-1), [l, t, r, b])
    reg_label = np.concatenate((l, t, r, b), axis=-1)  # [size, size, 4]
    # A grid point is a positive sample only if all four distances are > 0,
    # i.e. the point falls strictly inside the bbox.
    reg_label_min = np.min(reg_label, axis=-1)
    inds_nonzero = (reg_label_min > 0).astype(float)
    return reg_label, inds_nonzero


def grids(self):
    """
    Precompute the search-image position of each score-map element.

    Stores self.grid_to_search_x / self.grid_to_search_y, each [size, size]:
    element (i, j) of the score map corresponds to that (x, y) pixel position
    on the search image (stride 8, grid centered on the search image).
    """
    sz = self.size  # e.g. 25
    sz_x = sz // 2
    sz_y = sz // 2
    # Grid offsets centered at 0, then scaled by the stride and shifted to
    # the center of the search image.
    x, y = np.meshgrid(
        np.arange(0, sz) - np.floor(float(sz_x)),
        np.arange(0, sz) - np.floor(float(sz_y)),
    )
    self.grid_to_search = {}
    self.stride = 8
    self.grid_to_search_x = x * self.stride + self.search_size // 2
    self.grid_to_search_y = y * self.stride + self.search_size // 2

Any idea would be helpful. Thanks a lot


Viewing all articles
Browse latest Browse all 23160

Trending Articles



<script src="https://jsc.adskeeper.com/r/s/rssing.com.1596347.js" async> </script>