From 7274062824f2596bda9f3d25fa5e7d61c9498a2c Mon Sep 17 00:00:00 2001 From: Finn Lukas Busch Date: Wed, 30 Jul 2025 10:55:38 +0200 Subject: [PATCH] Fixed things to match evaluation setup. Reverted some changes introduced during refactoring. --- config/mon/mapping_conf_sim.yaml | 2 +- eval/habitat_evaluator.py | 1 - mapping/feature_map.py | 4 ++-- mapping/navigator.py | 28 +++++++++++++++++----------- mapping/rerun_logger.py | 4 +++- onemap_utils/vis_utils.py | 2 +- vision_models/clip_dense.py | 14 ++------------ vision_models/yolov7_model.py | 30 +++++++++++++++--------------- 8 files changed, 41 insertions(+), 44 deletions(-) diff --git a/config/mon/mapping_conf_sim.yaml b/config/mon/mapping_conf_sim.yaml index 3f496fc..dfb82df 100644 --- a/config/mon/mapping_conf_sim.yaml +++ b/config/mon/mapping_conf_sim.yaml @@ -14,4 +14,4 @@ MappingConf: # [all in feature_map.py] obstacle_max: 0.5 filter_stairs: True floor_level: -0.88 - floor_threshold: -1.1 + floor_threshold: -1.1 \ No newline at end of file diff --git a/eval/habitat_evaluator.py b/eval/habitat_evaluator.py index 4d2b5c3..57e12d7 100644 --- a/eval/habitat_evaluator.py +++ b/eval/habitat_evaluator.py @@ -302,7 +302,6 @@ def evaluate(self): results = [] # restart at 930 for n_ep, episode in enumerate(self.episodes): - # for n_ep, episode in enumerate(self.episodes[492:]): poses = [] results.append(Result.FAILURE_OOT) steps = 0 diff --git a/mapping/feature_map.py b/mapping/feature_map.py index 77aba0f..7055ae6 100644 --- a/mapping/feature_map.py +++ b/mapping/feature_map.py @@ -384,8 +384,8 @@ def project_dense(self, # TODO this will be wrong for sub-sampled as e.g. fx will be wrong depth_image_smoothed = depth_aligned - mask = depth_image_smoothed == float('inf') - depth_image_smoothed[mask] = depth_image_smoothed[~mask].max() + # mask = depth_image_smoothed == float('inf') + # depth_image_smoothed[mask] = depth_image_smoothed[~mask].max() kernel_size = 11 pad = kernel_size // 2 diff --git a/mapping/navigator.py b/mapping/navigator.py index 4c689c4..9211c26 100644 --- a/mapping/navigator.py +++ b/mapping/navigator.py @@ -324,14 +324,16 @@ def compute_best_path(self, best_nav_goal = self.nav_goals[best_idx] self.cyclic_checker.add_state_action(start, best_nav_goal.get_descr_point(), top_two_vals) if isinstance(best_nav_goal, Frontier): - self.path = Planning.compute_to_goal(start, self.one_map.navigable_map & ( - self.one_map.confidence_map > 0).cpu().numpy(), + # NOTE Allow more aggressive planning through unknown regions + self.path = Planning.compute_to_goal(start, self.one_map.navigable_map, # & ( + #self.one_map.confidence_map > 0).cpu().numpy(), (self.one_map.confidence_map > 0).cpu().numpy(), best_nav_goal.get_descr_point(), self.obstcl_kernel_size, 2) elif isinstance(best_nav_goal, Cluster): - self.path = Planning.compute_to_goal(start, self.one_map.navigable_map & ( - self.one_map.confidence_map > 0).cpu().numpy(), + # NOTE Allow more aggressive planning through unknown regions + self.path = Planning.compute_to_goal(start, self.one_map.navigable_map,# & ( + # self.one_map.confidence_map > 0).cpu().numpy(), (self.one_map.confidence_map > 0).cpu().numpy(), best_nav_goal.get_descr_point(), # TODO we might want to consider all the points of the cluster! @@ -488,7 +490,7 @@ def add_data(self, px, py = self.one_map.metric_to_px(x, y) if self.last_pose: - if np.linalg.norm(np.array([px, py, yaw]) - np.array(self.last_pose)) < 0.01: + if px == self.last_pose[0] and py == self.last_pose[1] and abs(yaw - self.last_pose[2]) < 0.001: if self.path is not None: self.stuck_at_cell_counter += 1 else: @@ -610,12 +612,16 @@ def add_data(self, self.chosen_detection = (x_masked[best], y_masked[best]) else: best = np.argmin(depths) - if self.object_detected: - if adjusted_score[x_id[best], y_id[best]] < \ - adjusted_score[self.chosen_detection[0], self.chosen_detection[1]] * 1.1: - object_valid = False - if object_valid: - self.chosen_detection = (x_id[best], y_id[best]) + # NOTE More aggressive reselection of the best point + # ---- Commented out to match single-object results --- + # if self.object_detected: + # if adjusted_score[x_id[best], y_id[best]] < \ + # adjusted_score[self.chosen_detection[0], self.chosen_detection[1]] * 1.1: + # object_valid = False + # if object_valid: + # self.chosen_detection = (x_id[best], y_id[best]) + # --- End of comment --- + self.chosen_detection = (x_id[best], y_id[best]) if object_valid: self.object_detected = True self.compute_best_path(start) diff --git a/mapping/rerun_logger.py b/mapping/rerun_logger.py index 0ab0431..cee901c 100644 --- a/mapping/rerun_logger.py +++ b/mapping/rerun_logger.py @@ -275,7 +275,9 @@ def __init__(self, mapper: Navigator, to_file: bool, save_path: str, debug: bool if self.to_file: rr.save(save_path) else: - rr.connect("127.0.0.1:9876") + # NOTE Update to match newer rerun version + rr.connect_grpc("rerun+http://127.0.0.1:9876/proxy") # Connect to Rerun server + print("Connected to Rerun server.") if self.debug_log: setup_blueprint_debug() else: diff --git a/onemap_utils/vis_utils.py b/onemap_utils/vis_utils.py index 2007438..4a42985 100644 --- a/onemap_utils/vis_utils.py +++ b/onemap_utils/vis_utils.py @@ -20,7 +20,7 @@ def log_map_rerun(map_, path, needs_orientation=False): map_ = map_.transpose((1, 0)) map_ = np.flip(map_, axis=0) map_ = monochannel_to_inferno_rgb(map_) - rr.log(path, rr.Image(np.flip(map_, axis=-1)).compress(jpeg_quality=50)) + rr.log(path, rr.Image(np.flip(map_, axis=-1))) def publish_sim_map(sim_map, br, publisher): diff --git a/vision_models/clip_dense.py b/vision_models/clip_dense.py index 3095cb9..fc80566 100644 --- a/vision_models/clip_dense.py +++ b/vision_models/clip_dense.py @@ -105,22 +105,12 @@ def forward_text(self, text_tokenized): class_embeddings = self.clip_model.encode_text(text_tokenized) return F.normalize(class_embeddings, dim=1) - # def forward_text_trt(self, text_tokenized): - # - # - # - # #class_embeddings = self.clip_model.encode_text(text_tokenized) - # - # - # - # - # return F.normalize(torch.tensor(output), dim=1) - def image_forward_torch(self, clip_images: torch.Tensor): with torch.no_grad(): + # NOTE Moved normalization to the beginning to match the paper implementation + clip_images = (clip_images - self.clip_mean) / self.clip_std clip_images = F.interpolate(clip_images, size=self.clip_resolution, mode='bilinear', align_corners=False, ) - clip_images = (clip_images - self.clip_mean) / self.clip_std clip_features = self.clip_model.encode_image(clip_images, dense=True) clip_vis_dense = clip_features["clip_vis_dense"] diff --git a/vision_models/yolov7_model.py b/vision_models/yolov7_model.py index 34acdcb..a12bb00 100644 --- a/vision_models/yolov7_model.py +++ b/vision_models/yolov7_model.py @@ -61,13 +61,14 @@ def detect(self, ): a = time.time() orig_shape = image.shape - - img = cv2.resize( - image, - (self.image_size, int(self.image_size * 0.7)), - interpolation=cv2.INTER_AREA, - ) - img = img + # NOTE removed this, as it was not used in the original code. + # NOTE We added this to experiment with VLFM's yolov7 setup when investigating false positives. + # img = cv2.resize( + # image, + # (self.image_size, int(self.image_size * 0.7)), + # interpolation=cv2.INTER_AREA, + # ) + img = image img = letterbox(img, new_shape=self.image_size)[0] img = img.transpose(2, 0, 1) img = np.ascontiguousarray(img) @@ -86,7 +87,7 @@ def detect(self, # Apply NMS pred = non_max_suppression( pred, - 0.25, + self.confidence_threshold, 0.45, classes=self.classes_oi, agnostic=False, @@ -105,14 +106,13 @@ def detect(self, for i in range(pred.shape[0]): class_name = COCO_CLASSES[int(pred[i, 5])] if class_name == self.classes[0]: - if logits[i] > self.confidence_threshold: - box = boxes[i] - if not (box[0].item() == box[2].item() or box[1].item() == box[3].item()): - preds["boxes"].append([box[0].item(), box[1].item(), box[2].item(), box[3].item()]) - preds["scores"].append(logits[i]) - print(logits[i]) + box = boxes[i] + if not (box[0].item() == box[2].item() or box[1].item() == box[3].item()): + preds["boxes"].append([box[0].item(), box[1].item(), box[2].item(), box[3].item()]) + preds["scores"].append(logits[i]) + # print(logits[i]) - print(f"YOLO forward: {time.time() - a}") + # print(f"YOLO forward: {time.time() - a}") return preds if __name__ == "__main__":