From 7274062824f2596bda9f3d25fa5e7d61c9498a2c Mon Sep 17 00:00:00 2001
From: Finn Lukas Busch <finn.lukas.busch@gmail.com>
Date: Wed, 30 Jul 2025 10:55:38 +0200
Subject: [PATCH] Align mapping, navigation and vision models with the evaluation
 setup. Reverted some changes introduced during refactoring.

---
 config/mon/mapping_conf_sim.yaml |  2 +-
 eval/habitat_evaluator.py        |  1 -
 mapping/feature_map.py           |  4 ++--
 mapping/navigator.py             | 28 +++++++++++++++++-----------
 mapping/rerun_logger.py          |  4 +++-
 onemap_utils/vis_utils.py        |  2 +-
 vision_models/clip_dense.py      | 14 ++------------
 vision_models/yolov7_model.py    | 30 +++++++++++++++---------------
 8 files changed, 41 insertions(+), 44 deletions(-)

diff --git a/config/mon/mapping_conf_sim.yaml b/config/mon/mapping_conf_sim.yaml
index 3f496fc..dfb82df 100644
--- a/config/mon/mapping_conf_sim.yaml
+++ b/config/mon/mapping_conf_sim.yaml
@@ -14,4 +14,4 @@ MappingConf: # [all in feature_map.py]
   obstacle_max: 0.5
   filter_stairs: True
   floor_level: -0.88
-  floor_threshold: -1.1
+  floor_threshold: -1.1
\ No newline at end of file
diff --git a/eval/habitat_evaluator.py b/eval/habitat_evaluator.py
index 4d2b5c3..57e12d7 100644
--- a/eval/habitat_evaluator.py
+++ b/eval/habitat_evaluator.py
@@ -302,7 +302,6 @@ def evaluate(self):
         results = []
         # restart at 930
         for n_ep, episode in enumerate(self.episodes):
-        # for n_ep, episode in enumerate(self.episodes[492:]):
             poses = []
             results.append(Result.FAILURE_OOT)
             steps = 0
diff --git a/mapping/feature_map.py b/mapping/feature_map.py
index 77aba0f..7055ae6 100644
--- a/mapping/feature_map.py
+++ b/mapping/feature_map.py
@@ -384,8 +384,8 @@ def project_dense(self,
         # TODO this will be wrong for sub-sampled as e.g. fx will be wrong
         depth_image_smoothed = depth_aligned
 
-        mask = depth_image_smoothed == float('inf')
-        depth_image_smoothed[mask] = depth_image_smoothed[~mask].max()
+        # mask = depth_image_smoothed == float('inf')
+        # depth_image_smoothed[mask] = depth_image_smoothed[~mask].max()
         kernel_size = 11
         pad = kernel_size // 2
 
diff --git a/mapping/navigator.py b/mapping/navigator.py
index 4c689c4..9211c26 100644
--- a/mapping/navigator.py
+++ b/mapping/navigator.py
@@ -324,14 +324,16 @@ def compute_best_path(self,
                 best_nav_goal = self.nav_goals[best_idx]
                 self.cyclic_checker.add_state_action(start, best_nav_goal.get_descr_point(), top_two_vals)
                 if isinstance(best_nav_goal, Frontier):
-                    self.path = Planning.compute_to_goal(start, self.one_map.navigable_map & (
-                            self.one_map.confidence_map > 0).cpu().numpy(),
+                    # NOTE Allow more aggressive planning through unknown regions
+                    self.path = Planning.compute_to_goal(start, self.one_map.navigable_map, # & (
+                            #self.one_map.confidence_map > 0).cpu().numpy(),
                                                          (self.one_map.confidence_map > 0).cpu().numpy(),
                                                          best_nav_goal.get_descr_point(),
                                                          self.obstcl_kernel_size, 2)
                 elif isinstance(best_nav_goal, Cluster):
-                    self.path = Planning.compute_to_goal(start, self.one_map.navigable_map & (
-                            self.one_map.confidence_map > 0).cpu().numpy(),
+                    # NOTE Allow more aggressive planning through unknown regions
+                    self.path = Planning.compute_to_goal(start, self.one_map.navigable_map,# & (
+                            # self.one_map.confidence_map > 0).cpu().numpy(),
                                                          (self.one_map.confidence_map > 0).cpu().numpy(),
                                                          best_nav_goal.get_descr_point(),
                                                          # TODO we might want to consider all the points of the cluster!
@@ -488,7 +490,7 @@ def add_data(self,
 
         px, py = self.one_map.metric_to_px(x, y)
         if self.last_pose:
-            if np.linalg.norm(np.array([px, py, yaw]) - np.array(self.last_pose)) < 0.01:
+            if px == self.last_pose[0] and py == self.last_pose[1] and abs(yaw - self.last_pose[2]) < 0.001:
                 if self.path is not None:
                     self.stuck_at_cell_counter += 1
             else:
@@ -610,12 +612,16 @@ def add_data(self,
                                 self.chosen_detection = (x_masked[best], y_masked[best])
                     else:
                         best = np.argmin(depths)
-                        if self.object_detected:
-                            if adjusted_score[x_id[best], y_id[best]] < \
-                                    adjusted_score[self.chosen_detection[0], self.chosen_detection[1]] * 1.1:
-                                object_valid = False
-                        if object_valid:
-                            self.chosen_detection = (x_id[best], y_id[best])
+                        # NOTE: Unconditionally reselect the minimum-depth point (more aggressive reselection).
+                        # --- Score-based gating commented out to match single-object results ---
+                        # if self.object_detected:
+                        #     if adjusted_score[x_id[best], y_id[best]] < \
+                        #             adjusted_score[self.chosen_detection[0], self.chosen_detection[1]] * 1.1:
+                        #         object_valid = False
+                        # if object_valid:
+                        #     self.chosen_detection = (x_id[best], y_id[best])
+                        # --- End of commented-out block ---
+                        self.chosen_detection = (x_id[best], y_id[best])
                     if object_valid:
                         self.object_detected = True
                         self.compute_best_path(start)
diff --git a/mapping/rerun_logger.py b/mapping/rerun_logger.py
index 0ab0431..cee901c 100644
--- a/mapping/rerun_logger.py
+++ b/mapping/rerun_logger.py
@@ -275,7 +275,9 @@ def __init__(self, mapper: Navigator, to_file: bool, save_path: str, debug: bool
         if self.to_file:
             rr.save(save_path)
         else:
-            rr.connect("127.0.0.1:9876")
+            # NOTE: Updated to match the newer rerun API (connect_grpc)
+            rr.connect_grpc("rerun+http://127.0.0.1:9876/proxy") # Connect to Rerun server
+            print("Connected to Rerun server.")
         if self.debug_log:
             setup_blueprint_debug()
         else:
diff --git a/onemap_utils/vis_utils.py b/onemap_utils/vis_utils.py
index 2007438..4a42985 100644
--- a/onemap_utils/vis_utils.py
+++ b/onemap_utils/vis_utils.py
@@ -20,7 +20,7 @@ def log_map_rerun(map_, path, needs_orientation=False):
         map_ = map_.transpose((1, 0))
         map_ = np.flip(map_, axis=0)
     map_ = monochannel_to_inferno_rgb(map_)
-    rr.log(path, rr.Image(np.flip(map_, axis=-1)).compress(jpeg_quality=50))
+    rr.log(path, rr.Image(np.flip(map_, axis=-1)))
 
 
 def publish_sim_map(sim_map, br, publisher):
diff --git a/vision_models/clip_dense.py b/vision_models/clip_dense.py
index 3095cb9..fc80566 100644
--- a/vision_models/clip_dense.py
+++ b/vision_models/clip_dense.py
@@ -105,22 +105,12 @@ def forward_text(self, text_tokenized):
             class_embeddings = self.clip_model.encode_text(text_tokenized)
             return F.normalize(class_embeddings, dim=1)
 
-    # def forward_text_trt(self, text_tokenized):
-    #
-    #
-    #
-    # #class_embeddings = self.clip_model.encode_text(text_tokenized)
-    #
-    #
-    #
-    #
-    # return F.normalize(torch.tensor(output), dim=1)
-
     def image_forward_torch(self, clip_images: torch.Tensor):
         with torch.no_grad():
+            # NOTE: Normalize before resizing to match the paper implementation
+            clip_images = (clip_images - self.clip_mean) / self.clip_std
             clip_images = F.interpolate(clip_images, size=self.clip_resolution, mode='bilinear',
                                         align_corners=False, )
-            clip_images = (clip_images - self.clip_mean) / self.clip_std
             clip_features = self.clip_model.encode_image(clip_images, dense=True)
             clip_vis_dense = clip_features["clip_vis_dense"]
 
diff --git a/vision_models/yolov7_model.py b/vision_models/yolov7_model.py
index 34acdcb..a12bb00 100644
--- a/vision_models/yolov7_model.py
+++ b/vision_models/yolov7_model.py
@@ -61,13 +61,14 @@ def detect(self,
                ):
         a = time.time()
         orig_shape = image.shape
-
-        img = cv2.resize(
-             image,
-             (self.image_size, int(self.image_size * 0.7)),
-             interpolation=cv2.INTER_AREA,
-        )
-        img = img
+        # NOTE: The resize below is disabled because the original code did not use it.
+        # NOTE: It was added to experiment with VLFM's yolov7 setup when investigating false positives.
+        # img = cv2.resize(
+        #      image,
+        #      (self.image_size, int(self.image_size * 0.7)),
+        #      interpolation=cv2.INTER_AREA,
+        # )
+        img = image
         img = letterbox(img, new_shape=self.image_size)[0]
         img = img.transpose(2, 0, 1)
         img = np.ascontiguousarray(img)
@@ -86,7 +87,7 @@ def detect(self,
         # Apply NMS
         pred = non_max_suppression(
             pred,
-            0.25,
+            self.confidence_threshold,
             0.45,
             classes=self.classes_oi,
             agnostic=False,
@@ -105,14 +106,13 @@ def detect(self,
         for i in range(pred.shape[0]):
             class_name = COCO_CLASSES[int(pred[i, 5])]
             if class_name == self.classes[0]:
-                if logits[i] > self.confidence_threshold:
-                    box = boxes[i]
-                    if not (box[0].item() == box[2].item() or box[1].item() == box[3].item()):
-                        preds["boxes"].append([box[0].item(), box[1].item(), box[2].item(), box[3].item()])
-                        preds["scores"].append(logits[i])
-                        print(logits[i])
+                box = boxes[i]
+                if not (box[0].item() == box[2].item() or box[1].item() == box[3].item()):
+                    preds["boxes"].append([box[0].item(), box[1].item(), box[2].item(), box[3].item()])
+                    preds["scores"].append(logits[i])
+                    # print(logits[i])
 
-        print(f"YOLO forward: {time.time() - a}")
+        # print(f"YOLO forward: {time.time() - a}")
         return preds
 
 if __name__ == "__main__":