diff --git a/.github/workflows/poetry-test.yml b/.github/workflows/poetry-test.yml
index 6aed9970b..88d4becfc 100644
--- a/.github/workflows/poetry-test.yml
+++ b/.github/workflows/poetry-test.yml
@@ -46,7 +46,7 @@ jobs:
           jackd -d dummy -r 44100 &

       - name: Install python dependencies
-        run: poetry install --with openset,nomad,s2s,simbench
+        run: poetry install --with perception,nomad,s2s,simbench

       - name: Update rosdep
         shell: bash
diff --git a/README.md b/README.md
index 3ff7ca386..b1290385c 100644
--- a/README.md
+++ b/README.md
@@ -50,7 +50,7 @@
 guide and tutorials. 📚
 - [x] rai_tts: Text-to-speech models and tools.
 - [x] rai_sim: Package for connecting RAI to simulation environments.
 - [x] rai_bench: Benchmarking suite for RAI. Test agents, models, tools, simulators, etc.
-- [x] rai_openset: Openset detection models and tools.
+- [x] rai_perception: Object detection tools based on open-set models and machine learning techniques.
 - [x] rai_nomad: Integration with NoMaD for navigation.
 - [ ] rai_finetune: Finetune LLMs on your embodied data.
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 494092677..18053ecbd 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -25,7 +25,7 @@ RUN apt-get update && apt-get install -y \
     wget

 # Install Poetry
-RUN curl -sSL https://install.python-poetry.org | python3 - --version 1.8.4
+RUN curl -sSL https://install.python-poetry.org | python3 - --version 2.1.1
 ENV PATH="/root/.local/bin:$PATH"

 # Clone and setup RAI
diff --git a/docs/ROS_2/ros_packages.md b/docs/ROS_2/ros_packages.md
index b053af5cf..db8ca8ded 100644
--- a/docs/ROS_2/ros_packages.md
+++ b/docs/ROS_2/ros_packages.md
@@ -2,9 +2,9 @@
 RAI includes multiple configurable ROS 2 packages.

-| Package                 | Description | Documentation |
-| ----------------------- | ----------- | ------------- |
-| **rai_open_set_vision** | Package enabling use of [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) and [GroundedSAM](https://github.com/IDEA-Research/Grounded-SAM-2) -- an open-set detection model with ROS 2. | [rai_open_set_vision](../extensions/openset.md) |
-| **rai_nomad**           | Package integrating [NoMaD](https://general-navigation-models.github.io/nomad/index.html) -- an exploration model with ROS2. | [rai_nomad](../extensions/nomad.md) |
-| **rai_interfaces**      | Definition of custom messages and services used in RAI. | |
-| **rai_bringup**         | Launch files to run RAI. | |
+| Package            | Description | Documentation |
+| ------------------ | ----------- | ------------- |
+| **rai_perception** | Object detection tools based on open-set models and machine learning techniques. Integrates [GroundingDINO](https://github.com/IDEA-Research/GroundingDINO) and [GroundedSAM](https://github.com/IDEA-Research/Grounded-SAM-2) with ROS 2. | [rai_perception](../extensions/perception.md) |
+| **rai_nomad**      | Package integrating [NoMaD](https://general-navigation-models.github.io/nomad/index.html) -- an exploration model with ROS2. | [rai_nomad](../extensions/nomad.md) |
+| **rai_interfaces** | Definition of custom messages and services used in RAI. | |
+| **rai_bringup**    | Launch files to run RAI. | |
diff --git a/docs/demos/manipulation.md b/docs/demos/manipulation.md
index 02ecd6a19..5b9199702 100644
--- a/docs/demos/manipulation.md
+++ b/docs/demos/manipulation.md
@@ -22,7 +22,7 @@ manipulation techniques.
 2. Download additional dependencies:

     ```shell
-    poetry install --with openset
+    poetry install --with perception
     vcs import < demos.repos
     rosdep install --from-paths src/examples/rai-manipulation-demo/ros2_ws/src --ignore-src -r -y
     ```
diff --git a/docs/demos/rosbot_xl.md b/docs/demos/rosbot_xl.md
index 778373427..49a820d37 100644
--- a/docs/demos/rosbot_xl.md
+++ b/docs/demos/rosbot_xl.md
@@ -30,7 +30,7 @@ platform in a nice apartment.
     sudo apt install ros-${ROS_DISTRO}-navigation2 ros-${ROS_DISTRO}-nav2-bringup
     vcs import < demos.repos
     rosdep install --from-paths src --ignore-src -r -y
-    poetry install --with openset
+    poetry install --with perception
     ```

 !!! tip "Alternative: Demo source build"
@@ -70,7 +70,7 @@ platform in a nice apartment.

 The rosbot demo utilizes several components:

-1. Vision processing using Grounded SAM 2 and Grounding DINO for object detection and segmentation. See [RAI OpenSet Vision](../extensions/openset.md).
+1. Vision processing using Grounded SAM 2 and Grounding DINO for object detection and segmentation. See [RAI perception](../extensions/perception.md).
 2. RAI agent to process the request and interact with environment via [tool-calling](https://python.langchain.com/docs/concepts/tool_calling/) mechanism.
 3. Navigation is enabled via [nav2 toolkit](../API_documentation/langchain_integration/ROS_2_tools.md#nav2), which interacts with [ROS 2 nav2](https://docs.nav2.org/) asynchronously by calling [ros2 actions](https://docs.ros.org/en/jazzy/Tutorials/Beginner-CLI-Tools/Understanding-ROS2-Actions/Understanding-ROS2-Actions.html).
 4. Embodiment of the Rosbot is achieved using [RAI Whoami](../tutorials/create_robots_whoami.md) module. This makes RAI agent aware of the hardware platform and its capabilities.
diff --git a/docs/extensions/openset.md b/docs/extensions/perception.md
similarity index 75%
rename from docs/extensions/openset.md
rename to docs/extensions/perception.md
index 33f2c8542..f648892ad 100644
--- a/docs/extensions/openset.md
+++ b/docs/extensions/perception.md
@@ -1,4 +1,4 @@
---8<-- "src/rai_extensions/rai_open_set_vision/README.md:sec1"
+--8<-- "src/rai_extensions/rai_perception/README.md:sec1"

 Agents create two ROS 2 Nodes: `grounding_dino` and `grounded_sam` using [ROS2Connector](../API_documentation/connectors/ROS_2_Connectors.md).
 These agents can be triggered by ROS2 services:
@@ -15,15 +15,15 @@ These agents can be triggered by ROS2 services:

 ## RAI Tools

-`rai_open_set_vision` package contains tools that can be used by [RAI LLM agents](../tutorials/walkthrough.md)
+`rai_perception` package contains tools that can be used by [RAI LLM agents](../tutorials/walkthrough.md)
 enhance their perception capabilities.

 For more information on RAI Tools see [Tool use and development](../tutorials/tools.md) tutorial.

---8<-- "src/rai_extensions/rai_open_set_vision/README.md:sec3"
+--8<-- "src/rai_extensions/rai_perception/README.md:sec3"

 > [!TIP]
 >
 > you can try example below with [rosbotxl demo](../demos/rosbot_xl.md) binary.
 > The binary exposes `/camera/camera/color/image_raw` and `/camera/camera/depth/image_raw` topics.

---8<-- "src/rai_extensions/rai_open_set_vision/README.md:sec4"
+--8<-- "src/rai_extensions/rai_perception/README.md:sec4"
diff --git a/docs/setup/install.md b/docs/setup/install.md
index ad4bca1ff..9ed94c0d7 100644
--- a/docs/setup/install.md
+++ b/docs/setup/install.md
@@ -43,7 +43,7 @@ There are two ways to start using RAI:

 !!! important "Package availability"

-    `rai_openset` and `rai_nomad` are not yet available through pip. If your workflow relies on openset detection or NoMaD integration, please refer to the
+    `rai_perception` and `rai_nomad` are not yet available through pip. If your workflow relies on openset detection or NoMaD integration, please refer to the
     [developer environment instructions](#setting-up-developer-environment) setup.

     `rai_interfaces` is available as `apt` package. However, due to package distribution delays, the latest version may not be immediately available. If you encounter missing imports, please build `rai_interfaces` from [source](https://github.com/RobotecAI/rai_interfaces).
@@ -113,14 +113,14 @@ rosdep install --from-paths src --ignore-src -r -y

 install additional dependencies:

    ```bash
-    poetry install --with openset,nomad,s2s,simbench # or `--all-groups` for full setup
+    poetry install --with perception,nomad,s2s,simbench # or `--all-groups` for full setup
    ```

    | Group Name | Description | Dependencies |
    |------------|-------------|--------------|
    | [s2s][s2s] | Speech-to-Speech functionality | rai_asr, rai_tts |
    | [simbench][simbench] | Simulation and benchmarking tools | rai_sim, rai_bench |
-   | [openset][openset] | Open-set detection capabilities | groundingdino, groundedsam |
+   | [perception][perception] | Open-set detection capabilities | groundingdino, groundedsam |
    | [nomad][nomad] | Visual Navigation - NoMaD integration | visualnav_transformer |
    | docs | Documentation-related dependencies | mkdocs, mkdocs-material, pymdown-extensions |
@@ -168,5 +168,5 @@ Pick your local solution or service provider and follow one of these guides:

 [s2s]: ../tutorials/voice_interface.md
 [simbench]: ../simulation_and_benchmarking/overview.md
-[openset]: ../extensions/openset.md
+[perception]: ../extensions/perception.md
 [nomad]: ../extensions/nomad.md
diff --git a/examples/manipulation-demo.py b/examples/manipulation-demo.py
index 29d7832cd..4f8cdabef 100644
--- a/examples/manipulation-demo.py
+++ b/examples/manipulation-demo.py
@@ -30,7 +30,7 @@
     ResetArmTool,
 )
 from rai.tools.ros2.simple import GetROS2ImageConfiguredTool
-from rai_open_set_vision.tools import GetGrabbingPointTool
+from rai_perception.tools import GetGrabbingPointTool
 from rai_whoami.models import EmbodimentInfo
diff --git a/examples/rosbot-xl-demo.py b/examples/rosbot-xl-demo.py
index 33a97513e..077cbd31e 100644
--- a/examples/rosbot-xl-demo.py
+++ b/examples/rosbot-xl-demo.py
@@ -32,7 +32,7 @@
     Nav2Toolkit,
 )
 from rai.tools.time import WaitForSecondsTool
-from rai_open_set_vision.tools import GetGrabbingPointTool
+from rai_perception.tools import GetGrabbingPointTool
 from rai_whoami import EmbodimentInfo
diff --git a/mkdocs.yml b/mkdocs.yml
index 15b046711..59342b9c2 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -128,7 +128,7 @@ nav:
       - RAI Bench: simulation_and_benchmarking/rai_bench.md
       - Simulators: simulation_and_benchmarking/simulators.md
   - Extensions:
-      - OpenSet Vision: extensions/openset.md
+      - Perception: extensions/perception.md
       - NoMaD Visual Navigation: extensions/nomad.md
   - FAQ:
       - Overview: faq/faq.md
diff --git a/poetry.lock b/poetry.lock
index d30fe9d1f..2bca1ec9f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -51,7 +51,7 @@ version = "2.4.0"
 description = "Addict is a dictionary whose items can be set using both attribute and item syntax."
 optional = false
 python-versions = "*"
-groups = ["openset"]
+groups = ["perception"]
 files = [
     {file = "addict-2.4.0-py3-none-any.whl", hash = "sha256:249bb56bbfd3cdc2a004ea0ff4c2b6ddc84d53bc2194761636eb314d5cfa5dfc"},
     {file = "addict-2.4.0.tar.gz", hash = "sha256:b3b2210e0e067a281f5646c8c5db92e99b7231ea8b0eb5f74dbdf9e259d4e494"},
@@ -252,7 +252,7 @@ version = "4.9.3"
 description = "ANTLR 4.9.3 runtime for Python 3.7"
 optional = false
 python-versions = "*"
-groups = ["nomad", "openset"]
+groups = ["nomad", "perception"]
 files = [
     {file = "antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b"},
 ]
@@ -564,7 +564,7 @@ version = "2025.7.14"
 description = "Python package for providing Mozilla's CA Bundle."
 optional = false
 python-versions = ">=3.7"
-groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"]
+groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"]
 files = [
     {file = "certifi-2025.7.14-py3-none-any.whl", hash = "sha256:6b31f564a415d79ee77df69d757bb49a5bb53bd9f756cbbe24394ffd6fc1f4b2"},
     {file = "certifi-2025.7.14.tar.gz", hash = "sha256:8ea99dbdfaaf2ba2f9bac77b9249ef62ec5218e7c2b2e903378ed5fccf765995"},
@@ -669,7 +669,7 @@ version = "3.4.2"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
 optional = false
 python-versions = ">=3.7"
-groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"]
+groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"]
 files = [
     {file = "charset_normalizer-3.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c48ed483eb946e6c04ccbe02c6b4d1d48e51944b6db70f697e089c193404941"},
     {file = "charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2d318c11350e10662026ad0eb71bb51c7812fc8590825304ae0bdd4ac283acd"},
@@ -827,12 +827,12 @@ version = "0.4.6"
 description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
-groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"]
+groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"]
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
 ]
-markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", nomad = "sys_platform == \"win32\" or platform_system == \"Windows\"", openset = "platform_system == \"Windows\"", simbench = "platform_system == \"Windows\""}
+markers = {main = "sys_platform == \"win32\" or platform_system == \"Windows\"", nomad = "sys_platform == \"win32\" or platform_system == \"Windows\"", perception = "platform_system == \"Windows\"", simbench = "platform_system == \"Windows\""}

 [[package]]
 name = "coloredlogs"
@@ -909,7 +909,7 @@ version = "1.3.2"
 description = "Python library for calculating contours of 2D quadrilateral grids"
 optional = false
 python-versions = ">=3.10"
-groups = ["nomad", "openset"]
+groups = ["nomad", "perception"]
 files = [
     {file = "contourpy-1.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba38e3f9f330af820c4b27ceb4b9c7feee5fe0493ea53a8720f4792667465934"},
     {file = "contourpy-1.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dc41ba0714aa2968d1f8674ec97504a8f7e334f48eeacebcaa6256213acb0989"},
@@ -1164,7 +1164,7 @@ version = "0.12.1"
 description = "Composable style cycles"
 optional = false
 python-versions = ">=3.8"
-groups = ["nomad", "openset"]
+groups = ["nomad", "perception"]
 files = [
     {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"},
     {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"},
@@ -1358,7 +1358,7 @@ version = "0.7.1"
 description = "XML bomb protection for Python stdlib modules"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
-groups = ["openset"]
+groups = ["perception"]
 files = [
     {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"},
     {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
@@ -1668,6 +1668,7 @@ files = [
     {file = "faiss_cpu-1.11.0.post1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dc12b3f89cf48be3f2a20b37f310c3f1a7a5708fdf705f88d639339a24bb590b"},
     {file = "faiss_cpu-1.11.0.post1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:773fa45aa98a210ab4e2c17c1b5fb45f6d7e9acb4979c9a0b320b678984428ac"},
     {file = "faiss_cpu-1.11.0.post1-cp39-cp39-win_amd64.whl", hash = "sha256:6240c4b1551eedc07e76813c2e14a1583a1db6c319a92a3934bf212d0e4c7791"},
+    {file = "faiss_cpu-1.11.0.post1.tar.gz", hash = "sha256:06b1ea9ddec9e4d9a41c8ef7478d493b08d770e9a89475056e963081eed757d1"},
 ]

 [package.dependencies]
@@ -1717,7 +1718,7 @@ version = "3.18.0"
 description = "A platform independent file lock."
 optional = false
 python-versions = ">=3.9"
-groups = ["main", "nomad", "openset", "s2s"]
+groups = ["main", "nomad", "perception", "s2s"]
 files = [
     {file = "filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de"},
     {file = "filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2"},
@@ -1746,7 +1747,7 @@ version = "4.59.0"
 description = "Tools to manipulate font files"
 optional = false
 python-versions = ">=3.9"
-groups = ["nomad", "openset"]
+groups = ["nomad", "perception"]
 files = [
     {file = "fonttools-4.59.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:524133c1be38445c5c0575eacea42dbd44374b310b1ffc4b60ff01d881fabb96"},
     {file = "fonttools-4.59.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:21e606b2d38fed938dde871c5736822dd6bda7a4631b92e509a1f5cd1b90c5df"},
@@ -1925,7 +1926,7 @@ version = "2025.3.0"
 description = "File-system specification"
 optional = false
 python-versions = ">=3.8"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 files = [
     {file = "fsspec-2025.3.0-py3-none-any.whl", hash = "sha256:efb87af3efa9103f94ca91a7f8cb7a4df91af9f74fc106c9c7ea0efd7277c1b3"},
     {file = "fsspec-2025.3.0.tar.gz", hash = "sha256:a935fd1ea872591f2b5148907d103488fc523295e6c64b835cfad8c3eca44972"},
@@ -2366,7 +2367,7 @@ version = "1.1.5"
 description = "Fast transfer of large files with the Hugging Face Hub."
 optional = false
 python-versions = ">=3.8"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\""
 files = [
     {file = "hf_xet-1.1.5-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:f52c2fa3635b8c37c7764d8796dfa72706cc4eded19d638331161e82b0792e23"},
@@ -2447,7 +2448,7 @@ version = "0.33.4"
 description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub"
 optional = false
 python-versions = ">=3.8.0"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 files = [
     {file = "huggingface_hub-0.33.4-py3-none-any.whl", hash = "sha256:09f9f4e7ca62547c70f8b82767eefadd2667f4e116acba2e3e62a5a81815a7bb"},
     {file = "huggingface_hub-0.33.4.tar.gz", hash = "sha256:6af13478deae120e765bfd92adad0ae1aec1ad8c439b46f23058ad5956cbca0a"},
@@ -2501,7 +2502,7 @@ version = "1.3.2"
 description = "A framework for elegantly configuring complex applications"
 optional = false
 python-versions = "*"
-groups = ["nomad", "openset"]
+groups = ["nomad", "perception"]
 files = [
     {file = "hydra-core-1.3.2.tar.gz", hash = "sha256:8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824"},
     {file = "hydra_core-1.3.2-py3-none-any.whl", hash = "sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b"},
@@ -2533,7 +2534,7 @@ version = "3.10"
 description = "Internationalized Domain Names in Applications (IDNA)"
 optional = false
 python-versions = ">=3.6"
-groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"]
+groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"]
 files = [
     {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"},
     {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"},
@@ -2704,7 +2705,7 @@ version = "2021.4.0"
 description = "Intel OpenMP* Runtime Library"
 optional = false
 python-versions = "*"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Windows\""
 files = [
     {file = "intel_openmp-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:41c01e266a7fdb631a7609191709322da2bbf24b252ba763f125dd651bcc7675"},
@@ -2720,7 +2721,7 @@ version = "0.1.10"
 description = "A library for providing I/O abstraction."
 optional = false
 python-versions = ">=3.6"
-groups = ["nomad", "openset"]
+groups = ["nomad", "perception"]
 files = [
     {file = "iopath-0.1.10.tar.gz", hash = "sha256:3311c16a4d9137223e20f141655759933e1eda24f8bff166af834af3c645ef01"},
 ]
@@ -2844,7 +2845,7 @@ version = "3.1.6"
 description = "A very fast and expressive template engine."
 optional = false
 python-versions = ">=3.7"
-groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"]
+groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"]
 files = [
     {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"},
     {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"},
@@ -3081,7 +3082,7 @@ version = "1.4.8"
 description = "A fast implementation of the Cassowary constraint solver"
 optional = false
 python-versions = ">=3.10"
-groups = ["nomad", "openset"]
+groups = ["nomad", "perception"]
 files = [
     {file = "kiwisolver-1.4.8-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88c6f252f6816a73b1f8c904f7bbe02fd67c09a69f7cb8a0eecdbf5ce78e63db"},
     {file = "kiwisolver-1.4.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c72941acb7b67138f35b879bbe85be0f6c6a70cab78fe3ef6db9c024d9223e5b"},
@@ -3605,7 +3606,7 @@ version = "3.0.2"
 description = "Safely add untrusted strings to HTML/XML markup."
 optional = false
 python-versions = ">=3.9"
-groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"]
+groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"]
 files = [
     {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"},
     {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"},
@@ -3696,7 +3697,7 @@ version = "3.10.3"
 description = "Python plotting package"
 optional = false
 python-versions = ">=3.10"
-groups = ["nomad", "openset"]
+groups = ["nomad", "perception"]
 files = [
     {file = "matplotlib-3.10.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:213fadd6348d106ca7db99e113f1bea1e65e383c3ba76e8556ba4a3054b65ae7"},
     {file = "matplotlib-3.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d3bec61cb8221f0ca6313889308326e7bb303d0d302c5cc9e523b2f2e6c73deb"},
@@ -3965,7 +3966,7 @@ version = "2021.4.0"
 description = "Intel® oneAPI Math Kernel Library"
 optional = false
 python-versions = "*"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Windows\""
 files = [
     {file = "mkl-2021.4.0-py2.py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.whl", hash = "sha256:67460f5cd7e30e405b54d70d1ed3ca78118370b65f7327d495e9c8847705e2fb"},
@@ -3997,7 +3998,7 @@ version = "1.3.0"
 description = "Python library for arbitrary-precision floating-point arithmetic"
 optional = false
 python-versions = "*"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 files = [
     {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"},
     {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"},
@@ -4281,7 +4282,7 @@ version = "3.4.2"
 description = "Python package for creating and manipulating graphs and networks"
 optional = false
 python-versions = ">=3.10"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 files = [
     {file = "networkx-3.4.2-py3-none-any.whl", hash = "sha256:df5d4365b724cf81b8c6a7312509d0c22386097011ad1abe274afd5e9d3bbc5f"},
     {file = "networkx-3.4.2.tar.gz", hash = "sha256:307c3669428c5362aab27c8a1260aa8f47c4e91d3891f48be0141738d8d053e1"},
@@ -4386,7 +4387,7 @@ version = "1.26.4"
 description = "Fundamental package for array computing in Python"
 optional = false
 python-versions = ">=3.9"
-groups = ["main", "nomad", "openset", "s2s", "simbench"]
+groups = ["main", "nomad", "perception", "s2s", "simbench"]
 files = [
     {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
     {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
@@ -4432,7 +4433,7 @@ version = "12.1.3.1"
 description = "CUBLAS native runtime libraries"
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"},
@@ -4445,7 +4446,7 @@ version = "12.1.105"
 description = "CUDA profiling tools runtime libs."
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"},
@@ -4458,7 +4459,7 @@ version = "12.1.105"
 description = "NVRTC native runtime libraries"
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"},
@@ -4471,7 +4472,7 @@ version = "12.1.105"
 description = "CUDA Runtime native Libraries"
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"},
@@ -4484,7 +4485,7 @@ version = "8.9.2.26"
 description = "cuDNN runtime libraries"
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl", hash = "sha256:5ccb288774fdfb07a7e7025ffec286971c06d8d7b4fb162525334616d7629ff9"},
@@ -4499,7 +4500,7 @@ version = "11.0.2.54"
 description = "CUFFT native runtime libraries"
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"},
@@ -4512,7 +4513,7 @@ version = "10.3.2.106"
 description = "CURAND native runtime libraries"
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"},
@@ -4525,7 +4526,7 @@ version = "11.4.5.107"
 description = "CUDA solver native runtime libraries"
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"},
@@ -4543,7 +4544,7 @@ version = "12.1.0.106"
 description = "CUSPARSE native runtime libraries"
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"},
@@ -4559,7 +4560,7 @@ version = "2.20.5"
 description = "NVIDIA Collective Communication Library (NCCL) Runtime"
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"},
@@ -4572,7 +4573,7 @@ version = "12.9.86"
 description = "Nvidia JIT LTO Library"
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_nvjitlink_cu12-12.9.86-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:e3f1171dbdc83c5932a45f0f4c99180a70de9bd2718c1ab77d14104f6d7147f9"},
@@ -4586,7 +4587,7 @@ version = "12.1.105"
 description = "NVIDIA Tools Extension"
 optional = false
 python-versions = ">=3"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""
 files = [
     {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"},
@@ -4615,7 +4616,7 @@ version = "2.3.0"
 description = "A flexible configuration library"
 optional = false
 python-versions = ">=3.6"
-groups = ["nomad", "openset"]
+groups = ["nomad", "perception"]
 files = [
     {file = "omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b"},
     {file = "omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7"},
@@ -4779,7 +4780,7 @@ version = "4.11.0.86"
 description = "Wrapper package for OpenCV python bindings."
 optional = false
 python-versions = ">=3.6"
-groups = ["main", "openset", "simbench"]
+groups = ["main", "perception", "simbench"]
 files = [
     {file = "opencv-python-4.11.0.86.tar.gz", hash = "sha256:03d60ccae62304860d232272e4a4fda93c39d595780cb40b161b310244b736a4"},
     {file = "opencv_python-4.11.0.86-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:432f67c223f1dc2824f5e73cdfcd9db0efc8710647d4e813012195dc9122a52a"},
@@ -5070,7 +5071,7 @@ version = "24.2"
 description = "Core utilities for Python packages"
 optional = false
 python-versions = ">=3.8"
-groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"]
+groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"]
 files = [
     {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"},
     {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"},
@@ -5267,7 +5268,7 @@ version = "11.3.0"
 description = "Python Imaging Library (Fork)"
 optional = false
 python-versions = ">=3.9"
-groups = ["main", "nomad", "openset", "simbench"]
+groups = ["main", "nomad", "perception", "simbench"]
 files = [
     {file = "pillow-11.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1b9c17fd4ace828b3003dfd1e30bff24863e0eb59b535e8f80194d9cc7ecf860"},
     {file = "pillow-11.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:65dc69160114cdd0ca0f35cb434633c75e8e7fad4cf855177a05bf38678f73ad"},
@@ -5392,7 +5393,7 @@ version = "4.3.8"
 description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
 optional = false
 python-versions = ">=3.9"
-groups = ["main", "docs", "nomad", "openset", "s2s"]
+groups = ["main", "docs", "nomad", "perception", "s2s"]
 files = [
     {file = "platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4"},
     {file = "platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc"},
@@ -5471,7 +5472,7 @@ version = "3.2.0"
 description = "Wraps the portalocker recipe for easy usage"
 optional = false
 python-versions = ">=3.9"
-groups = ["nomad", "openset"]
+groups = ["nomad", "perception"]
 files = [
     {file = "portalocker-3.2.0-py3-none-any.whl", hash = "sha256:3cdc5f565312224bc570c49337bd21428bba0ef363bbcf58b9ef4a9f11779968"},
     {file = "portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac"},
@@ -5838,7 +5839,7 @@ version = "2.0.10"
 description = "Official APIs for the MS-COCO dataset"
 optional = false
 python-versions = ">=3.9"
-groups = ["openset"]
+groups = ["perception"]
 files = [
     {file = "pycocotools-2.0.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:94d558e6a4b92620dad1684b74b6c1404e20d5ed3b4f3aed64ad817d5dd46c72"},
     {file = "pycocotools-2.0.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c4d61959f505f1333afd1666ece1a9f8dad318de160c56c7d03f22d7b5556478"},
@@ -6200,7 +6201,7 @@ version = "3.2.3"
 description = "pyparsing module - Classes and methods to define and execute parsing grammars"
 optional = false
 python-versions = ">=3.9"
-groups = ["nomad", "openset", "s2s"]
+groups = ["nomad", "perception", "s2s"]
 files = [
     {file = "pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf"},
     {file = "pyparsing-3.2.3.tar.gz", hash = "sha256:b9c13f1ab8b3b542f72e28f634bad4de758ab3ce4546e4301970ad6fa77c38be"},
@@ -6345,7 +6346,7 @@ version = "2.9.0.post0"
 description = "Extensions to the standard Python datetime module"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
-groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"]
+groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"]
 files = [
     {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"},
     {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"},
@@ -6410,7 +6411,7 @@ version = "311"
 description = "Python for Window Extensions"
 optional = false
 python-versions = "*"
-groups = ["nomad", "openset"]
+groups = ["nomad", "perception"]
 files = [
     {file = "pywin32-311-cp310-cp310-win32.whl", hash = "sha256:d03ff496d2a0cd4a5893504789d4a15399133fe82517455e78bad62efbb7f0a3"},
     {file = "pywin32-311-cp310-cp310-win_amd64.whl", hash = "sha256:797c2772017851984b97180b0bebe4b620bb86328e8a884bb626156295a63b3b"},
@@ -6433,7 +6434,7 @@ files = [
     {file = "pywin32-311-cp39-cp39-win_amd64.whl", hash = "sha256:e0c4cfb0621281fe40387df582097fd796e80430597cb9944f0ae70447bacd91"},
     {file = "pywin32-311-cp39-cp39-win_arm64.whl", hash = "sha256:62ea666235135fee79bb154e695f3ff67370afefd71bd7fea7512fc70ef31e3d"},
 ]
-markers = {nomad = "(sys_platform == \"win32\" or platform_system == \"Windows\") and (platform_python_implementation != \"PyPy\" or platform_system == \"Windows\")", openset = "platform_system == \"Windows\""}
+markers = {nomad = "(sys_platform == \"win32\" or platform_system == \"Windows\") and (platform_python_implementation != \"PyPy\" or platform_system == \"Windows\")", perception = "platform_system == \"Windows\""}

 [[package]]
 name = "pyyaml"
@@ -6441,7 +6442,7 @@ version = "6.0.2"
 description = "YAML parser and emitter for Python"
 optional = false
 python-versions = ">=3.8"
-groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"]
+groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"]
 files = [
     {file
= "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -6627,7 +6628,7 @@ url = "src/rai_bench" [[package]] name = "rai-core" -version = "2.5.4" +version = "2.5.5" description = "Core functionality for RAI framework" optional = false python-versions = "^3.10, <3.13" @@ -6663,6 +6664,26 @@ transforms3d = "^0.4.1" type = "directory" url = "src/rai_core" +[[package]] +name = "rai-perception" +version = "0.1.0" +description = "Package enabling perception capabilities for RAI" +optional = false +python-versions = "*" +groups = ["perception"] +files = [] +develop = true + +[package.dependencies] +rf-groundingdino = "^0.2.0" +sam2 = {git = "https://github.com/RobotecAI/Grounded-SAM-2", branch = "main"} +torch = "^2.3.1" +torchvision = "^0.18.1" + +[package.source] +type = "directory" +url = "src/rai_extensions/rai_perception" + [[package]] name = "rai-s2s" version = "1.0.0" @@ -6866,7 +6887,7 @@ version = "2024.11.6" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" -groups = ["main", "nomad", "openset", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "simbench"] files = [ {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, @@ -6970,7 +6991,7 @@ version = "2.32.4" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" -groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] files = [ {file = "requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c"}, {file = "requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422"}, @@ -7008,7 +7029,7 @@ version = "0.2.0" description = "open-set object detector" optional = false python-versions = "*" -groups = ["openset"] +groups = ["perception"] files = [ {file = "rf_groundingdino-0.2.0-py2.py3-none-any.whl", hash = "sha256:a7bd83412538c304ae191a78390bcca11ee3479568d7cafe28acbab9a79f1149"}, {file = "rf_groundingdino-0.2.0.tar.gz", hash = "sha256:be08ad94cd25a4959468eca2fabbeb8900c65573e3d05d3b659811c6f33da100"}, @@ -7234,7 +7255,7 @@ version = "0.5.3" description = "" optional = false python-versions = ">=3.7" -groups = ["nomad", "openset"] +groups = ["nomad", "perception"] files = [ {file = "safetensors-0.5.3-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:bd20eb133db8ed15b40110b7c00c6df51655a2998132193de2f75f72d99c7073"}, {file = "safetensors-0.5.3-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:21d01c14ff6c415c485616b8b0bf961c46b3b343ca59110d38d744e577f9cce7"}, @@ -7272,7 +7293,7 @@ version = "1.0" description = "SAM 2: Segment Anything in Images and Videos" optional = false python-versions = ">=3.10.0" -groups = ["openset"] +groups = ["perception"] files = [] develop = false @@ -7419,7 +7440,7 @@ version = "1.15.3" description = "Fundamental algorithms for scientific computing in Python" optional = false python-versions = ">=3.10" -groups = ["nomad", "openset", "s2s"] +groups = ["nomad", "perception", "s2s"] files = [ {file = "scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c"}, {file = 
"scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253"}, @@ -7639,7 +7660,7 @@ version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" -groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] files = [ {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, @@ -7945,7 +7966,7 @@ version = "0.26.0" description = "A set of easy-to-use utils that will come in handy in any Computer Vision project" optional = false python-versions = ">=3.9" -groups = ["openset"] +groups = ["perception"] files = [ {file = "supervision-0.26.0-py3-none-any.whl", hash = "sha256:dfece5805f8511817f140de2a94a5bcf55b3bb329a2ed6e2b00fb11218301ea6"}, {file = "supervision-0.26.0.tar.gz", hash = "sha256:03801068ab55f75be10142772c3b1f68826b3d7af98ae341dfe6a474af299fcc"}, @@ -7971,7 +7992,7 @@ version = "1.14.0" description = "Computer algebra system (CAS) in Python" optional = false python-versions = ">=3.9" -groups = ["nomad", "openset", "s2s"] +groups = ["nomad", "perception", "s2s"] files = [ {file = "sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5"}, {file = "sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517"}, @@ -8004,7 +8025,7 @@ version = "2021.13.1" description = "Intel® oneAPI Threading Building Blocks (oneTBB)" optional = false python-versions = "*" -groups = ["nomad", "openset", "s2s"] +groups = ["nomad", "perception", "s2s"] markers = "platform_system == \"Windows\"" files = [ {file = "tbb-2021.13.1-py2.py3-none-manylinux1_i686.whl", hash = 
"sha256:bb5bdea0c0e9e6ad0739e7a8796c2635ce9eccca86dd48c426cd8027ac70fb1d"}, @@ -8186,7 +8207,7 @@ version = "1.0.17" description = "PyTorch Image Models" optional = false python-versions = ">=3.8" -groups = ["openset"] +groups = ["perception"] files = [ {file = "timm-1.0.17-py3-none-any.whl", hash = "sha256:a84c73a9ec560a6e347dba2716ea1acccf71987aefade02b882cc89dc0a0ee21"}, {file = "timm-1.0.17.tar.gz", hash = "sha256:90ccf07894d6ae38259b7aa7c94d683002f5e360fd43471c84f9da6c3afb8a0d"}, @@ -8205,7 +8226,7 @@ version = "0.21.2" description = "" optional = false python-versions = ">=3.9" -groups = ["openset", "s2s"] +groups = ["perception", "s2s"] files = [ {file = "tokenizers-0.21.2-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:342b5dfb75009f2255ab8dec0041287260fed5ce00c323eb6bab639066fef8ec"}, {file = "tokenizers-0.21.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:126df3205d6f3a93fea80c7a8a266a78c1bd8dd2fe043386bafdd7736a23e45f"}, @@ -8250,7 +8271,7 @@ version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" -groups = ["main", "openset", "simbench"] +groups = ["main", "perception", "simbench"] files = [ {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, @@ -8285,7 +8306,7 @@ files = [ {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] -markers = {openset = "python_version == \"3.10\""} +markers = {perception = "python_version == \"3.10\""} [[package]] name = "tomli-w" @@ -8305,7 +8326,7 @@ version = "2.3.1" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = 
false python-versions = ">=3.8.0" -groups = ["nomad", "openset", "s2s"] +groups = ["nomad", "perception", "s2s"] files = [ {file = "torch-2.3.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:605a25b23944be5ab7c3467e843580e1d888b8066e5aaf17ff7bf9cc30001cc3"}, {file = "torch-2.3.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:f2357eb0965583a0954d6f9ad005bba0091f956aef879822274b1bcdb11bd308"}, @@ -8393,7 +8414,7 @@ version = "0.18.1" description = "image and video datasets and models for torch deep learning" optional = false python-versions = ">=3.8" -groups = ["nomad", "openset"] +groups = ["nomad", "perception"] files = [ {file = "torchvision-0.18.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3e694e54b0548dad99c12af6bf0c8e4f3350137d391dcd19af22a1c5f89322b3"}, {file = "torchvision-0.18.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:0b3bda0aa5b416eeb547143b8eeaf17720bdba9cf516dc991aacb81811aa96a5"}, @@ -8453,7 +8474,7 @@ version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" -groups = ["main", "nomad", "openset", "s2s", "simbench"] +groups = ["main", "nomad", "perception", "s2s", "simbench"] files = [ {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, @@ -8491,7 +8512,7 @@ version = "4.53.3" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.9.0" -groups = ["openset"] +groups = ["perception"] files = [ {file = "transformers-4.53.3-py3-none-any.whl", hash = "sha256:5aba81c92095806b6baf12df35d756cf23b66c356975fb2a7fa9e536138d7c75"}, {file = "transformers-4.53.3.tar.gz", hash = "sha256:b2eda1a261de79b78b97f7888fe2005fc0c3fabf5dad33d52cc02983f9f675d8"}, @@ -8579,7 +8600,7 @@ version = "2.3.1" description = "A language and compiler for custom 
Deep Learning operations" optional = false python-versions = "*" -groups = ["nomad", "openset", "s2s"] +groups = ["nomad", "perception", "s2s"] files = [ {file = "triton-2.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c84595cbe5e546b1b290d2a58b1494df5a2ef066dd890655e5b8a8a92205c33"}, {file = "triton-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9d64ae33bcb3a7a18081e3a746e8cf87ca8623ca13d2c362413ce7a486f893e"}, @@ -8588,7 +8609,7 @@ files = [ {file = "triton-2.3.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63381e35ded3304704ea867ffde3b7cfc42c16a55b3062d41e017ef510433d66"}, {file = "triton-2.3.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d968264523c7a07911c8fb51b4e0d1b920204dae71491b1fe7b01b62a31e124"}, ] -markers = {nomad = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\"", openset = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""} +markers = {nomad = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\"", perception = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.12\""} [package.dependencies] filelock = "*" @@ -8619,7 +8640,7 @@ version = "4.14.1" description = "Backported and Experimental Type Hints for Python 3.9+" optional = false python-versions = ">=3.9" -groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] files = [ {file = "typing_extensions-4.14.1-py3-none-any.whl", hash = "sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76"}, {file = "typing_extensions-4.14.1.tar.gz", hash = "sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36"}, @@ -8687,7 +8708,7 @@ version = "2.5.0" description = "HTTP library with thread-safe connection 
pooling, file post, and more." optional = false python-versions = ">=3.9" -groups = ["main", "docs", "nomad", "openset", "s2s", "simbench"] +groups = ["main", "docs", "nomad", "perception", "s2s", "simbench"] files = [ {file = "urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc"}, {file = "urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760"}, @@ -9205,7 +9226,7 @@ version = "0.43.0" description = "A formatter for Python code" optional = false python-versions = ">=3.7" -groups = ["openset"] +groups = ["perception"] files = [ {file = "yapf-0.43.0-py3-none-any.whl", hash = "sha256:224faffbc39c428cb095818cf6ef5511fdab6f7430a10783fdfb292ccf2852ca"}, {file = "yapf-0.43.0.tar.gz", hash = "sha256:00d3aa24bfedff9420b2e0d5d9f5ab6d9d4268e72afbf59bb3fa542781d5218e"}, @@ -9492,4 +9513,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.1" python-versions = "^3.10, <3.13" -content-hash = "de33c70f531a05547558afffd5c852cf4d3e645a60b16de6dd5a5fc77d81c8ef" +content-hash = "13b04415fd768a2d73f2bc1cd01c5ca80371112693cfd8086c4466281fcc24f2" diff --git a/pyproject.toml b/pyproject.toml index 70bfc33f5..a78a2389f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,14 +47,11 @@ optional = true rai_sim = {path = "src/rai_sim", develop = true} rai_bench = {path = "src/rai_bench", develop = true} -[tool.poetry.group.openset] +[tool.poetry.group.perception] optional = true -[tool.poetry.group.openset.dependencies] -torch = "^2.3.1" -torchvision = "^0.18.1" -rf-groundingdino = "^0.2.0" -sam2 = { git = "https://github.com/RobotecAI/Grounded-SAM-2", branch = "main" } +[tool.poetry.group.perception.dependencies] +rai_perception = {path = "src/rai_extensions/rai_perception", develop = true} [tool.poetry.group.nomad] optional = true diff --git a/src/rai_bench/rai_bench/manipulation_o3de/benchmark.py b/src/rai_bench/rai_bench/manipulation_o3de/benchmark.py index 
8bf9421d5..56e7a833f 100644 --- a/src/rai_bench/rai_bench/manipulation_o3de/benchmark.py +++ b/src/rai_bench/rai_bench/manipulation_o3de/benchmark.py @@ -43,7 +43,7 @@ GetROS2TopicsNamesAndTypesTool, MoveToPointTool, ) -from rai_open_set_vision.tools import GetGrabbingPointTool +from rai_perception.tools import GetGrabbingPointTool from rai_bench.base_benchmark import BaseBenchmark, RunSummary, TimeoutException from rai_bench.manipulation_o3de.interfaces import Task diff --git a/src/rai_bench/rai_bench/tool_calling_agent/mocked_tools.py b/src/rai_bench/rai_bench/tool_calling_agent/mocked_tools.py index b33d3d66e..b961a34b3 100644 --- a/src/rai_bench/rai_bench/tool_calling_agent/mocked_tools.py +++ b/src/rai_bench/rai_bench/tool_calling_agent/mocked_tools.py @@ -45,7 +45,7 @@ StartROS2ActionTool, ) from rai.types import Point -from rai_open_set_vision.tools import ( +from rai_perception.tools import ( DistanceMeasurement, GetDistanceToObjectsTool, GetGrabbingPointTool, diff --git a/src/rai_bringup/launch/openset.launch.py b/src/rai_bringup/launch/openset.launch.py index 5b583b091..47ea9b9f4 100644 --- a/src/rai_bringup/launch/openset.launch.py +++ b/src/rai_bringup/launch/openset.launch.py @@ -21,8 +21,8 @@ def generate_launch_description(): return LaunchDescription( [ ExecuteProcess( - cmd=["python", "run_vision_agents.py"], - cwd="src/rai_extensions/rai_open_set_vision/scripts", + cmd=["python", "run_perception_agents.py"], + cwd="src/rai_extensions/rai_perception/scripts", output="screen", ), ] diff --git a/src/rai_core/pyproject.toml b/src/rai_core/pyproject.toml index a456875b0..e542ca2ba 100644 --- a/src/rai_core/pyproject.toml +++ b/src/rai_core/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "rai_core" -version = "2.5.5" +version = "2.5.6" description = "Core functionality for RAI framework" authors = ["Maciej Majek ", "Bartłomiej Boczek ", "Kajetan Rachwał "] readme = "README.md" diff --git 
a/src/rai_core/rai/frontend/configurator.py b/src/rai_core/rai/frontend/configurator.py index 5624440c9..7d24fe42b 100644 --- a/src/rai_core/rai/frontend/configurator.py +++ b/src/rai_core/rai/frontend/configurator.py @@ -707,15 +707,15 @@ def additional_features(): """ ) - # OpenSET Instructions - st.subheader("OpenSET (Visual Understanding)") + # Perception Instructions + st.subheader("Perception (Visual Understanding)") st.markdown( """ - OpenSET provides visual understanding through Grounding DINO and Grounded SAM models. + Perception provides visual understanding through Grounding DINO and Grounded SAM models. - To install OpenSET dependencies, run: + To install Perception dependencies, run: ```bash - poetry install --with openset + poetry install --with perception ``` This will install: diff --git a/src/rai_core/rai/tools/ros2/manipulation/custom.py b/src/rai_core/rai/tools/ros2/manipulation/custom.py index 6e0d9655c..18881d437 100644 --- a/src/rai_core/rai/tools/ros2/manipulation/custom.py +++ b/src/rai_core/rai/tools/ros2/manipulation/custom.py @@ -31,10 +31,10 @@ ) try: - from rai_open_set_vision.tools import GetGrabbingPointTool + from rai_perception.tools import GetGrabbingPointTool except ImportError: logging.warning( - "rai_open_set_vision is not installed, GetGrabbingPointTool will not work" + "rai-perception is not installed, GetGrabbingPointTool will not work" ) diff --git a/src/rai_extensions/rai_open_set_vision/README.md b/src/rai_extensions/rai_open_set_vision/README.md deleted file mode 100644 index 3794edd08..000000000 --- a/src/rai_extensions/rai_open_set_vision/README.md +++ /dev/null @@ -1,150 +0,0 @@ - - -# RAI Open Set Vision - -This package provides a ROS2 Node which is an interface to the [Idea-Research GroundingDINO Model](https://github.com/IDEA-Research/GroundingDINO). -It allows for open-set detection. 
- -## Installation - -In your workspace you need to have an `src` folder containing this package `rai_open_set_vision` and the `rai_interfaces` package. - -### Preparing the GroundingDINO - -Add required ROS dependencies: - -``` -rosdep install --from-paths src --ignore-src -r -``` - -## Build and run - -In the base directory of the `RAI` package install dependencies: - -``` -poetry install --with openset -``` - -Source the ros installation - -``` -source /opt/ros/${ROS_DISTRO}/setup.bash -``` - -Run the build process: - -``` -colcon build --symlink-install -``` - -Source the environment - -``` -source setup_shell.sh -``` - -Run the `GroundedSamAgent` and `GroundingDinoAgent` agents. - -``` -python run_vision_agents.py -``` - - - -Agents create two ROS 2 Nodes: `grounding_dino` and `grounded_sam` using [ROS2Connector](../../../docs/API_documentation/connectors/ROS_2_Connectors.md). -These agents can be triggered by ROS2 services: - -- `grounding_dino_classify`: `rai_interfaces/srv/RAIGroundingDino` -- `grounded_sam_segment`: `rai_interfaces/srv/RAIGroundedSam` - -> [!TIP] -> -> If you wish to integrate open-set vision into your ros2 launch file, a premade launch -> file can be found in `rai/src/rai_bringup/launch/openset.launch.py` - -> [!NOTE] -> The weights will be downloaded to `~/.cache/rai` directory. - -## RAI Tools - -`rai_open_set_vision` package contains tools that can be used by [RAI LLM agents](../../../docs/tutorials/walkthrough.md) -enhance their perception capabilities. For more information on RAI Tools see -[Tool use and development](../../../docs/tutorials/tools.md) tutorial. - - - -### `GetDetectionTool` - -This tool calls the grounding dino service to use the model to see if the message from the provided camera topic contains objects from a comma separated prompt. - - - -> [!TIP] -> -> you can try example below with [rosbotxl demo](../../../docs/demos/rosbot_xl.md) binary. 
-> The binary exposes `/camera/camera/color/image_raw` and `/camera/camera/depth/image_raw` topics. - - - -**Example call** - -```python -from rai_open_set_vision.tools import GetDetectionTool -from rai.communication.ros2 import ROS2Connector, ROS2Context - -with ROS2Context(): - connector=ROS2Connector(node_name="test_node") - x = GetDetectionTool(connector=connector)._run( - camera_topic="/camera/camera/color/image_raw", - object_names=["chair", "human", "plushie", "box", "ball"], - ) -``` - -**Example output** - -``` -I have detected the following items in the picture - chair, human -``` - -### `GetDistanceToObjectsTool` - -This tool calls the grounding dino service to use the model to see if the message from the provided camera topic contains objects from a comma separated prompt. Then it utilises messages from depth camera to create an estimation of distance to a detected object. - -**Example call** - -```python -from rai_open_set_vision.tools import GetDetectionTool -from rai.communication.ros2 import ROS2Connector, ROS2Context - -with ROS2Context(): - connector=ROS2Connector(node_name="test_node") - connector.node.declare_parameter("conversion_ratio", 1.0) # scale parameter for the depth map - x = GetDistanceToObjectsTool(connector=connector)._run( - camera_topic="/camera/camera/color/image_raw", - depth_topic="/camera/camera/depth/image_rect_raw", - object_names=["chair", "human", "plushie", "box", "ball"], - ) - -``` - -**Example output** - -``` -I have detected the following items in the picture human: 3.77m away -``` - -## Simple ROS2 Client Node Example - -An example client is provided with the package as `rai_open_set_vision/talker.py` - -You can see it working by running: - -``` -python run_vision_agents.py -cd rai # rai repo BASE directory -ros2 run rai_open_set_vision talker --ros-args -p image_path:=src/rai_extensions/rai_open_set_vision/images/sample.jpg -``` - -If everything was set up properly you should see a couple of detections with classes 
`dinosaur`, `dragon`, and `lizard`. - - diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/examples/talker.py b/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/examples/talker.py deleted file mode 100644 index 2498c2574..000000000 --- a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/examples/talker.py +++ /dev/null @@ -1,119 +0,0 @@ -# Copyright (C) 2024 Robotec.AI -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -import cv2 -import numpy as np -import rclpy -from cv_bridge import CvBridge -from rclpy.node import Node - -from rai_interfaces.srv import RAIGroundedSam, RAIGroundingDino - - -class GDClientExample(Node): - def __init__(self): - super().__init__(node_name="GDClientExample", parameter_overrides=[]) - self.declare_parameter("image_path", "") - self.cli = self.create_client(RAIGroundingDino, "grounding_dino_classify") - while not self.cli.wait_for_service(timeout_sec=1.0): - self.get_logger().info( - "service grounding_dino_classify not available, waiting again..." 
- ) - self.req = RAIGroundingDino.Request() - self.bridge = CvBridge() - - def get_image_path(self) -> str: - image_path = self.get_parameter("image_path").value - assert isinstance(image_path, str) - return image_path - - def send_request(self): - image_path = self.get_parameter("image_path").value - assert isinstance(image_path, str) - img = cv2.imread(image_path) - # convert img to numpy array - img = np.array(img) - self.req.source_img = self.bridge.cv2_to_imgmsg(img, encoding="bgr8") - self.req.classes = "dragon , lizard , dinosaur" - self.req.box_threshold = 0.4 - self.req.text_threshold = 0.4 - - self.future = self.cli.call_async(self.req) - - -class GSClientExample(Node): - def __init__(self): - super().__init__(node_name="GSClientExample", parameter_overrides=[]) - self.cli = self.create_client(RAIGroundedSam, "grounded_sam_segment") - while not self.cli.wait_for_service(timeout_sec=1.0): - self.get_logger().info( - "service grounded_sam_segment not available, waiting again..." - ) - self.req = RAIGroundedSam.Request() - self.bridge = CvBridge() - - def send_request(self, image_path: str, data: RAIGroundingDino.Response): - self.req.detections = data.detections - img = cv2.imread(image_path) - # convert img to numpy array - img = np.array(img) - self.req.source_img = self.bridge.cv2_to_imgmsg(img, encoding="bgr8") - self.future = self.cli.call_async(self.req) - - -def main(args=None): - rclpy.init(args=args) - - gdino_client = GDClientExample() - gdino_client.send_request() - - gsam_client = GSClientExample() - - response = None - while rclpy.ok(): - rclpy.spin_once(gdino_client) - if gdino_client.future.done(): - try: - response: RAIGroundingDino.Response = gdino_client.future.result() # type: ignore - except Exception as e: - gdino_client.get_logger().info("Service call failed %r" % (e,)) - else: - assert response is not None - gdino_client.get_logger().info(f"{response.detections}") # CHANGE - break - assert response is not None - 
gsam_client.send_request(gdino_client.get_image_path(), response) - gsam_client.get_logger().info("making segmentation request") - while rclpy.ok(): - rclpy.spin_once(gsam_client) - if gsam_client.future.done(): - try: - gsam_client.get_logger().info("request finished") - response: RAIGroundedSam.Response = gsam_client.future.result() # type: ignore - gsam_client.get_logger().info(f"response: {response}") - except Exception as e: - gsam_client.get_logger().info("Service call failed %r" % (e,)) - else: - assert response is not None - gsam_client.get_logger().info(f"{response.masks}") # CHANGE - break - - gdino_client.destroy_node() - gsam_client.destroy_node() - rclpy.shutdown() - - -if __name__ == "__main__": - main() diff --git a/src/rai_extensions/rai_open_set_vision/setup.cfg b/src/rai_extensions/rai_open_set_vision/setup.cfg deleted file mode 100644 index 8f4725851..000000000 --- a/src/rai_extensions/rai_open_set_vision/setup.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[build_scripts] -executable = /usr/bin/env python3 -[develop] -script_dir=$base/lib/rai_open_set_vision -[install] -install_scripts=$base/lib/rai_open_set_vision diff --git a/src/rai_extensions/rai_open_set_vision/.gitignore b/src/rai_extensions/rai_perception/.gitignore similarity index 100% rename from src/rai_extensions/rai_open_set_vision/.gitignore rename to src/rai_extensions/rai_perception/.gitignore diff --git a/src/rai_extensions/rai_perception/README.md b/src/rai_extensions/rai_perception/README.md new file mode 100644 index 000000000..ab3028e23 --- /dev/null +++ b/src/rai_extensions/rai_perception/README.md @@ -0,0 +1,227 @@ + + +# RAI Perception + +This package provides ROS2 integration with [Idea-Research GroundingDINO Model](https://github.com/IDEA-Research/GroundingDINO) and [Grounded-SAM-2, RobotecAI fork](https://github.com/RobotecAI/Grounded-SAM-2) for object detection, segmentation, and gripping point calculation. 
The `GroundedSamAgent` and `GroundingDinoAgent` are ROS2 service nodes that can be readily added to ROS2 applications. The package also provides tools that can be used with [RAI LLM agents](../../../docs/tutorials/walkthrough.md) to construct conversational scenarios. + +In addition to these building blocks, this package includes utilities to facilitate development, such as a ROS2 client that demonstrates interactions with the agent nodes. + +## Installation + +Pip installation of `rai_perception` is being actively worked on; for now, to incorporate it into your application you will need to set up a ROS2 workspace. + +### ROS2 Workspace Setup + +Create a ROS2 workspace and copy this package into it: + +```bash +mkdir -p ~/rai_perception_ws/src +cd ~/rai_perception_ws/src + +# only checkout rai_perception package +git clone --depth 1 --branch main https://github.com/RobotecAI/rai.git temp +cd temp +git archive --format=tar --prefix=rai_perception/ HEAD:src/rai_extensions/rai_perception | tar -xf - +mv rai_perception ../rai_perception +cd .. +rm -rf temp +``` + +### ROS2 Dependencies + +Install the required ROS dependencies. From the workspace root, run: + +```bash +rosdep install --from-paths src --ignore-src -r +``` + +### Build and Run + +Source ROS2 and build: + +```bash +# Source ROS2 (humble or jazzy) +source /opt/ros/${ROS_DISTRO}/setup.bash + +# Build workspace +cd ~/rai_perception_ws +colcon build --symlink-install + +# Source ROS2 packages +source install/setup.bash +``` + +### Python Dependencies + +`rai_perception` depends on `rai-core` and `sam2`. There are many ways to set up a virtual environment and install these dependencies. Below, we provide an example using Poetry.
+ +**Step 1:** Copy the following template to `pyproject.toml` in your workspace root, updating it according to your directory setup: + +```toml +# rai_perception_project pyproject template +[tool.poetry] +name = "rai_perception_ws" +version = "0.1.0" +description = "ROS2 workspace for RAI perception" +package-mode = false + +[tool.poetry.dependencies] +python = "^3.10, <3.13" +rai-core = ">=2.5.4" +rai-perception = {path = "src/rai_perception", develop = true} + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" +``` + +**Step 2:** Install dependencies: + +First, create a virtual environment with Poetry: + +```bash +cd ~/rai_perception_ws +poetry lock +poetry install +``` + +Now we are ready to launch the perception agents: + +```bash +# Activate virtual environment +source "$(poetry env info --path)"/bin/activate +export PYTHONPATH="$(dirname "$(dirname "$(poetry run which python)")")/lib/python$(poetry run python --version | awk '{print $2}' | cut -d. -f1,2)/site-packages:$PYTHONPATH" + +# run agents +python src/rai_perception/scripts/run_perception_agents.py +``` + +> [!TIP] +> To manage the ROS 2 + Poetry environment with less friction, keep build tools (colcon) at the system level and use Poetry only for the runtime dependencies of your packages. + + + +The `rai-perception` agents create two ROS 2 nodes, `grounding_dino` and `grounded_sam`, using [ROS2Connector](../../../docs/API_documentation/connectors/ROS_2_Connectors.md). +These agents can be triggered by ROS2 services: + +- `grounding_dino_classify`: `rai_interfaces/srv/RAIGroundingDino` +- `grounded_sam_segment`: `rai_interfaces/srv/RAIGroundedSam` + +> [!TIP] +> +> If you wish to integrate open-set vision into your ROS 2 launch file, a premade launch +> file can be found in `rai/src/rai_bringup/launch/openset.launch.py`. + +> [!NOTE] +> The weights will be downloaded to the `~/.cache/rai` directory.
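The `PYTHONPATH` export in the Python Dependencies section reconstructs the environment's `site-packages` path in shell by parsing `poetry run python --version`. As a sanity check, the same path can be printed directly from the target interpreter with the standard library; this is an illustrative sketch, not part of the package:

```python
# Sketch: print the interpreter's site-packages path directly, instead of
# reconstructing it with dirname/awk. Run under `poetry run python` to see
# the value the shell export above prepends to PYTHONPATH.
import sys
import sysconfig

# e.g. <env prefix>/lib/python3.10/site-packages (exact layout varies by platform)
purelib = sysconfig.get_paths()["purelib"]
print(purelib)

# The "pythonX.Y" component the shell pipeline derives from `python --version`
print(f"python{sys.version_info.major}.{sys.version_info.minor}")
```

If the printed path matches what the export produces, the virtual environment is wired up correctly.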
+
+## RAI Tools
+
+The `rai_perception` package contains tools that can be used by [RAI LLM agents](../../../docs/tutorials/walkthrough.md)
+to enhance their perception capabilities. For more information on RAI Tools, see the
+[Tool use and development](../../../docs/tutorials/tools.md) tutorial.
+
+### `GetDetectionTool`
+
+This tool calls the GroundingDINO service to detect objects from a comma-separated prompt in the provided camera topic.
+
+> [!TIP]
+>
+> You can try the examples below with the [rosbotxl demo](../../../docs/demos/rosbot_xl.md) binary.
+> The binary exposes the `/camera/camera/color/image_raw` and `/camera/camera/depth/image_rect_raw` topics.
+
+**Example call**
+
+```python
+import time
+from rai_perception.tools import GetDetectionTool
+from rai.communication.ros2 import ROS2Connector, ROS2Context
+
+with ROS2Context():
+    connector = ROS2Connector(node_name="test_node")
+
+    # Wait for topic discovery to complete
+    print("Waiting for topic discovery...")
+    time.sleep(3)
+
+    x = GetDetectionTool(connector=connector)._run(
+        camera_topic="/camera/camera/color/image_raw",
+        object_names=["bed", "bed pillow", "table lamp", "plant", "desk"],
+    )
+    print(x)
+```
+
+**Example output**
+
+```
+I have detected the following items in the picture plant, table lamp, table lamp, bed, desk
+```
+
+### `GetDistanceToObjectsTool`
+
+This tool calls the GroundingDINO service to detect objects from a comma-separated prompt in the provided camera topic. It then uses messages from the depth camera to estimate the distance to the detected objects.
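The estimation can be sketched as follows. This is an illustrative approximation only, not the tool's exact implementation: crop the depth image to each detected bounding box, take a robust statistic (here, the median) of the valid depth readings, and scale by the `conversion_ratio` parameter.

```python
# Illustrative sketch only -- the tool's actual implementation may differ.
import numpy as np


def estimate_distance(depth: np.ndarray, cx: float, cy: float,
                      size_x: float, size_y: float,
                      conversion_ratio: float = 1.0) -> float:
    """Median depth inside a center/size bounding box, scaled by conversion_ratio."""
    x1, y1 = max(int(cx - size_x / 2), 0), max(int(cy - size_y / 2), 0)
    x2 = min(int(cx + size_x / 2), depth.shape[1])
    y2 = min(int(cy + size_y / 2), depth.shape[0])
    roi = depth[y1:y2, x1:x2]
    valid = roi[roi > 0]  # drop missing (zero) depth readings
    return float(np.median(valid)) * conversion_ratio


# Synthetic 8x8 depth map with an object at ~2.43 m in the middle
depth = np.zeros((8, 8))
depth[2:6, 2:6] = 2.43
print(f"{estimate_distance(depth, 4, 4, 4, 4):.2f}m")  # 2.43m
```

The median is preferred over the mean here because depth maps often contain zero-valued holes and outlier readings at object edges.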
+
+**Example call**
+
+```python
+from rai_perception.tools import GetDistanceToObjectsTool
+from rai.communication.ros2 import ROS2Connector, ROS2Context
+import time
+
+with ROS2Context():
+    connector = ROS2Connector(node_name="test_node")
+    connector.node.declare_parameter("conversion_ratio", 1.0)  # scale parameter for the depth map
+
+    # Wait for topic discovery to complete
+    print("Waiting for topic discovery...")
+    time.sleep(3)
+
+    x = GetDistanceToObjectsTool(connector=connector)._run(
+        camera_topic="/camera/camera/color/image_raw",
+        depth_topic="/camera/camera/depth/image_rect_raw",
+        object_names=["desk"],
+    )
+
+    print(x)
+```
+
+**Example output**
+
+```
+I have detected the following items in the picture desk: 2.43m away
+```
+
+## Simple ROS2 Client Node Example
+
+The `rai_perception/talker.py` example demonstrates how to use the perception services for object detection and segmentation. It shows the complete pipeline: GroundingDINO for object detection followed by GroundedSAM for instance segmentation, with visualization output.
+
+This example is useful for:
+
+- Testing the integration of the perception services
+- Understanding the ROS2 service call patterns
+- Seeing detection and segmentation results with bounding boxes and masks
+
+Run the example:
+
+```bash
+cd ~/rai_perception_ws
+python src/rai_perception/scripts/run_perception_agents.py
+```
+
+In a different window, run:
+
+```bash
+cd ~/rai_perception_ws
+ros2 run rai_perception talker --ros-args -p image_path:=src/rai_perception/images/sample.jpg
+```
+
+The example will detect objects (dragon, lizard, dinosaur) and save a visualization with bounding boxes and masks to `masks.png`.
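When drawing the visualization, the example converts the center/size bounding boxes returned by the detection service into pixel corner coordinates; the arithmetic, mirroring the `draw_bounding_box` helper in `talker.py`, is:

```python
# Convert a vision_msgs-style center/size bounding box to integer corner
# coordinates (top-left x1,y1 and bottom-right x2,y2) for drawing.
def bbox_corners(cx: float, cy: float, size_x: float, size_y: float):
    x1 = int(cx - size_x / 2)
    y1 = int(cy - size_y / 2)
    x2 = int(cx + size_x / 2)
    y2 = int(cy + size_y / 2)
    return x1, y1, x2, y2


print(bbox_corners(100.0, 60.0, 40.0, 20.0))  # (80, 50, 120, 70)
```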
+ + diff --git a/src/rai_extensions/rai_open_set_vision/images/sample.jpg b/src/rai_extensions/rai_perception/images/sample.jpg similarity index 100% rename from src/rai_extensions/rai_open_set_vision/images/sample.jpg rename to src/rai_extensions/rai_perception/images/sample.jpg diff --git a/src/rai_extensions/rai_open_set_vision/launch/example_communication_launch.xml b/src/rai_extensions/rai_perception/launch/example_communication_launch.xml similarity index 78% rename from src/rai_extensions/rai_open_set_vision/launch/example_communication_launch.xml rename to src/rai_extensions/rai_perception/launch/example_communication_launch.xml index 94ba45ef9..dc325880e 100644 --- a/src/rai_extensions/rai_open_set_vision/launch/example_communication_launch.xml +++ b/src/rai_extensions/rai_perception/launch/example_communication_launch.xml @@ -19,15 +19,15 @@ - + - + - + diff --git a/src/rai_extensions/rai_open_set_vision/launch/gdino_launch.xml b/src/rai_extensions/rai_perception/launch/gdino_launch.xml similarity index 88% rename from src/rai_extensions/rai_open_set_vision/launch/gdino_launch.xml rename to src/rai_extensions/rai_perception/launch/gdino_launch.xml index e04733bc9..f3d6a8f90 100644 --- a/src/rai_extensions/rai_open_set_vision/launch/gdino_launch.xml +++ b/src/rai_extensions/rai_perception/launch/gdino_launch.xml @@ -17,7 +17,7 @@ - + diff --git a/src/rai_extensions/rai_open_set_vision/launch/gsam_launch.xml b/src/rai_extensions/rai_perception/launch/gsam_launch.xml similarity index 89% rename from src/rai_extensions/rai_open_set_vision/launch/gsam_launch.xml rename to src/rai_extensions/rai_perception/launch/gsam_launch.xml index 20a8da475..a89fc8570 100644 --- a/src/rai_extensions/rai_open_set_vision/launch/gsam_launch.xml +++ b/src/rai_extensions/rai_perception/launch/gsam_launch.xml @@ -17,7 +17,7 @@ - + diff --git a/src/rai_extensions/rai_open_set_vision/package.xml b/src/rai_extensions/rai_perception/package.xml similarity index 95% rename from 
src/rai_extensions/rai_open_set_vision/package.xml rename to src/rai_extensions/rai_perception/package.xml index 9b22f49ee..a5b03096f 100644 --- a/src/rai_extensions/rai_open_set_vision/package.xml +++ b/src/rai_extensions/rai_perception/package.xml @@ -1,7 +1,7 @@ - rai_open_set_vision + rai_perception 0.1.0 Package enabling open set vision for RAI Kajetan Rachwał diff --git a/src/rai_extensions/rai_perception/pyproject.toml b/src/rai_extensions/rai_perception/pyproject.toml new file mode 100644 index 000000000..84948d5fa --- /dev/null +++ b/src/rai_extensions/rai_perception/pyproject.toml @@ -0,0 +1,18 @@ +[tool.poetry] +name = "rai_perception" +version = "0.1.0" +description = "Package enabling perception capabilities for RAI" +authors = ["Kajetan Rachwał "] +license = "Apache License 2.0" +readme = "README.md" + +[tool.poetry.dependencies] +# TODO:(juliaj) update sam2 dependency after https://github.com/RobotecAI/Grounded-SAM-2/pull/3 is merged +torch = "^2.3.1" +torchvision = "^0.18.1" +rf-groundingdino = "^0.2.0" +sam2 = { git = "https://github.com/RobotecAI/Grounded-SAM-2", branch = "main" } + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/NOTICE b/src/rai_extensions/rai_perception/rai_perception/NOTICE similarity index 100% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/NOTICE rename to src/rai_extensions/rai_perception/rai_perception/NOTICE diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/__init__.py b/src/rai_extensions/rai_perception/rai_perception/__init__.py similarity index 100% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/__init__.py rename to src/rai_extensions/rai_perception/rai_perception/__init__.py diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/agents/__init__.py b/src/rai_extensions/rai_perception/rai_perception/agents/__init__.py 
similarity index 100% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/agents/__init__.py rename to src/rai_extensions/rai_perception/rai_perception/agents/__init__.py diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/agents/base_vision_agent.py b/src/rai_extensions/rai_perception/rai_perception/agents/base_vision_agent.py similarity index 100% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/agents/base_vision_agent.py rename to src/rai_extensions/rai_perception/rai_perception/agents/base_vision_agent.py diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/agents/grounded_sam.py b/src/rai_extensions/rai_perception/rai_perception/agents/grounded_sam.py similarity index 94% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/agents/grounded_sam.py rename to src/rai_extensions/rai_perception/rai_perception/agents/grounded_sam.py index 62bc96deb..c016965ac 100644 --- a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/agents/grounded_sam.py +++ b/src/rai_extensions/rai_perception/rai_perception/agents/grounded_sam.py @@ -19,8 +19,8 @@ from cv_bridge import CvBridge from rai_interfaces.srv import RAIGroundedSam -from rai_open_set_vision.agents.base_vision_agent import BaseVisionAgent -from rai_open_set_vision.vision_markup.segmenter import GDSegmenter +from rai_perception.agents.base_vision_agent import BaseVisionAgent +from rai_perception.vision_markup.segmenter import GDSegmenter GSAM_NODE_NAME = "grounded_sam" GSAM_SERVICE_NAME = "grounded_sam_segment" diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/agents/grounding_dino.py b/src/rai_extensions/rai_perception/rai_perception/agents/grounding_dino.py similarity index 94% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/agents/grounding_dino.py rename to src/rai_extensions/rai_perception/rai_perception/agents/grounding_dino.py index bb01ee4a9..e809a3dda 100644 
--- a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/agents/grounding_dino.py +++ b/src/rai_extensions/rai_perception/rai_perception/agents/grounding_dino.py @@ -16,8 +16,8 @@ from pathlib import Path from rai_interfaces.msg import RAIDetectionArray -from rai_open_set_vision.agents.base_vision_agent import BaseVisionAgent -from rai_open_set_vision.vision_markup.boxer import GDBoxer +from rai_perception.agents.base_vision_agent import BaseVisionAgent +from rai_perception.vision_markup.boxer import GDBoxer GDINO_NODE_NAME = "grounding_dino" GDINO_SERVICE_NAME = "grounding_dino_classify" diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/configs/__init__.py b/src/rai_extensions/rai_perception/rai_perception/configs/__init__.py similarity index 100% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/configs/__init__.py rename to src/rai_extensions/rai_perception/rai_perception/configs/__init__.py diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/configs/gdino_config.py b/src/rai_extensions/rai_perception/rai_perception/configs/gdino_config.py similarity index 100% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/configs/gdino_config.py rename to src/rai_extensions/rai_perception/rai_perception/configs/gdino_config.py diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/configs/seg_config.yml b/src/rai_extensions/rai_perception/rai_perception/configs/seg_config.yml similarity index 100% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/configs/seg_config.yml rename to src/rai_extensions/rai_perception/rai_perception/configs/seg_config.yml diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/examples/__init__.py b/src/rai_extensions/rai_perception/rai_perception/examples/__init__.py similarity index 100% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/examples/__init__.py rename to 
src/rai_extensions/rai_perception/rai_perception/examples/__init__.py diff --git a/src/rai_extensions/rai_perception/rai_perception/examples/talker.py b/src/rai_extensions/rai_perception/rai_perception/examples/talker.py new file mode 100644 index 000000000..d6d4742fd --- /dev/null +++ b/src/rai_extensions/rai_perception/rai_perception/examples/talker.py @@ -0,0 +1,221 @@ +# Copyright (C) 2025 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from typing import List + +import cv2 +import numpy as np +import rclpy +from cv_bridge import CvBridge +from rclpy.node import Node + +from rai_interfaces.srv import RAIGroundedSam, RAIGroundingDino + + +class GDClientExample(Node): + def __init__(self, detection_classes: List[str]): + super().__init__(node_name="GDClientExample", parameter_overrides=[]) + self.declare_parameter("image_path", "") + self.cli = self.create_client(RAIGroundingDino, "grounding_dino_classify") + while not self.cli.wait_for_service(timeout_sec=1.0): + self.get_logger().info( + "service grounding_dino_classify not available, waiting again..." 
+ ) + self.req = RAIGroundingDino.Request() + self.detection_classes = detection_classes + self.bridge = CvBridge() + + def get_image_path(self) -> str: + image_path = self.get_parameter("image_path").value + assert isinstance(image_path, str) + return image_path + + def send_request(self): + image_path = self.get_parameter("image_path").value + assert isinstance(image_path, str) + img = cv2.imread(image_path) + # convert img to numpy array + img = np.array(img) + self.req.source_img = self.bridge.cv2_to_imgmsg(img, encoding="bgr8") + self.req.classes = ", ".join(self.detection_classes) + self.req.box_threshold = 0.4 + self.req.text_threshold = 0.4 + + self.future = self.cli.call_async(self.req) + + +class GSClientExample(Node): + def __init__(self): + super().__init__(node_name="GSClientExample", parameter_overrides=[]) + self.cli = self.create_client(RAIGroundedSam, "grounded_sam_segment") + while not self.cli.wait_for_service(timeout_sec=1.0): + self.get_logger().info( + "service grounded_sam_segment not available, waiting again..." 
+ ) + self.req = RAIGroundedSam.Request() + self.bridge = CvBridge() + + def send_request(self, image_path: str, data: RAIGroundingDino.Response): + self.req.detections = data.detections + img = cv2.imread(image_path) + # convert img to numpy array + img = np.array(img) + self.req.source_img = self.bridge.cv2_to_imgmsg(img, encoding="bgr8") + self.future = self.cli.call_async(self.req) + + +def draw_bounding_box(img, detection, color=(0, 255, 0)): + """Draw a single bounding box with label on the image.""" + bbox = detection.bbox + class_name = detection.results[0].hypothesis.class_id + confidence = detection.results[0].hypothesis.score + + # Calculate coordinates + x1 = int(bbox.center.position.x - bbox.size_x / 2) + y1 = int(bbox.center.position.y - bbox.size_y / 2) + x2 = int(bbox.center.position.x + bbox.size_x / 2) + y2 = int(bbox.center.position.y + bbox.size_y / 2) + + # Draw rectangle + cv2.rectangle(img, (x1, y1), (x2, y2), color, 2) + + # Add label + label = f"{class_name}: {confidence:.2f}" + label_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0] + cv2.rectangle( + img, (x1, y1 - label_size[1] - 10), (x1 + label_size[0], y1), color, -1 + ) + cv2.putText( + img, label, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2 + ) + + +def overlay_mask(img, mask_msg, bridge, mask_index): + """Overlay a single mask on the image with a unique color.""" + # Convert ROS2 Image message to numpy array + mask = bridge.imgmsg_to_cv2(mask_msg, desired_encoding="mono8") + + # Use different colors for different masks + colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)] # BGR + color = colors[mask_index % len(colors)] + + # Create colored mask + color_mask = np.zeros_like(img) + color_mask[mask > 0] = color + + # Blend with original image + return cv2.addWeighted(img, 0.7, color_mask, 0.3, 0) + + +def create_visualization(image, masks, detections, gsam_client): + """Create final visualization with masks and bounding boxes.""" + img 
= image.copy() + + # Draw all bounding boxes + for detection in detections.detections: + draw_bounding_box(img, detection) + + # Overlay all masks + for i, mask_msg in enumerate(masks): + img = overlay_mask(img, mask_msg, gsam_client.bridge, i) + + return img + + +def wait_for_detection(gdino_client): + """Wait for GroundingDINO detection to complete and return results.""" + while rclpy.ok(): + rclpy.spin_once(gdino_client) + if gdino_client.future.done(): + try: + gdino_response = gdino_client.future.result() + gdino_client.get_logger().info( + f"Number of detections: {len(gdino_response.detections.detections)}" + ) + + # Log detection details + for i, detection in enumerate(gdino_response.detections.detections): + class_name = detection.results[0].hypothesis.class_id + confidence = detection.results[0].hypothesis.score + bbox = detection.bbox + gdino_client.get_logger().info( + f"Detection {i}: {class_name} (conf: {confidence:.3f}) " + f"at ({bbox.center.position.x:.1f}, {bbox.center.position.y:.1f}) " + f"size {bbox.size_x:.1f}x{bbox.size_y:.1f}" + ) + return gdino_response + except Exception as e: + gdino_client.get_logger().error(f"Detection failed: {e}") + return None + return None + + +def wait_for_segmentation(gsam_client): + """Wait for GroundedSAM segmentation to complete and return results.""" + while rclpy.ok(): + rclpy.spin_once(gsam_client) + if gsam_client.future.done(): + try: + gsam_response = gsam_client.future.result() + gsam_client.get_logger().info( + f"Number of masks: {len(gsam_response.masks)}" + ) + return gsam_response + except Exception as e: + gsam_client.get_logger().error(f"Segmentation failed: {e}") + return None + return None + + +def main(args=None): + rclpy.init(args=args) + + # Initialize clients + gdino_client = GDClientExample(detection_classes=["dragon", "lizard", "dinosaur"]) + gsam_client = GSClientExample() + + try: + # Stage 1: Object Detection + gdino_client.send_request() + gdino_response = 
wait_for_detection(gdino_client) + + if gdino_response is None: + gdino_client.get_logger().error("Detection failed, exiting") + return + + # Stage 2: Object Segmentation + gsam_client.send_request(gdino_client.get_image_path(), gdino_response) + gsam_response = wait_for_segmentation(gsam_client) + + if gsam_response is None: + gsam_client.get_logger().error("Segmentation failed, exiting") + return + + # Stage 3: Create Visualization + img = cv2.imread(gdino_client.get_image_path()) + result_img = create_visualization( + img, gsam_response.masks, gdino_response.detections, gsam_client + ) + cv2.imwrite("masks.png", result_img) + print("Visualization saved to masks.png") + + finally: + gdino_client.destroy_node() + gsam_client.destroy_node() + rclpy.shutdown() + + +if __name__ == "__main__": + main() diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/tools/__init__.py b/src/rai_extensions/rai_perception/rai_perception/tools/__init__.py similarity index 100% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/tools/__init__.py rename to src/rai_extensions/rai_perception/rai_perception/tools/__init__.py diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/tools/gdino_tools.py b/src/rai_extensions/rai_perception/rai_perception/tools/gdino_tools.py similarity index 99% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/tools/gdino_tools.py rename to src/rai_extensions/rai_perception/rai_perception/tools/gdino_tools.py index ab8c868aa..416fba619 100644 --- a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/tools/gdino_tools.py +++ b/src/rai_extensions/rai_perception/rai_perception/tools/gdino_tools.py @@ -28,7 +28,7 @@ from rclpy.task import Future from rai_interfaces.srv import RAIGroundingDino -from rai_open_set_vision import GDINO_SERVICE_NAME +from rai_perception import GDINO_SERVICE_NAME # --------------------- Inputs --------------------- diff --git 
a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/tools/segmentation_tools.py b/src/rai_extensions/rai_perception/rai_perception/tools/segmentation_tools.py similarity index 99% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/tools/segmentation_tools.py rename to src/rai_extensions/rai_perception/rai_perception/tools/segmentation_tools.py index 16c6fc2df..310b707f9 100644 --- a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/tools/segmentation_tools.py +++ b/src/rai_extensions/rai_perception/rai_perception/tools/segmentation_tools.py @@ -33,7 +33,7 @@ ) from rai_interfaces.srv import RAIGroundedSam, RAIGroundingDino -from rai_open_set_vision import GDINO_SERVICE_NAME +from rai_perception import GDINO_SERVICE_NAME # --------------------- Inputs --------------------- diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/vision_markup/__init__.py b/src/rai_extensions/rai_perception/rai_perception/vision_markup/__init__.py similarity index 100% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/vision_markup/__init__.py rename to src/rai_extensions/rai_perception/rai_perception/vision_markup/__init__.py diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/vision_markup/boxer.py b/src/rai_extensions/rai_perception/rai_perception/vision_markup/boxer.py similarity index 100% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/vision_markup/boxer.py rename to src/rai_extensions/rai_perception/rai_perception/vision_markup/boxer.py diff --git a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/vision_markup/segmenter.py b/src/rai_extensions/rai_perception/rai_perception/vision_markup/segmenter.py similarity index 97% rename from src/rai_extensions/rai_open_set_vision/rai_open_set_vision/vision_markup/segmenter.py rename to src/rai_extensions/rai_perception/rai_perception/vision_markup/segmenter.py index 310baa46c..1d51acb5c 100644 --- 
a/src/rai_extensions/rai_open_set_vision/rai_open_set_vision/vision_markup/segmenter.py +++ b/src/rai_extensions/rai_perception/rai_perception/vision_markup/segmenter.py @@ -35,7 +35,7 @@ def __init__( ): self.cfg_path = "seg_config.yml" hydra.core.global_hydra.GlobalHydra.instance().clear() - hydra.initialize_config_module("rai_open_set_vision.configs") + hydra.initialize_config_module("rai_perception.configs") self.weight_path = str(weight_path) if use_cuda: diff --git a/src/rai_extensions/rai_open_set_vision/resource/rai_open_set_vision b/src/rai_extensions/rai_perception/resource/rai_perception similarity index 100% rename from src/rai_extensions/rai_open_set_vision/resource/rai_open_set_vision rename to src/rai_extensions/rai_perception/resource/rai_perception diff --git a/src/rai_extensions/rai_open_set_vision/scripts/run_vision_agents.py b/src/rai_extensions/rai_perception/scripts/run_perception_agents.py similarity index 91% rename from src/rai_extensions/rai_open_set_vision/scripts/run_vision_agents.py rename to src/rai_extensions/rai_perception/scripts/run_perception_agents.py index 70e526475..dc29c3221 100644 --- a/src/rai_extensions/rai_open_set_vision/scripts/run_vision_agents.py +++ b/src/rai_extensions/rai_perception/scripts/run_perception_agents.py @@ -15,7 +15,7 @@ import rclpy from rai.agents import wait_for_shutdown -from rai_open_set_vision.agents import GroundedSamAgent, GroundingDinoAgent +from rai_perception.agents import GroundedSamAgent, GroundingDinoAgent def main(): diff --git a/src/rai_extensions/rai_perception/setup.cfg b/src/rai_extensions/rai_perception/setup.cfg new file mode 100644 index 000000000..d5d86068f --- /dev/null +++ b/src/rai_extensions/rai_perception/setup.cfg @@ -0,0 +1,6 @@ +[build_scripts] +executable = /usr/bin/env python3 +[develop] +script_dir=$base/lib/rai_perception +[install] +install_scripts=$base/lib/rai_perception diff --git a/src/rai_extensions/rai_open_set_vision/setup.py 
b/src/rai_extensions/rai_perception/setup.py similarity index 89% rename from src/rai_extensions/rai_open_set_vision/setup.py rename to src/rai_extensions/rai_perception/setup.py index 07b52a827..488ede1ca 100644 --- a/src/rai_extensions/rai_open_set_vision/setup.py +++ b/src/rai_extensions/rai_perception/setup.py @@ -18,7 +18,7 @@ from setuptools import find_packages, setup -package_name = "rai_open_set_vision" +package_name = "rai_perception" setup( name=package_name, @@ -38,12 +38,12 @@ zip_safe=True, maintainer="Kajetan Rachwał", maintainer_email="kajetan.rachwal@robotec.ai", - description="Package enabling open set vision for RAI", + description="Package enabling perception capabilities for RAI", license="Apache License 2.0", tests_require=["pytest"], entry_points={ "console_scripts": [ - "talker = rai_open_set_vision.examples.talker:main", + "talker = rai_perception.examples.talker:main", ], }, ) diff --git a/tests/rai_open_set_vision/test_base_vision_agent.py b/tests/rai_perception/test_base_vision_agent.py similarity index 97% rename from tests/rai_open_set_vision/test_base_vision_agent.py rename to tests/rai_perception/test_base_vision_agent.py index 314cc77c7..13f6637e3 100644 --- a/tests/rai_open_set_vision/test_base_vision_agent.py +++ b/tests/rai_perception/test_base_vision_agent.py @@ -17,7 +17,7 @@ import pytest import rclpy -from rai_open_set_vision.agents.base_vision_agent import BaseVisionAgent +from rai_perception.agents.base_vision_agent import BaseVisionAgent class MockBaseVisionAgent(BaseVisionAgent): diff --git a/tests/tools/ros2/test_manipulation_custom.py b/tests/tools/ros2/test_manipulation_custom.py new file mode 100644 index 000000000..53e6a8b42 --- /dev/null +++ b/tests/tools/ros2/test_manipulation_custom.py @@ -0,0 +1,45 @@ +# Copyright (C) 2025 Robotec.AI +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import sys +from unittest.mock import patch + + +def test_rai_perception_import_error_handling(caplog): + """Test that ImportError for rai_perception.tools is handled gracefully with a warning.""" + # Remove the module from cache to allow re-import + module_name = "rai.tools.ros2.manipulation.custom" + if module_name in sys.modules: + del sys.modules[module_name] + + # Mock the import to raise ImportError for rai_perception.tools + original_import = __import__ + + def mock_import(name, *args, **kwargs): + if name == "rai_perception.tools": + raise ImportError("No module named 'rai_perception'") + return original_import(name, *args, **kwargs) + + with patch("builtins.__import__", side_effect=mock_import): + with caplog.at_level(logging.WARNING): + # Import should succeed despite the ImportError for rai_perception + import rai.tools.ros2.manipulation.custom # noqa: F401 + + # Check that the warning was logged + assert any( + "rai-perception is not installed, GetGrabbingPointTool will not work" + in record.message + for record in caplog.records + )