% Journal article (Scientific Reports, 2026); the url field points to the
% repository record for eprint 27154. The month is given as the standard
% three-letter macro so that bibliography styles can format or localize it.
@article{uninimx27154,
  author   = {Hafiz Muhammad Raza ur Rehman and
              M. Junaid Gul and
              Rabbiya Younas and
              Muhammad Zeeshan Jhandir and
              Roberto Marcelo {\'A}lvarez and
              Yini Airet Mir{\'o} Vera and
              Imran Ashraf},
  title    = {End-to-end emergency response protocol for tunnel accidents augmentation with reinforcement learning},
  journal  = {Scientific Reports},
  year     = {2026},
  month    = jan,
  keywords = {Robotic systems; drones; multi-agents system; path finding; reinforcement learning; tunnel hazards; unmanned aerial vehicles},
  abstract = {Autonomous unmanned aerial vehicles (UAVs) offer cost-effective and flexible solutions for a wide range of real-world applications, particularly in hazardous and time-critical environments. Their ability to navigate autonomously, communicate rapidly, and avoid collisions makes UAVs well suited for emergency response scenarios. However, real-time path planning in dynamic and unpredictable environments remains a major challenge, especially in confined tunnel infrastructures where accidents may trigger fires, smoke propagation, debris, and rapid environmental changes. In such conditions, conventional preplanned or model-based navigation approaches often fail due to limited visibility, narrow passages, and the absence of reliable localization signals. To address these challenges, this work proposes an end-to-end emergency response framework for tunnel accidents based on Multi-Agent Reinforcement Learning (MARL). Each UAV operates as an independent learning agent using an Independent Q-Learning paradigm, enabling real-time decision-making under limited computational resources. To mitigate premature convergence and local optima during exploration, Grey Wolf Optimization (GWO) is integrated as a policy-guidance mechanism within the reinforcement learning (RL) framework. A customized reward function is designed to prioritize victim discovery, penalize unsafe behavior, and explicitly discourage redundant exploration among agents.
The proposed approach is evaluated using a frontier-based exploration simulator under both single-agent and multi-agent settings with multiple goals. Extensive simulation results demonstrate that the proposed framework achieves faster goal discovery, improved map coverage, and reduced rescue time compared to state-of-the-art GWO-based exploration and random search algorithms. These results highlight the effectiveness of lightweight MARL-based coordination for autonomous UAV-assisted tunnel emergency response.},
  url      = {http://repositorio.unini.edu.mx/id/eprint/27154/},
}