"""WebSocket frame-ingest service with a shared pool of violation detectors.

Clients connect to ``WS_ENDPOINT``, send JSON heartbeats (``{"type": "heart"}``)
and binary image frames.  Each frame is decoded, saved under ``images/`` and
passed to a detector borrowed from ``model_pool``; violations are reported
back to the client as ``{"type": "danger", ...}`` messages.
"""
import asyncio
import datetime
import json
import os
from contextlib import asynccontextmanager, suppress
from typing import Dict, Optional, AsyncGenerator
from concurrent.futures import ThreadPoolExecutor  # explicit worker pool

from service.device_service import update_online_status_by_ip, increment_alarm_count_by_ip
from service.device_action_service import add_device_action
from schema.device_action_schema import DeviceActionCreate
import cv2
import numpy as np
from fastapi import WebSocket, APIRouter, WebSocketDisconnect, FastAPI
from queue import Queue  # thread-safe queue, no extra Lock required

from ocr.model_violation_detector import MultiModelViolationDetector

# -------------------------- Configuration --------------------------
# Model paths: overridable through environment variables of the same name;
# the defaults are the original hard-coded locations.
YOLO_MODEL_PATH = os.environ.get("YOLO_MODEL_PATH", r"D:\Git\bin\video\ocr\models\best.pt")
OCR_CONFIG_PATH = os.environ.get("OCR_CONFIG_PATH", r"D:\Git\bin\video\ocr\config\1.yaml")

# Number of detector instances loaded up front; this bounds how many frames
# can be analysed at the same time.
MODEL_POOL_SIZE = 5
# More worker threads than models, so bookkeeping calls (DB updates) are not
# starved by threads that are waiting for a free detector.
THREAD_POOL_SIZE = MODEL_POOL_SIZE * 2

HEARTBEAT_INTERVAL = 30   # seconds between heartbeat sweeps
HEARTBEAT_TIMEOUT = 600   # seconds without a heartbeat before a client is dropped
WS_ENDPOINT = "/ws"       # WebSocket route
FRAME_QUEUE_SIZE = 5      # per-client buffer of frames waiting to be processed


# -------------------------- Helpers --------------------------
def get_current_time_str() -> str:
    """Return the local time formatted for log lines and message timestamps."""
    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def get_current_time_file_str() -> str:
    """Return the local time (microsecond resolution) formatted for file names."""
    return datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")


# -------------------------- Model pool --------------------------
class ModelPool:
    """Fixed-size pool of pre-loaded ``MultiModelViolationDetector`` instances."""

    def __init__(self, pool_size: int = MODEL_POOL_SIZE):
        """Load ``pool_size`` detectors eagerly and park them in a queue."""
        # queue.Queue.get()/put() are thread-safe, so no separate lock is used.
        self.pool = Queue(maxsize=pool_size)
        self._init_models(pool_size)
        print(f"[{get_current_time_str()}] 模型池初始化完成(共{pool_size}个实例,显存已预分配)")

    def _init_models(self, pool_size: int):
        """Create every detector instance.

        Raises:
            RuntimeError: if any instance fails to load; the original
                exception is chained as ``__cause__``.
        """
        for i in range(pool_size):
            try:
                detector = MultiModelViolationDetector(
                    ocr_config_path=OCR_CONFIG_PATH,
                    yolo_model_path=YOLO_MODEL_PATH,
                    ocr_confidence_threshold=0.5
                )
                self.pool.put(detector)
                print(f"[{get_current_time_str()}] 模型实例{i+1}/{pool_size}加载完成")
            except Exception as e:
                raise RuntimeError(f"模型实例{i+1}加载失败:{str(e)}") from e

    def get_model(self) -> MultiModelViolationDetector:
        """Borrow a detector, BLOCKING the calling thread until one is free.

        Must not be called from the event-loop thread: the loop would freeze
        and no coroutine could make progress until a model is returned.
        """
        return self.pool.get()

    def return_model(self, detector: MultiModelViolationDetector):
        """Put a borrowed detector back so another task can use it."""
        self.pool.put(detector)


# -------------------------- Global resources --------------------------
model_pool = ModelPool(pool_size=MODEL_POOL_SIZE)  # loads all models at import time
thread_pool = ThreadPoolExecutor(
    max_workers=THREAD_POOL_SIZE,
    thread_name_prefix="ModelWorker-"  # named threads are easier to debug
)


# -------------------------- Per-client connection --------------------------
class ClientConnection:
    """State for one connected client: socket, heartbeat, frame queue, consumer task."""

    def __init__(self, websocket: WebSocket, client_ip: str):
        self.websocket = websocket
        self.client_ip = client_ip
        self.last_heartbeat = datetime.datetime.now()
        self.frame_queue = asyncio.Queue(maxsize=FRAME_QUEUE_SIZE)
        self.consumer_task: Optional[asyncio.Task] = None
        # No detector is stored here: models are borrowed per frame.

    def update_heartbeat(self):
        """Record that a heartbeat was just received."""
        self.last_heartbeat = datetime.datetime.now()

    def is_alive(self) -> bool:
        """Return True while the last heartbeat is younger than HEARTBEAT_TIMEOUT."""
        timeout = (datetime.datetime.now() - self.last_heartbeat).total_seconds()
        return timeout < HEARTBEAT_TIMEOUT

    def start_consumer(self):
        """Start (and return) the task that drains this client's frame queue."""
        self.consumer_task = asyncio.create_task(self.consume_frames())
        return self.consumer_task

    async def send_frame_permit(self):
        """Tell the client it may send another frame (best effort; errors are logged)."""
        try:
            await self.websocket.send_json({
                "type": "frame",
                "timestamp": get_current_time_str(),
                "client_ip": self.client_ip
            })
        except Exception as e:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:帧许可发送失败 - {str(e)}")

    async def consume_frames(self) -> None:
        """Process queued frames one at a time until the task is cancelled."""
        try:
            while True:
                # 1. Wait for the next frame.
                frame_data = await self.frame_queue.get()

                # 2. Immediately allow the client to send the following frame.
                await self.send_frame_permit()

                try:
                    # 3. Analyse the frame.
                    await self.process_frame(frame_data)
                finally:
                    self.frame_queue.task_done()
        except asyncio.CancelledError:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:帧消费任务已取消")
        except Exception as e:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:消费逻辑错误 - {str(e)}")

    def _analyse_frame(self, frame_data: bytes) -> Optional[tuple]:
        """Decode, save and analyse one frame. Runs in a worker thread.

        The detector is borrowed AND returned inside this thread.  That keeps
        the blocking ``model_pool.get_model()`` off the event loop, and it
        guarantees the detector is only handed back once detection has really
        finished, even if the awaiting coroutine was cancelled meanwhile.

        Returns:
            None when the bytes cannot be decoded as an image, otherwise the
            ``(has_violation, violation_type, details)`` tuple produced by
            ``detect_violations``.
        """
        nparr = np.frombuffer(frame_data, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img is None:
            return None

        # Keep a copy of every decoded frame.  ':' (IPv6 addresses) is replaced
        # as well because it is not a valid file-name character on Windows.
        os.makedirs('images', exist_ok=True)
        safe_ip = self.client_ip.replace('.', '_').replace(':', '_')
        filename = f"images/{safe_ip}_{get_current_time_file_str()}.jpg"
        cv2.imwrite(filename, img)

        # Borrow the model only for the detection itself.
        detector = model_pool.get_model()
        try:
            return detector.detect_violations(img)
        finally:
            model_pool.return_model(detector)
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:模型已归还(可复用)")

    async def process_frame(self, frame_data: bytes) -> None:
        """Analyse one frame and notify the client when a violation is found.

        All blocking work (decode, disk write, model wait, inference, DB
        update) is executed on ``thread_pool``.  Errors are logged, never
        raised, so one bad frame does not stop the consumer.
        """
        loop = asyncio.get_running_loop()
        try:
            result = await loop.run_in_executor(thread_pool, self._analyse_frame, frame_data)
            if result is None:
                print(f"[{get_current_time_str()}] 客户端{self.client_ip}:图像解析失败")
                return

            has_violation, violation_type, details = result
            if has_violation:
                print(f"[{get_current_time_str()}] 客户端{self.client_ip}:违规 - {violation_type}")
                # Update the alarm counter off the event loop.
                await loop.run_in_executor(thread_pool, increment_alarm_count_by_ip, self.client_ip)
                await self.websocket.send_json({
                    "type": "danger",
                    "timestamp": get_current_time_str(),
                    "client_ip": self.client_ip,
                    "violation_type": violation_type,
                    "details": details
                })
            else:
                print(f"[{get_current_time_str()}] 客户端{self.client_ip}:无违规")
        except Exception as e:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:帧处理错误 - {str(e)}")


# -------------------------- Global state and heartbeat --------------------------
connected_clients: Dict[str, ClientConnection] = {}
client_lock = asyncio.Lock()  # guards connected_clients; NOT re-entrant
heartbeat_task: Optional[asyncio.Task] = None


async def _expire_client(ip: str, current_time: str) -> None:
    """Disconnect and unregister the client at ``ip`` if it is still timed out."""
    async with client_lock:
        conn = connected_clients.get(ip)
        # Re-check under the lock: the entry may have been removed or replaced
        # by a fresh connection since the snapshot of timed-out IPs was taken.
        if not conn or conn.is_alive():
            return

        if conn.consumer_task and not conn.consumer_task.done():
            conn.consumer_task.cancel()
        try:
            await conn.websocket.close(code=1008, reason="心跳超时")
        except Exception as e:
            # The socket may already be closed; that must not stop the cleanup.
            print(f"[{get_current_time_str()}] 客户端{ip}:关闭超时连接失败 - {str(e)}")

        # Mark the device offline (blocking calls go to the thread pool).
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(thread_pool, update_online_status_by_ip, ip, 0)
        await loop.run_in_executor(
            thread_pool,
            add_device_action,
            DeviceActionCreate(client_ip=ip, action=0)
        )
        connected_clients.pop(ip)
        print(f"[{current_time}] 客户端{ip}:超时离线(资源已清理)")


async def heartbeat_checker():
    """Every HEARTBEAT_INTERVAL seconds, drop clients whose heartbeat timed out.

    A failure in one sweep is logged and the loop continues; only cancellation
    ends the task.
    """
    while True:
        current_time = get_current_time_str()
        try:
            async with client_lock:
                timeout_ips = [ip for ip, conn in connected_clients.items() if not conn.is_alive()]

            # The lock is released here: _expire_client acquires it itself.
            for ip in timeout_ips:
                await _expire_client(ip, current_time)

            async with client_lock:
                print(f"[{current_time}] 心跳检查:{len(connected_clients)}个客户端在线")
        except asyncio.CancelledError:
            raise
        except Exception as e:
            print(f"[{get_current_time_str()}] 心跳检查异常 - {str(e)}")

        await asyncio.sleep(HEARTBEAT_INTERVAL)


# -------------------------- Application lifespan --------------------------
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start the heartbeat task on startup; stop it and the thread pool on shutdown."""
    global heartbeat_task
    heartbeat_task = asyncio.create_task(heartbeat_checker())
    print(f"[{get_current_time_str()}] 心跳任务启动(ID:{id(heartbeat_task)})")
    print(f"[{get_current_time_str()}] 线程池启动(最大线程数:{THREAD_POOL_SIZE})")

    try:
        yield  # application is running
    finally:
        if heartbeat_task and not heartbeat_task.done():
            heartbeat_task.cancel()
            # Awaiting a cancelled task raises CancelledError; swallow it so
            # the rest of the shutdown sequence still runs.
            with suppress(asyncio.CancelledError):
                await heartbeat_task
            print(f"[{get_current_time_str()}] 心跳任务已关闭")

        # Wait for in-flight worker jobs before exiting.
        thread_pool.shutdown(wait=True)
        print(f"[{get_current_time_str()}] 线程池已关闭")


# -------------------------- WebSocket route --------------------------
ws_router = APIRouter()


@ws_router.websocket(WS_ENDPOINT)
async def websocket_endpoint(websocket: WebSocket):
    """Serve one client: register it, then relay heartbeats and frames until it leaves.

    Only one connection per client IP is kept; a new connection from the same
    IP closes the previous one.
    """
    await websocket.accept()
    client_ip = websocket.client.host if websocket.client else "unknown_ip"
    current_time = get_current_time_str()
    print(f"[{current_time}] 客户端{client_ip}:连接建立")

    new_conn = None
    is_online_updated = False

    try:
        # Replace any existing connection from the same IP.
        async with client_lock:
            old_conn = connected_clients.pop(client_ip, None)
            if old_conn is not None:
                if old_conn.consumer_task and not old_conn.consumer_task.done():
                    old_conn.consumer_task.cancel()
                try:
                    await old_conn.websocket.close(code=1008, reason="新连接抢占")
                except Exception as e:
                    # An already-closed old socket must not abort the new connection.
                    print(f"[{get_current_time_str()}] 客户端{client_ip}:旧连接关闭失败 - {str(e)}")
                print(f"[{current_time}] 客户端{client_ip}:旧连接已关闭")

        # Create the connection object and start consuming frames.
        new_conn = ClientConnection(websocket, client_ip)
        new_conn.start_consumer()

        # Initial permit so the client starts sending frames right away.
        await new_conn.send_frame_permit()

        # Mark the device online.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(thread_pool, update_online_status_by_ip, client_ip, 1)
        await loop.run_in_executor(
            thread_pool,
            add_device_action,
            DeviceActionCreate(client_ip=client_ip, action=1)
        )
        is_online_updated = True

        async with client_lock:
            connected_clients[client_ip] = new_conn
            print(f"[{current_time}] 客户端{client_ip}:注册成功(在线数:{len(connected_clients)})")

        # Message loop.
        while True:
            data = await websocket.receive()

            # receive() RETURNS the disconnect message instead of raising, so
            # it has to be translated into WebSocketDisconnect here.
            if data.get("type") == "websocket.disconnect":
                raise WebSocketDisconnect(data.get("code", 1000))

            # Some ASGI servers send both keys with one of them set to None,
            # so test the values rather than key presence.
            text = data.get("text")
            payload = data.get("bytes")

            if text is not None:
                # Text message (e.g. heartbeat).
                try:
                    msg = json.loads(text)
                except json.JSONDecodeError:
                    print(f"[{get_current_time_str()}] 客户端{client_ip}:无效JSON")
                    continue
                if isinstance(msg, dict) and msg.get("type") == "heart":
                    new_conn.update_heartbeat()
                    await websocket.send_json({
                        "type": "heart",
                        "timestamp": get_current_time_str(),
                        "client_ip": client_ip
                    })

            elif payload is not None:
                # Binary message (image frame).  put_nowait is required for
                # the drop-when-full policy: `await put()` would block this
                # loop (and heartbeat handling) and never raises QueueFull.
                try:
                    new_conn.frame_queue.put_nowait(payload)
                    print(f"[{get_current_time_str()}] 客户端{client_ip}:帧已入队(队列大小:{new_conn.frame_queue.qsize()})")
                except asyncio.QueueFull:
                    print(f"[{get_current_time_str()}] 客户端{client_ip}:帧队列满(丢弃当前帧)")

    except WebSocketDisconnect as e:
        print(f"[{get_current_time_str()}] 客户端{client_ip}:主动断开(代码:{e.code})")
    except Exception as e:
        print(f"[{get_current_time_str()}] 客户端{client_ip}:连接异常 - {str(e)[:50]}")
    finally:
        # Always stop THIS connection's consumer, registered or not.
        if new_conn is not None:
            if new_conn.consumer_task and not new_conn.consumer_task.done():
                new_conn.consumer_task.cancel()

        async with client_lock:
            # Only touch the registry / online status when the registered
            # entry is this very connection.  If a newer connection from the
            # same IP has taken over (or the heartbeat checker already removed
            # us), leave that state alone.
            owns_slot = new_conn is not None and connected_clients.get(client_ip) is new_conn
            if owns_slot:
                if is_online_updated:
                    loop = asyncio.get_running_loop()
                    await loop.run_in_executor(thread_pool, update_online_status_by_ip, client_ip, 0)
                    await loop.run_in_executor(
                        thread_pool,
                        add_device_action,
                        DeviceActionCreate(client_ip=client_ip, action=0)
                    )
                connected_clients.pop(client_ip)
            print(f"[{get_current_time_str()}] 客户端{client_ip}:资源清理完成(在线数:{len(connected_clients)})")