Files
video/ws/ws.py
ZZX9599 ec6dbfde90 优化
2025-09-04 17:33:20 +08:00

318 lines
15 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import asyncio
import datetime
import json
import os
from contextlib import asynccontextmanager
from typing import Dict, Optional, AsyncGenerator
from concurrent.futures import ThreadPoolExecutor # 新增:显式线程池
from service.device_service import update_online_status_by_ip, increment_alarm_count_by_ip
from service.device_action_service import add_device_action
from schema.device_action_schema import DeviceActionCreate
import cv2
import numpy as np
from fastapi import WebSocket, APIRouter, WebSocketDisconnect, FastAPI
from queue import Queue # 线程安全队列无需额外Lock
from ocr.model_violation_detector import MultiModelViolationDetector
# -------------------------- Configuration --------------------------
# Model paths. The defaults are the original hard-coded locations; each can be
# overridden by an environment variable of the same name so the service can be
# deployed on another machine without editing this file. An unset or empty
# variable falls back to the default.
YOLO_MODEL_PATH = os.environ.get("YOLO_MODEL_PATH") or r"D:\Git\bin\video\ocr\models\best.pt"
OCR_CONFIG_PATH = os.environ.get("OCR_CONFIG_PATH") or r"D:\Git\bin\video\ocr\config\1.yaml"
# Number of detector instances kept in the model pool. This bounds how many
# detections can run at the same time; every instance is loaded up front, so
# memory use grows with this value.
MODEL_POOL_SIZE = 5
# Worker threads available for blocking work; kept >= the model pool size so
# threads are not the bottleneck.
THREAD_POOL_SIZE = MODEL_POOL_SIZE * 2
# Other settings
HEARTBEAT_INTERVAL = 30  # seconds between heartbeat sweeps
HEARTBEAT_TIMEOUT = 600  # seconds without a heartbeat before a client is dropped
WS_ENDPOINT = "/ws"  # WebSocket endpoint path
FRAME_QUEUE_SIZE = 5  # per-client frame queue capacity
# -------------------------- 工具函数 --------------------------
def get_current_time_str() -> str:
    """Return the current local time as ``YYYY-MM-DD HH:MM:SS`` (used in log lines)."""
    now = datetime.datetime.now()
    return format(now, "%Y-%m-%d %H:%M:%S")
def get_current_time_file_str() -> str:
    """Return the current local time as ``YYYYMMDD_HHMMSS_ffffff`` for use in file names."""
    now = datetime.datetime.now()
    return format(now, "%Y%m%d_%H%M%S_%f")
# -------------------------- 模型池重构核心修改1 --------------------------
class ModelPool:
    """Fixed-size pool of preloaded ``MultiModelViolationDetector`` instances.

    All instances are created in the constructor and handed out through a
    thread-safe ``queue.Queue``; callers borrow one with ``get_model`` and
    must give it back with ``return_model``.
    """

    def __init__(self, pool_size: int = MODEL_POOL_SIZE):
        """Create the pool and eagerly load ``pool_size`` detector instances.

        Raises:
            ValueError: if ``pool_size`` is smaller than 1. ``Queue(maxsize=0)``
                would be unbounded and the pool would hold no models, so every
                ``get_model`` call would block forever.
            RuntimeError: if any detector instance fails to load.
        """
        if pool_size < 1:
            raise ValueError(f"pool_size must be >= 1, got {pool_size}")
        # Queue.get()/put() are thread-safe, so no extra lock is needed.
        self.pool = Queue(maxsize=pool_size)
        self._init_models(pool_size)
        print(f"[{get_current_time_str()}] 模型池初始化完成(共{pool_size}个实例,显存已预分配)")

    def _init_models(self, pool_size: int):
        """Load ``pool_size`` detector instances and put them into the queue."""
        for i in range(pool_size):
            try:
                detector = MultiModelViolationDetector(
                    ocr_config_path=OCR_CONFIG_PATH,
                    yolo_model_path=YOLO_MODEL_PATH,
                    ocr_confidence_threshold=0.5,
                )
            except Exception as e:
                # Chain the original error so the real load failure stays
                # visible in the traceback.
                raise RuntimeError(f"模型实例{i+1}加载失败:{str(e)}") from e
            self.pool.put(detector)
            print(f"[{get_current_time_str()}] 模型实例{i+1}/{pool_size}加载完成")

    def get_model(self, timeout: Optional[float] = None) -> MultiModelViolationDetector:
        """Borrow a detector, blocking until one is free.

        This is a blocking call: invoke it from a worker thread, not from the
        event-loop thread.

        Args:
            timeout: maximum seconds to wait; ``None`` (the default) waits
                indefinitely, which is the original behaviour.

        Raises:
            queue.Empty: if ``timeout`` elapses before a detector is free.
        """
        return self.pool.get(timeout=timeout)

    def return_model(self, detector: MultiModelViolationDetector):
        """Give a borrowed detector back so other tasks can use it."""
        self.pool.put(detector)
# -------------------------- Global resource initialization --------------------------
# Both objects are created at import time. ModelPool's constructor loads
# MODEL_POOL_SIZE detector instances immediately, so importing this module is
# expensive.
model_pool = ModelPool(pool_size=MODEL_POOL_SIZE)  # shared detector pool (models loaded up front)
thread_pool = ThreadPoolExecutor(  # explicit executor used for blocking calls (detection, DB updates)
    max_workers=THREAD_POOL_SIZE,
    thread_name_prefix="ModelWorker-"  # named threads are easier to spot when debugging
)
# -------------------------- 客户端连接封装核心修改3 --------------------------
class ClientConnection:
    """State and frame pipeline for one connected WebSocket client.

    Each instance owns a bounded frame queue and one consumer task that takes
    frames off the queue, tells the client it may send the next frame, and
    runs violation detection on the frame. Detectors are not owned by the
    connection; one is borrowed from ``model_pool`` per frame.
    """

    def __init__(self, websocket: WebSocket, client_ip: str):
        """Store the socket and client address and create an empty frame queue."""
        self.websocket = websocket
        self.client_ip = client_ip
        # A new connection counts as "just seen" until the first heartbeat.
        self.last_heartbeat = datetime.datetime.now()
        self.frame_queue = asyncio.Queue(maxsize=FRAME_QUEUE_SIZE)
        self.consumer_task: Optional[asyncio.Task] = None

    def update_heartbeat(self):
        """Record that a heartbeat was received just now."""
        self.last_heartbeat = datetime.datetime.now()

    def is_alive(self) -> bool:
        """Return True while the last heartbeat is newer than HEARTBEAT_TIMEOUT seconds."""
        elapsed = (datetime.datetime.now() - self.last_heartbeat).total_seconds()
        return elapsed < HEARTBEAT_TIMEOUT

    def start_consumer(self):
        """Start the frame-consumer task for this client and return it."""
        self.consumer_task = asyncio.create_task(self.consume_frames())
        return self.consumer_task

    async def send_frame_permit(self):
        """Tell the client it may send another frame; send errors are logged, not raised."""
        try:
            await self.websocket.send_json({
                "type": "frame",
                "timestamp": get_current_time_str(),
                "client_ip": self.client_ip
            })
        except Exception as e:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:帧许可发送失败 - {str(e)}")

    async def consume_frames(self) -> None:
        """Process queued frames one at a time until the task is cancelled."""
        try:
            while True:
                # Wait for the next frame.
                frame_data = await self.frame_queue.get()
                # Grant the next frame right away so the client keeps sending
                # while this frame is being analysed.
                await self.send_frame_permit()
                try:
                    await self.process_frame(frame_data)
                finally:
                    self.frame_queue.task_done()
        except asyncio.CancelledError:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:帧消费任务已取消")
            # Re-raise so the task is reported as cancelled rather than as
            # having finished normally.
            raise
        except Exception as e:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:消费逻辑错误 - {str(e)}")

    def _analyze_frame_blocking(self, frame_data: bytes):
        """Decode, archive and analyse one frame. Runs in a worker thread.

        Everything here blocks (image decode, disk write, waiting for a free
        detector, inference), so it must not run on the event-loop thread.
        The detector is borrowed and returned inside this one function: it is
        held only for the duration of inference, and it is never put back into
        the pool while this thread is still using it, even if the awaiting
        coroutine is cancelled.

        Returns:
            ``None`` if the bytes cannot be decoded as an image, otherwise
            whatever ``detector.detect_violations(img)`` returns.
        """
        img = cv2.imdecode(np.frombuffer(frame_data, np.uint8), cv2.IMREAD_COLOR)
        if img is None:
            return None
        # Archive the decoded frame.
        os.makedirs('images', exist_ok=True)
        # ':' (IPv6 addresses) is not a valid file-name character on Windows.
        safe_ip = self.client_ip.replace('.', '_').replace(':', '_')
        filename = f"images/{safe_ip}_{get_current_time_file_str()}.jpg"
        cv2.imwrite(filename, img)
        # Blocks this worker thread (not the event loop) until a detector is free.
        detector = model_pool.get_model()
        try:
            return detector.detect_violations(img)
        finally:
            model_pool.return_model(detector)
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:模型已归还(可复用)")

    async def process_frame(self, frame_data: bytes) -> None:
        """Analyse one frame and notify the client if a violation is found.

        All errors are logged and swallowed so that one bad frame does not
        stop the consumer loop.
        """
        loop = asyncio.get_running_loop()
        try:
            result = await loop.run_in_executor(
                thread_pool, self._analyze_frame_blocking, frame_data
            )
            if result is None:
                print(f"[{get_current_time_str()}] 客户端{self.client_ip}:图像解析失败")
                return
            has_violation, violation_type, details = result
            if has_violation:
                print(f"[{get_current_time_str()}] 客户端{self.client_ip}:违规 - {violation_type}")
                # The alarm counter update is a blocking call; keep it off the event loop.
                await loop.run_in_executor(thread_pool, increment_alarm_count_by_ip, self.client_ip)
                await self.websocket.send_json({
                    "type": "danger",
                    "timestamp": get_current_time_str(),
                    "client_ip": self.client_ip,
                    "violation_type": violation_type,
                    "details": details
                })
            else:
                print(f"[{get_current_time_str()}] 客户端{self.client_ip}:无违规")
        except Exception as e:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:帧处理错误 - {str(e)}")
# -------------------------- Global state and heartbeat --------------------------
# Registry of registered connections, keyed by client IP.
connected_clients: Dict[str, ClientConnection] = {}
client_lock = asyncio.Lock()  # async lock guarding connected_clients
# Handle of the background heartbeat task; assigned in lifespan().
heartbeat_task: Optional[asyncio.Task] = None
async def heartbeat_checker():
    """Periodically evict clients whose heartbeat has timed out.

    Runs forever (until cancelled). Every HEARTBEAT_INTERVAL seconds it scans
    the registered connections and, for each one that is no longer alive,
    cancels its consumer task, closes the socket, marks the device offline
    and removes it from ``connected_clients``.

    A failure while cleaning up one client is logged and does not stop the
    checker; previously any exception from ``close()`` or the status update
    ended this task and disabled timeout detection for the rest of the
    process lifetime.
    """
    loop = asyncio.get_running_loop()
    while True:
        current_time = get_current_time_str()
        async with client_lock:
            # Snapshot the timed-out IPs; cleanup happens per client below.
            timeout_ips = [ip for ip, conn in connected_clients.items() if not conn.is_alive()]
        for ip in timeout_ips:
            async with client_lock:
                conn = connected_clients.get(ip)
                # Re-check under the lock: the client may have disconnected,
                # reconnected or sent a heartbeat since the snapshot was taken.
                if not conn or conn.is_alive():
                    continue
                if conn.consumer_task and not conn.consumer_task.done():
                    conn.consumer_task.cancel()
                try:
                    await conn.websocket.close(code=1008, reason="心跳超时")
                except Exception as e:
                    # The socket may already be closed; cleanup continues regardless.
                    print(f"[{current_time}] 客户端{ip}:关闭超时连接失败 - {str(e)}")
                try:
                    # Blocking status updates run in the thread pool.
                    await loop.run_in_executor(thread_pool, update_online_status_by_ip, ip, 0)
                    await loop.run_in_executor(
                        thread_pool, add_device_action, DeviceActionCreate(client_ip=ip, action=0)
                    )
                except Exception as e:
                    print(f"[{current_time}] 客户端{ip}:离线状态更新失败 - {str(e)}")
                connected_clients.pop(ip, None)
                print(f"[{current_time}] 客户端{ip}:超时离线(资源已清理)")
        async with client_lock:
            print(f"[{current_time}] 心跳检查:{len(connected_clients)}个客户端在线")
        await asyncio.sleep(HEARTBEAT_INTERVAL)
# -------------------------- 应用生命周期核心修改5管理线程池 --------------------------
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
    """Application lifespan: start the heartbeat task, clean up on shutdown.

    On startup the heartbeat checker is launched as a background task. On
    shutdown (including when the application body raises) the task is
    cancelled and the worker thread pool is shut down.
    """
    global heartbeat_task
    heartbeat_task = asyncio.create_task(heartbeat_checker())
    print(f"[{get_current_time_str()}] 心跳任务启动ID{id(heartbeat_task)}")
    print(f"[{get_current_time_str()}] 线程池启动(最大线程数:{THREAD_POOL_SIZE}")
    try:
        yield  # application runs here
    finally:
        if heartbeat_task and not heartbeat_task.done():
            heartbeat_task.cancel()
            try:
                # Awaiting a cancelled task raises CancelledError. It must be
                # absorbed here, otherwise the thread pool below is never
                # shut down.
                await heartbeat_task
            except asyncio.CancelledError:
                pass
            print(f"[{get_current_time_str()}] 心跳任务已关闭")
        # Wait for in-flight worker jobs to finish before exiting.
        thread_pool.shutdown(wait=True)
        print(f"[{get_current_time_str()}] 线程池已关闭")
# -------------------------- WebSocket路由 --------------------------
ws_router = APIRouter()


@ws_router.websocket(WS_ENDPOINT)
async def websocket_endpoint(websocket: WebSocket):
    """Handle one WebSocket client: register it, pump messages, clean up.

    Flow:
      1. Accept the socket. If the same IP is already registered, cancel that
         connection's consumer task and close it.
      2. Start the frame consumer, send the first frame permit, mark the
         device online and register the connection.
      3. Loop over incoming messages: JSON text with ``type == "heart"``
         refreshes the heartbeat and is acknowledged; binary messages are
         queued as frames (dropped when the queue is full).
      4. On exit, cancel this connection's consumer task. The registry entry
         is removed and the device marked offline only if the entry still
         belongs to THIS connection, so a handler that was displaced by a
         newer connection from the same IP does not tear that one down.
    """
    await websocket.accept()
    client_ip = websocket.client.host if websocket.client else "unknown_ip"
    current_time = get_current_time_str()
    print(f"[{current_time}] 客户端{client_ip}:连接建立")
    new_conn = None
    is_online_updated = False
    loop = asyncio.get_running_loop()
    try:
        # Displace an existing connection from the same IP.
        async with client_lock:
            old_conn = connected_clients.pop(client_ip, None)
            if old_conn is not None:
                if old_conn.consumer_task and not old_conn.consumer_task.done():
                    old_conn.consumer_task.cancel()
                try:
                    await old_conn.websocket.close(code=1008, reason="新连接抢占")
                except Exception as e:
                    # The old socket may already be closed; that must not
                    # abort the new connection.
                    print(f"[{current_time}] 客户端{client_ip}:旧连接关闭失败 - {str(e)}")
                print(f"[{current_time}] 客户端{client_ip}:旧连接已关闭")
        # Create the new connection and start its consumer task.
        new_conn = ClientConnection(websocket, client_ip)
        new_conn.start_consumer()
        # Initial permit so the client starts sending frames immediately.
        await new_conn.send_frame_permit()
        # Mark the device online (blocking calls run in the thread pool).
        await loop.run_in_executor(thread_pool, update_online_status_by_ip, client_ip, 1)
        await loop.run_in_executor(
            thread_pool, add_device_action, DeviceActionCreate(client_ip=client_ip, action=1)
        )
        is_online_updated = True
        async with client_lock:
            connected_clients[client_ip] = new_conn
            print(f"[{current_time}] 客户端{client_ip}:注册成功(在线数:{len(connected_clients)}")
        # Message loop.
        while True:
            data = await websocket.receive()
            if data.get("type") == "websocket.disconnect":
                # receive() hands back the disconnect message instead of
                # raising; convert it so the normal disconnect path runs
                # rather than failing on the next receive() call.
                raise WebSocketDisconnect(data.get("code") or 1000)
            # A server may include a key with value None for the payload kind
            # that was not sent, so test the value, not key presence.
            text = data.get("text")
            payload = data.get("bytes")
            if text is not None:
                try:
                    msg = json.loads(text)
                except json.JSONDecodeError:
                    print(f"[{get_current_time_str()}] 客户端{client_ip}无效JSON")
                    continue
                # Valid JSON that is not an object (e.g. a list) is ignored.
                if isinstance(msg, dict) and msg.get("type") == "heart":
                    new_conn.update_heartbeat()
                    await websocket.send_json({
                        "type": "heart",
                        "timestamp": get_current_time_str(),
                        "client_ip": client_ip
                    })
            elif payload is not None:
                try:
                    # Non-blocking put: a full queue drops the frame instead
                    # of stalling this loop, which also carries heartbeats.
                    new_conn.frame_queue.put_nowait(payload)
                    print(f"[{get_current_time_str()}] 客户端{client_ip}:帧已入队(队列大小:{new_conn.frame_queue.qsize()}")
                except asyncio.QueueFull:
                    print(f"[{get_current_time_str()}] 客户端{client_ip}:帧队列满(丢弃当前帧)")
    except WebSocketDisconnect as e:
        print(f"[{get_current_time_str()}] 客户端{client_ip}:主动断开(代码:{e.code}")
    except Exception as e:
        print(f"[{get_current_time_str()}] 客户端{client_ip}:连接异常 - {str(e)[:50]}")
    finally:
        if new_conn is not None:
            # Always stop this connection's own consumer, even if the
            # connection never got registered.
            own_task = new_conn.consumer_task
            if own_task and not own_task.done():
                own_task.cancel()
            async with client_lock:
                # Only touch the registry entry if it is still ours.
                if connected_clients.get(client_ip) is new_conn:
                    if is_online_updated:
                        try:
                            await loop.run_in_executor(thread_pool, update_online_status_by_ip, client_ip, 0)
                            await loop.run_in_executor(
                                thread_pool, add_device_action, DeviceActionCreate(client_ip=client_ip, action=0)
                            )
                        except Exception as e:
                            print(f"[{get_current_time_str()}] 客户端{client_ip}:离线状态更新失败 - {str(e)}")
                    connected_clients.pop(client_ip, None)
        async with client_lock:
            print(f"[{get_current_time_str()}] 客户端{client_ip}:资源清理完成(在线数:{len(connected_clients)}")