Files
video/ws/ws.py

318 lines
15 KiB
Python
Raw Normal View History

2025-09-02 18:51:50 +08:00
import asyncio
2025-09-03 16:27:53 +08:00
import datetime
2025-09-02 18:51:50 +08:00
import json
2025-09-03 17:02:22 +08:00
import os
2025-09-02 18:51:50 +08:00
from contextlib import asynccontextmanager
2025-09-03 17:08:28 +08:00
from typing import Dict, Optional, AsyncGenerator
2025-09-04 17:33:20 +08:00
from concurrent.futures import ThreadPoolExecutor # 新增:显式线程池
2025-09-04 12:29:27 +08:00
from service.device_service import update_online_status_by_ip, increment_alarm_count_by_ip
from service.device_action_service import add_device_action
from schema.device_action_schema import DeviceActionCreate
2025-09-02 18:51:50 +08:00
2025-09-03 17:02:22 +08:00
import cv2
2025-09-03 16:27:53 +08:00
import numpy as np
from fastapi import WebSocket, APIRouter, WebSocketDisconnect, FastAPI
2025-09-04 17:33:20 +08:00
from queue import Queue # 线程安全队列无需额外Lock
2025-09-03 16:27:53 +08:00
2025-09-03 17:02:22 +08:00
from ocr.model_violation_detector import MultiModelViolationDetector
2025-09-03 17:08:28 +08:00
2025-09-04 17:33:20 +08:00
# -------------------------- Configuration --------------------------
# Model file locations. The historical Windows paths remain the defaults, but
# each one can now be overridden with an environment variable of the same name
# so the service can be deployed without editing this file.
YOLO_MODEL_PATH = os.environ.get("YOLO_MODEL_PATH", r"D:\Git\bin\video\ocr\models\best.pt")
OCR_CONFIG_PATH = os.environ.get("OCR_CONFIG_PATH", r"D:\Git\bin\video\ocr\config\1.yaml")

# Number of detector instances held in the model pool. This caps how many
# detections can run at the same time; per the original author's note, GPU
# memory use grows roughly as pool size x memory of a single model.
MODEL_POOL_SIZE = 5
# Worker threads for the shared executor; kept at twice the pool size so that
# threads waiting for a model do not starve the ones running a detection.
THREAD_POOL_SIZE = MODEL_POOL_SIZE * 2

# Other settings
HEARTBEAT_INTERVAL = 30   # seconds between heartbeat sweeps
HEARTBEAT_TIMEOUT = 600   # seconds without a heartbeat before a client is dropped
WS_ENDPOINT = "/ws"       # WebSocket route path
FRAME_QUEUE_SIZE = 5      # per-client buffer of frames awaiting processing
2025-09-04 12:29:27 +08:00
2025-09-04 17:33:20 +08:00
# -------------------------- 工具函数 --------------------------
2025-09-04 17:29:52 +08:00
def get_current_time_str() -> str:
    """Return the current local time as ``YYYY-MM-DD HH:MM:SS`` (used in log lines and messages)."""
    now = datetime.datetime.now()
    return f"{now:%Y-%m-%d %H:%M:%S}"
def get_current_time_file_str() -> str:
    """Return the current local time as ``YYYYMMDD_HHMMSS_ffffff`` (microsecond precision, filename-safe)."""
    now = datetime.datetime.now()
    return f"{now:%Y%m%d_%H%M%S_%f}"
2025-09-04 17:33:20 +08:00
# -------------------------- Model pool (core change 1) --------------------------
2025-09-04 17:08:25 +08:00
class ModelPool:
    """Fixed-size pool of pre-loaded ``MultiModelViolationDetector`` instances.

    Every detector is constructed eagerly by the constructor and parked in a
    ``queue.Queue``. Callers borrow one with :meth:`get_model` and hand it back
    with :meth:`return_model`. ``Queue`` does its own locking, so no extra lock
    is kept here.
    """

    def __init__(self, pool_size: int = MODEL_POOL_SIZE):
        """Create the pool and load ``pool_size`` detectors.

        Args:
            pool_size: Number of detector instances to load; must be >= 1.

        Raises:
            ValueError: If ``pool_size`` is smaller than 1.
            RuntimeError: If any detector fails to load.
        """
        # Queue(maxsize<=0) is unbounded and nothing would be loaded, so
        # get_model() would block forever; reject that up front.
        if pool_size < 1:
            raise ValueError(f"pool_size must be >= 1, got {pool_size}")
        self.pool = Queue(maxsize=pool_size)
        self._init_models(pool_size)
        print(f"[{get_current_time_str()}] 模型池初始化完成(共{pool_size}个实例,显存已预分配)")

    def _init_models(self, pool_size: int):
        """Load all detector instances up front and put them into the pool.

        Raises:
            RuntimeError: If constructing an instance fails; the original
                exception is attached as ``__cause__``.
        """
        for i in range(pool_size):
            try:
                detector = MultiModelViolationDetector(
                    ocr_config_path=OCR_CONFIG_PATH,
                    yolo_model_path=YOLO_MODEL_PATH,
                    ocr_confidence_threshold=0.5
                )
            except Exception as e:
                # Chain explicitly so the real loader traceback is preserved.
                raise RuntimeError(f"模型实例{i+1}加载失败:{str(e)}") from e
            self.pool.put(detector)
            print(f"[{get_current_time_str()}] 模型实例{i+1}/{pool_size}加载完成")

    def get_model(self) -> MultiModelViolationDetector:
        """Borrow a detector, blocking the calling thread until one is free.

        Because this blocks, it must be called from a worker thread, never
        directly from a coroutine running on the event loop.
        """
        return self.pool.get()

    def return_model(self, detector: MultiModelViolationDetector):
        """Return a previously borrowed detector so other tasks can use it."""
        self.pool.put(detector)
2025-09-04 17:08:25 +08:00
2025-09-04 17:33:20 +08:00
# -------------------------- 全局资源初始化 --------------------------
# Shared detector pool; ModelPool loads every instance inside its constructor,
# so this line performs the model loading at import time.
model_pool = ModelPool(pool_size=MODEL_POOL_SIZE)

# Explicit executor used for detection and blocking service calls.
# The name prefix makes these workers easy to spot while debugging.
thread_pool = ThreadPoolExecutor(
    thread_name_prefix="ModelWorker-",
    max_workers=THREAD_POOL_SIZE,
)
2025-09-04 17:08:25 +08:00
2025-09-04 17:33:20 +08:00
# -------------------------- Client connection wrapper (core change 3) --------------------------
2025-09-02 18:51:50 +08:00
class ClientConnection:
    """State and frame-processing pipeline for one connected WebSocket client.

    The connection owns a bounded queue of raw frames and a consumer task that
    drains it. It does not own a detector: one is borrowed from the shared
    ``model_pool`` for each frame and returned as soon as detection finishes.
    """

    def __init__(self, websocket: WebSocket, client_ip: str):
        """Store the socket and client IP and create an empty frame queue."""
        self.websocket = websocket
        self.client_ip = client_ip
        # Wall-clock time of the latest heartbeat; starts at connection time.
        self.last_heartbeat = datetime.datetime.now()
        # Bounded buffer of still-encoded frames awaiting processing.
        self.frame_queue = asyncio.Queue(maxsize=FRAME_QUEUE_SIZE)
        # Set by start_consumer(); None until then.
        self.consumer_task: Optional[asyncio.Task] = None

    def update_heartbeat(self):
        """Record that a heartbeat was just received."""
        self.last_heartbeat = datetime.datetime.now()

    def is_alive(self) -> bool:
        """Return True while the last heartbeat is newer than ``HEARTBEAT_TIMEOUT`` seconds."""
        timeout = (datetime.datetime.now() - self.last_heartbeat).total_seconds()
        return timeout < HEARTBEAT_TIMEOUT

    def start_consumer(self):
        """Start the frame-consumer task for this client and return it."""
        self.consumer_task = asyncio.create_task(self.consume_frames())
        return self.consumer_task

    async def send_frame_permit(self):
        """Send a ``"frame"`` message telling the client it may send another frame.

        Best effort: a send failure is logged and not raised.
        """
        try:
            await self.websocket.send_json({
                "type": "frame",
                "timestamp": get_current_time_str(),
                "client_ip": self.client_ip
            })
        except Exception as e:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:帧许可发送失败 - {str(e)}")

    async def consume_frames(self) -> None:
        """Drain the frame queue forever, processing one frame at a time.

        Raises:
            asyncio.CancelledError: Re-raised after logging so the task ends
                in the cancelled state instead of hiding the cancellation.
        """
        try:
            while True:
                # 1. Wait for the next frame.
                frame_data = await self.frame_queue.get()
                # 2. Issue the next permit right away so the client keeps sending.
                await self.send_frame_permit()
                try:
                    # 3. Run detection on the frame.
                    await self.process_frame(frame_data)
                finally:
                    self.frame_queue.task_done()
        except asyncio.CancelledError:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:帧消费任务已取消")
            raise
        except Exception as e:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:消费逻辑错误 - {str(e)}")

    def _analyze_frame(self, frame_data: bytes):
        """Decode, save and analyse one frame; runs inside a worker thread.

        The detector is borrowed and returned within this same thread. That
        keeps the blocking ``model_pool.get_model()`` off the event loop and
        guarantees a detector is never handed back while still in use, even
        if the awaiting coroutine is cancelled.

        Returns:
            The ``(has_violation, violation_type, details)`` tuple produced by
            ``detect_violations``, or ``None`` if the image cannot be decoded.
        """
        # Raw bytes -> OpenCV image.
        nparr = np.frombuffer(frame_data, np.uint8)
        img = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
        if img is None:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:图像解析失败")
            return None

        # Keep a copy of the frame on disk. ':' is replaced as well as '.' so
        # IPv6 addresses still yield a valid file name.
        os.makedirs('images', exist_ok=True)
        safe_ip = self.client_ip.replace('.', '_').replace(':', '_')
        filename = f"images/{safe_ip}_{get_current_time_file_str()}.jpg"
        cv2.imwrite(filename, img)

        # Borrow a detector only for the duration of the detection itself.
        detector = model_pool.get_model()
        try:
            return detector.detect_violations(img)
        finally:
            model_pool.return_model(detector)
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:模型已归还(可复用)")

    async def process_frame(self, frame_data: bytes) -> None:
        """Process one frame and notify the client when a violation is found.

        All blocking work (decode, disk write, waiting for a detector and the
        detection) is delegated to ``thread_pool``. Borrowing the detector on
        the event loop would freeze the loop once the pool is exhausted, and
        the borrowers could then never resume to return their detectors.

        Errors are logged and not raised, so one bad frame does not stop the
        consumer.
        """
        loop = asyncio.get_running_loop()
        try:
            outcome = await loop.run_in_executor(thread_pool, self._analyze_frame, frame_data)
            if outcome is None:
                # Undecodable image; already logged by the worker.
                return
            has_violation, violation_type, details = outcome

            if has_violation:
                print(f"[{get_current_time_str()}] 客户端{self.client_ip}:违规 - {violation_type}")
                # Bump the alarm counter off the event loop.
                await loop.run_in_executor(thread_pool, increment_alarm_count_by_ip, self.client_ip)
                # Send the danger notification.
                await self.websocket.send_json({
                    "type": "danger",
                    "timestamp": get_current_time_str(),
                    "client_ip": self.client_ip,
                    "violation_type": violation_type,
                    "details": details
                })
            else:
                print(f"[{get_current_time_str()}] 客户端{self.client_ip}:无违规")
        except Exception as e:
            print(f"[{get_current_time_str()}] 客户端{self.client_ip}:帧处理错误 - {str(e)}")
2025-09-02 18:51:50 +08:00
2025-09-04 17:33:20 +08:00
# -------------------------- 全局状态与心跳 --------------------------
2025-09-02 18:51:50 +08:00
# Background heartbeat task; stays None until the application lifespan starts it.
heartbeat_task: Optional[asyncio.Task] = None

# Async lock guarding reads and writes of ``connected_clients``.
client_lock = asyncio.Lock()

# Live connections keyed by client IP.
connected_clients: Dict[str, ClientConnection] = dict()
async def heartbeat_checker():
    """Evict clients whose heartbeat has timed out; runs until cancelled.

    Every ``HEARTBEAT_INTERVAL`` seconds the registry is scanned. For each
    timed-out client the consumer task is cancelled, the socket is closed, the
    device is marked offline through the service layer, and the entry is
    removed. No detector needs to be returned here because detectors are
    borrowed and returned per frame.

    A failure while cleaning up one client is logged and does not stop the
    checker; the stale entry is removed regardless.
    """
    while True:
        current_time = get_current_time_str()
        async with client_lock:
            # Snapshot the timed-out IPs so the dict is not mutated while iterating.
            timeout_ips = [ip for ip, conn in connected_clients.items() if not conn.is_alive()]

        for ip in timeout_ips:
            async with client_lock:
                conn = connected_clients.get(ip)
                # Re-check under the lock: the entry may be gone, or may have
                # been replaced by a fresh connection since the snapshot.
                if conn is None or conn.is_alive():
                    continue
                try:
                    # Stop the consumer and close the socket.
                    if conn.consumer_task and not conn.consumer_task.done():
                        conn.consumer_task.cancel()
                    try:
                        await conn.websocket.close(code=1008, reason="心跳超时")
                    except Exception as e:
                        # The socket may already be closed; carry on with cleanup.
                        print(f"[{get_current_time_str()}] 客户端{ip}:关闭连接失败 - {str(e)}")
                    # Mark the device offline using the worker pool.
                    loop = asyncio.get_running_loop()
                    await loop.run_in_executor(thread_pool, update_online_status_by_ip, ip, 0)
                    await loop.run_in_executor(
                        thread_pool, add_device_action, DeviceActionCreate(client_ip=ip, action=0)
                    )
                    print(f"[{current_time}] 客户端{ip}:超时离线(资源已清理)")
                except Exception as e:
                    print(f"[{get_current_time_str()}] 客户端{ip}:超时清理失败 - {str(e)}")
                finally:
                    # Always drop the stale entry, even if cleanup partly failed.
                    connected_clients.pop(ip, None)

        # Report how many clients are still online.
        async with client_lock:
            print(f"[{current_time}] 心跳检查:{len(connected_clients)}个客户端在线")

        await asyncio.sleep(HEARTBEAT_INTERVAL)
2025-09-02 18:51:50 +08:00
2025-09-04 17:33:20 +08:00
# -------------------------- Application lifespan (core change 5: manages the thread pool) --------------------------
2025-09-02 18:51:50 +08:00
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan handler: start the heartbeat task, then clean up on shutdown.

    On shutdown the heartbeat task is cancelled and awaited, and the shared
    thread pool is shut down. Awaiting a cancelled task raises
    ``asyncio.CancelledError``; that is suppressed here so the thread pool is
    still shut down. Cleanup sits in ``finally`` so it also runs when the
    application exits with an error.
    """
    global heartbeat_task
    # Start the background heartbeat sweep.
    heartbeat_task = asyncio.create_task(heartbeat_checker())
    print(f"[{get_current_time_str()}] 心跳任务启动ID{id(heartbeat_task)}")
    print(f"[{get_current_time_str()}] 线程池启动(最大线程数:{THREAD_POOL_SIZE}")
    try:
        yield  # application is running
    finally:
        if heartbeat_task and not heartbeat_task.done():
            heartbeat_task.cancel()
            try:
                await heartbeat_task
            except asyncio.CancelledError:
                # Expected result of the cancel() above.
                pass
            print(f"[{get_current_time_str()}] 心跳任务已关闭")
        # Wait for in-flight jobs, then release the worker threads.
        thread_pool.shutdown(wait=True)
        print(f"[{get_current_time_str()}] 线程池已关闭")
2025-09-02 18:51:50 +08:00
2025-09-04 17:33:20 +08:00
# -------------------------- WebSocket路由 --------------------------
2025-09-04 12:29:27 +08:00
ws_router = APIRouter()


@ws_router.websocket(WS_ENDPOINT)
async def websocket_endpoint(websocket: WebSocket):
    """Serve one WebSocket client: register it, pump its messages, clean up.

    Flow:
      1. Accept the socket; close and unregister any older connection from
         the same IP.
      2. Create a ``ClientConnection``, start its consumer task, send the
         first frame permit, mark the device online and register it.
      3. Loop over incoming messages. Text messages are parsed as JSON, and
         ``{"type": "heart"}`` refreshes the heartbeat and is echoed back.
         Binary messages are queued as frames, and dropped if the queue is full.
      4. On exit, cancel this connection's consumer task. The device is marked
         offline and unregistered only if this connection is still the one
         registered for the IP, so a handler that was pre-empted by a newer
         connection does not tear the newer one down.
    """
    await websocket.accept()
    client_ip = websocket.client.host if websocket.client else "unknown_ip"
    current_time = get_current_time_str()
    print(f"[{current_time}] 客户端{client_ip}:连接建立")

    new_conn = None
    is_online_updated = False
    try:
        # Duplicate connection from the same IP: shut the old one down first.
        async with client_lock:
            old_conn = connected_clients.pop(client_ip, None)
            if old_conn is not None:
                if old_conn.consumer_task and not old_conn.consumer_task.done():
                    old_conn.consumer_task.cancel()
                try:
                    await old_conn.websocket.close(code=1008, reason="新连接抢占")
                except Exception as e:
                    # The old socket may already be dead; that must not abort
                    # the new connection.
                    print(f"[{get_current_time_str()}] 客户端{client_ip}:旧连接关闭失败 - {str(e)}")
                print(f"[{current_time}] 客户端{client_ip}:旧连接已关闭")

        # Create the connection and start consuming frames.
        new_conn = ClientConnection(websocket, client_ip)
        new_conn.start_consumer()
        # Initial permit so the client starts sending immediately.
        await new_conn.send_frame_permit()

        # Mark the device online using the worker pool.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(thread_pool, update_online_status_by_ip, client_ip, 1)
        await loop.run_in_executor(
            thread_pool, add_device_action, DeviceActionCreate(client_ip=client_ip, action=1)
        )
        is_online_updated = True

        async with client_lock:
            connected_clients[client_ip] = new_conn
            print(f"[{current_time}] 客户端{client_ip}:注册成功(在线数:{len(connected_clients)}")

        # Message loop: text (heartbeat) and binary (frame) messages.
        while True:
            data = await websocket.receive()

            # receive() reports a disconnect as a message rather than raising,
            # so convert it to the exception handled below.
            if data.get("type") == "websocket.disconnect":
                raise WebSocketDisconnect(data.get("code", 1000))

            text = data.get("text")
            payload = data.get("bytes")
            if text is not None:
                try:
                    msg = json.loads(text)
                except json.JSONDecodeError:
                    print(f"[{get_current_time_str()}] 客户端{client_ip}无效JSON")
                    continue
                # Ignore valid JSON that is not an object (e.g. a list or number).
                if isinstance(msg, dict) and msg.get("type") == "heart":
                    new_conn.update_heartbeat()
                    # Acknowledge the heartbeat.
                    await websocket.send_json({
                        "type": "heart",
                        "timestamp": get_current_time_str(),
                        "client_ip": client_ip
                    })
            elif payload is not None:
                # put_nowait: a full queue drops the frame instead of blocking
                # this loop, which would also delay heartbeat handling.
                try:
                    new_conn.frame_queue.put_nowait(payload)
                    print(f"[{get_current_time_str()}] 客户端{client_ip}:帧已入队(队列大小:{new_conn.frame_queue.qsize()}")
                except asyncio.QueueFull:
                    print(f"[{get_current_time_str()}] 客户端{client_ip}:帧队列满(丢弃当前帧)")
    except WebSocketDisconnect as e:
        print(f"[{get_current_time_str()}] 客户端{client_ip}:主动断开(代码:{e.code}")
    except Exception as e:
        print(f"[{get_current_time_str()}] 客户端{client_ip}:连接异常 - {str(e)[:50]}")
    finally:
        # No detector to return here: detectors are borrowed per frame.
        if new_conn is not None:
            # Always stop this connection's consumer, registered or not.
            if new_conn.consumer_task and not new_conn.consumer_task.done():
                new_conn.consumer_task.cancel()
            async with client_lock:
                # Only touch the registry and device status if this connection
                # is still the registered one for the IP.
                if connected_clients.get(client_ip) is new_conn:
                    try:
                        if is_online_updated:
                            loop = asyncio.get_running_loop()
                            await loop.run_in_executor(thread_pool, update_online_status_by_ip, client_ip, 0)
                            await loop.run_in_executor(
                                thread_pool, add_device_action, DeviceActionCreate(client_ip=client_ip, action=0)
                            )
                    finally:
                        connected_clients.pop(client_ip, None)

        async with client_lock:
            print(f"[{get_current_time_str()}] 客户端{client_ip}:资源清理完成(在线数:{len(connected_clients)}")