329 lines
13 KiB
Python
329 lines
13 KiB
Python
|
|
import os
|
|||
|
|
import re
|
|||
|
|
import time
|
|||
|
|
import subprocess
|
|||
|
|
from pathlib import Path
|
|||
|
|
from typing import List, Dict
|
|||
|
|
from docx import Document
|
|||
|
|
from shutil import which
|
|||
|
|
import requests
|
|||
|
|
|
|||
|
|
INPUT_WORD = r"C:\Users\YC\Desktop\1.docx" # path of the tender document (.docx) to read
OUTPUT_WORD = r"C:\Users\YC\Desktop\投标文件-最终版.docx" # path the final bid document is written to
OLLAMA_MODEL = "alibayram/Qwen3-30B-A3B-Instruct-2507:latest" # model tag sent to the Ollama server (author's note: strongest local model at the time)
# Base URL of the Ollama server; "/api/chat" is appended when calling the model.
OLLAMA_BASE_URL = "http://192.168.110.5:11434"
|
|||
|
|
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ==================== Ollama chat call (128K context window, long output, fixed-delay retries) ====================
|
|||
|
|
def call_llm(messages: List[Dict], temperature=0.3, max_tokens=32768, num_ctx=131072):
    """Send one non-streaming chat request to the Ollama server and return the reply.

    Args:
        messages: Chat messages in the ``{"role": ..., "content": ...}`` form
            expected by the ``/api/chat`` endpoint.
        temperature: Sampling temperature, forwarded through ``options``.
        max_tokens: Upper bound on generated tokens (sent as ``num_predict``).
        num_ctx: Context window size requested from the server.

    Returns:
        The stripped text of the assistant message. If all 6 attempts fail,
        the placeholder string "【模型响应失败,已启用保底方案】" is returned instead
        of raising, so callers that need to detect failure must compare
        against that value.
    """
    url = f"{OLLAMA_BASE_URL}/api/chat"

    payload = {
        "model": OLLAMA_MODEL,
        "messages": messages,
        "stream": False,
        "options": {
            # Sampling parameters belong inside "options"; a top-level
            # "temperature" key is not part of the chat request schema, so the
            # caller's value was previously never applied.
            "temperature": temperature,
            "num_ctx": num_ctx,          # context window (default 131072 = 128K)
            "num_predict": max_tokens,   # maximum output length
            "num_gpu": 999,              # author's note: run fully on the GPU
            "top_p": 0.95,
            "top_k": 40,
            "repeat_penalty": 1.08,
            "mirostat": 2,
            "mirostat_tau": 5.0,
        },
    }

    headers = {"Content-Type": "application/json"}

    # Up to 6 attempts with a fixed pause between them (10 s after timeouts and
    # network errors, 5 s after anything else). There is no exponential backoff.
    for attempt in range(6):
        try:
            print(f" → 正在调用模型(第{attempt + 1}次尝试,最大等待15分钟)...")
            response = requests.post(
                url,
                json=payload,
                headers=headers,
                timeout=900,  # 900 s = 15 minutes, handed to requests as the timeout
            )
            response.raise_for_status()
            data = response.json()

            # Treat a reply without message.content as a failed attempt.
            if "message" not in data or "content" not in data["message"]:
                raise ValueError("返回格式异常")

            content = data["message"]["content"].strip()
            print(f" √ 模型返回成功,本次生成约 {len(content) // 2} 字")
            return content

        except requests.exceptions.Timeout:
            print(f" × 第{attempt + 1}次超时(15分钟未返回),10秒后重试...")
            time.sleep(10)
        except requests.exceptions.RequestException as e:
            # Covers connection failures and the HTTP errors raised by
            # raise_for_status().
            print(f" × 第{attempt + 1}次网络错误:{e},10秒后重试...")
            time.sleep(10)
        except Exception as e:
            # Deliberately broad: malformed JSON or an unexpected payload shape
            # should trigger another attempt rather than abort the whole run.
            print(f" × 未知错误:{e}")
            time.sleep(5)

    print(" × 模型彻底失联,返回保底内容")
    return "【模型响应失败,已启用保底方案】"
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ==================== Word → Markdown(不变,超稳)===================
|
|||
|
|
def word_to_md(word_path: str) -> str:
    """Convert a .docx file to Markdown and return the Markdown file's path.

    Pandoc is tried first (looked up on PATH, then in two common Windows
    install locations). If pandoc is missing or exits with a non-zero status,
    the paragraph text is extracted with python-docx instead.

    Args:
        word_path: Path of the Word document to convert.

    Returns:
        Path of the written file: ``word_path`` with its extension replaced by
        ``_tender.md``.
    """
    md_path = os.path.splitext(word_path)[0] + "_tender.md"
    print(f"正在转换招标文件 → Markdown:{os.path.basename(word_path)}")

    pandoc_cmd = which("pandoc") or which("pandoc.exe")
    if not pandoc_cmd:
        # Not on PATH: probe the usual Windows install locations.
        common = [
            os.path.expanduser(r"~\AppData\Local\Pandoc\pandoc.exe"),
            r"C:\Program Files\Pandoc\pandoc.exe",
        ]
        pandoc_cmd = next((p for p in common if os.path.exists(p)), None)

    if pandoc_cmd:
        result = subprocess.run(
            [pandoc_cmd, word_path, "-t", "markdown", "-o", md_path,
             "--extract-media=media", "--wrap=none"],
            capture_output=True,
            text=True,
            # Decode pandoc's output as UTF-8 rather than the locale codec so
            # a non-ASCII diagnostic cannot raise UnicodeDecodeError here.
            encoding="utf-8",
            errors="replace",
        )
        if result.returncode == 0:
            print("Pandoc 转换成功!")
            return md_path
        # Pandoc exists but failed: report why instead of claiming it is missing.
        print(f"Pandoc 转换失败(返回码 {result.returncode}):{result.stderr.strip()}")
        print("使用 python-docx 兜底...")
    else:
        print("Pandoc 未找到,使用 python-docx 兜底...")

    # Fallback: non-empty paragraphs only, joined by blank lines. Only
    # doc.paragraphs is read, so table content is not included.
    doc = Document(word_path)
    text = "\n\n".join(p.text for p in doc.paragraphs if p.text.strip())
    Path(md_path).write_text(text, encoding="utf-8")
    print("纯文本提取完成!")
    return md_path
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ==================== Two-stage outline generation: level-3 skeleton first, then level-4 headings in batches ====================
|
|||
|
|
def generate_full_outline(tender_md: str) -> str:
    """Build a four-level bid outline from the tender Markdown in two stages.

    Stage 1 asks the model for a three-level skeleton based on the first
    60,000 characters of the tender text. Stage 2 takes the level-3 headings
    found in that skeleton, 8 at a time, and asks the model to expand them into
    level-4 headings. The level-4 replies are appended after the skeleton in
    batch order; they are not interleaved under their parent headings.

    Args:
        tender_md: Path of the UTF-8 Markdown file holding the tender text.

    Returns:
        The skeleton followed by all level-4 replies. The same text is written
        to ``output/四级目录.md``.
    """
    tender_text = Path(tender_md).read_text(encoding="utf-8")
    print(f"招标文件共 {len(tender_text)//2} 字,开始两阶段生成四级目录...")

    # Stage 1: three-level skeleton from the first 60,000 characters.
    prompt1 = f"""请仔细阅读以下招标文件核心内容,只输出一个简洁但完整的三级目录(一级用“一、”,二级用“1、”,三级用“1.1、”)。
不要四级标题,不要任何说明文字,不要页码。

招标文件摘录(最关键部分):
{tender_text[:60000]}

直接输出三级目录:"""

    print("第1步:生成三级骨架(10秒内必出)...")
    outline_skeleton = call_llm([{"role": "user", "content": prompt1}],
                                temperature=0.01, max_tokens=10000)

    # Stage 2: collect the level-3 headings ("1.1、..." or "1.1 ...") so they
    # can be expanded in small batches.
    print("第2步:开始把每个三级标题展开成四级...")
    level3_pattern = re.compile(r'^\d+\.\d+[、 ]')
    level3_titles = [
        stripped
        for stripped in (raw.strip() for raw in outline_skeleton.split('\n'))
        if level3_pattern.match(stripped)
    ]

    group_size = 8  # level-3 headings per request
    full_outline = outline_skeleton + "\n"
    for group_no, start in enumerate(range(0, len(level3_titles), group_size), start=1):
        batch = level3_titles[start:start + group_size]
        batch_text = "\n".join(batch)

        # The prompt asks for 10~18 level-4 headings per level-3 heading.
        prompt2 = f"""你是一位招投标专家,请把下面这几个三级标题分别展开成 10~18 个专业四级标题(格式必须是 1.1.1、1.1.2、……)。
只输出四级标题部分,不要重复三级标题本身。

需要展开的三级标题:
{batch_text}

招标文件关键要求(用于展开参考):
{tender_text[:50000]}

直接输出四级标题:"""

        print(f" 正在展开第 {group_no} 组四级标题({len(batch)}个)...")
        level4_text = call_llm([{"role": "user", "content": prompt2}],
                               temperature=0.2, max_tokens=20000)
        full_outline += "\n" + level4_text + "\n"
        time.sleep(2)

    # Save and return. Create the directory so the function also works when
    # the caller has not prepared "output/" beforehand.
    outline_path = Path("output/四级目录.md")
    outline_path.parent.mkdir(parents=True, exist_ok=True)
    outline_path.write_text(full_outline, encoding="utf-8")
    print(f"超级四级目录生成成功!总计约 {len(full_outline)//2} 字(再也不怕超时了!)")
    return full_outline
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ==================== 分批生成正文(每批最多6个四级标题,避免超上下文)===================
|
|||
|
|
def batch_fill_content(outline: str, tender_text: str) -> str:
    """Generate body text for every level-4 heading in the outline, in batches.

    Level-4 headings are the outline lines that start with ``N.N.N、`` or
    ``N.N.N `` (followed by a space). They are sent to the model 6 at a time
    together with the first 60,000 characters of the tender text.

    Args:
        outline: Outline text, one heading per line.
        tender_text: Full tender text; only the first 60,000 characters are
            placed in each prompt.

    Returns:
        "# 正文内容开始" followed by each batch reply, joined with
        ``\\n\\n---\\n\\n``. The same text is written to ``output/正文内容.md``.
        With no level-4 headings the result is just the header line.
    """
    level4_pattern = re.compile(r'^(?:\d+\.\d+\.\d+、|[0-9]+\.[0-9]+\.[0-9]+ )')
    level4_titles = [
        stripped
        for stripped in (raw.strip() for raw in outline.split('\n'))
        if level4_pattern.match(stripped)
    ]
    print(f"共检测到 {len(level4_titles)} 个四级标题,将分批生成详细内容...")

    all_content = ["# 正文内容开始"]
    batch_size = 6  # author's estimate: 6 headings plus the tender excerpt stay within the 128K context
    # Ceiling division. "len // batch_size + 1" over-counts by one whenever the
    # heading count is an exact multiple of batch_size.
    total_batches = (len(level4_titles) + batch_size - 1) // batch_size

    for batch_no, start in enumerate(range(0, len(level4_titles), batch_size), start=1):
        batch = level4_titles[start:start + batch_size]
        titles_str = "\n".join(batch)

        # The 60,000-character cap keeps the prompt inside the context window.
        # That note must stay out of the f-string, otherwise it is sent to the
        # model as part of the prompt.
        prompt = f"""请为以下【{len(batch)}个四级标题】撰写极其详细、专业、可直接用于正式投标的正文内容。

要求每小节:
- 500—1000字(内容充实、逻辑严密)
- 至少包含 2 张以上专业 Markdown 表格(如进度表、资源配置表、检测项目表等)
- 使用【投标单位全称】【项目负责人】【联系电话】等占位符
- 语言正式、响应招标文件每一项要求
- 图文并茂(插入流程图、架构图说明文字)

当前批次标题:
{titles_str}

招标文件核心要求摘要(已精炼):
{tender_text[:60000]}

请按顺序为每个标题撰写完整内容,用 --- 分隔。"""

        print(f"正在生成第 {batch_no}/{total_batches} 批({len(batch)}个小节)...")
        part = call_llm([{"role": "user", "content": prompt}], temperature=0.45, max_tokens=32000)
        all_content.append(part)
        time.sleep(2)  # short pause between requests to avoid saturating the GPU

    final_content = "\n\n---\n\n".join(all_content)
    # Create the directory so the write does not depend on the caller having
    # prepared "output/" first.
    content_path = Path("output/正文内容.md")
    content_path.parent.mkdir(parents=True, exist_ok=True)
    content_path.write_text(final_content, encoding="utf-8")
    print(f"所有正文生成完成!总计约 {len(final_content) // 2} 字")
    return final_content
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ==================== 本地扩容到 5 万字+(美观填充)===================
|
|||
|
|
def expand_to_50000_words(content: str) -> str:
    """Pad the content with boilerplate sections when it is shorter than 100,000 characters.

    Content that already has 100,000 or more characters is returned unchanged.
    Otherwise 15 copies of a fixed appendix (after-sales service and reference
    projects tables) are appended. The padding is a fixed amount, so the result
    is not guaranteed to reach the 100,000-character mark.

    Args:
        content: Body text generated so far.

    Returns:
        The original or padded text.
    """
    length_now = len(content)
    if not length_now < 100000:
        return content

    # The progress message reports half the character count as the "word" count.
    print(f"当前 {length_now // 2} 字,正在补充至 5 万字+...")

    # Boilerplate sections commonly required in a bid document.
    filler = """
### 六、售后服务体系
#### 6.1 服务承诺
我单位承诺:7×24小时响应,2小时内到达现场,终身免费维护核心系统...

#### 6.2 维保人员配置表
| 序号 | 岗位 | 姓名 | 资质证书 | 联系方式 |
|------|------------|----------|----------------------|--------------|
| 1 | 项目经理 | 【项目负责人】 | PMP、一级建造师 | 138xxxxxxx |

### 七、类似工程业绩
| 序号 | 项目名称 | 业主单位 | 合同金额(万元) | 完成时间 | 联系人 |
|------|--------------------------|------------|----------------|----------|----------|
| 1 | xx市智慧交通一期工程 | xx市交通局 | 3860 | 2024.12 | 张工 |
"""
    return content + filler * 15
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ==================== 强制刷新 Word 目录(同前)===================
|
|||
|
|
def update_word_toc(docx_path: str):
    """Refresh every table of contents in a .docx through Word COM automation.

    Best effort only: any failure (pywin32 not importable, Word unavailable,
    document cannot be opened, ...) is printed and the function returns
    normally so the caller can continue.

    Args:
        docx_path: Path of the document to update; made absolute before it is
            handed to Word.
    """
    def _report(error):
        print(f"Word目录自动更新失败(可手动右键更新):{error}")

    def _attempt(step):
        # Run one cleanup step; a failure is reported but must not prevent the
        # remaining cleanup steps from running.
        try:
            step()
        except Exception as e:
            _report(e)

    try:
        import win32com.client as win32
        import pythoncom
    except Exception as e:
        _report(e)
        return

    word = None
    doc = None
    com_ready = False
    try:
        pythoncom.CoInitialize()
        com_ready = True
        word = win32.Dispatch('Word.Application')
        word.Visible = False
        doc = word.Documents.Open(os.path.abspath(docx_path))
        for toc in doc.TablesOfContents:
            toc.Update()
        doc.Save()
    except Exception as e:
        _report(e)
    finally:
        # Always release Word. Previously a failure after Dispatch left the
        # hidden Word process running with the document still open.
        if doc is not None:
            # 0 = do not save on close: the success path has already saved, and
            # on the failure path it avoids a save prompt in the hidden window.
            _attempt(lambda: doc.Close(0))
        if word is not None:
            _attempt(word.Quit)
        if com_ready:
            _attempt(pythoncom.CoUninitialize)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# ==================== 主流程 ====================
|
|||
|
|
def main():
    """Run the whole pipeline: tender .docx -> outline -> body text -> bid .docx.

    Steps: convert INPUT_WORD to Markdown, generate the outline, generate and
    pad the body text, assemble the final Markdown under ``output/``, convert
    it to OUTPUT_WORD (pandoc first, python-docx as fallback), refresh the
    Word table of contents, and open the result where the platform allows.
    """
    print("启动本地 Qwen3-30B 投标文件生成器(128K上下文版)\n")
    os.makedirs("output", exist_ok=True)

    # 1. Convert the tender document to Markdown.
    tender_md = word_to_md(INPUT_WORD)
    tender_text = Path(tender_md).read_text(encoding="utf-8")

    # 2. Generate the detailed outline.
    outline = generate_full_outline(tender_md)

    # 3. Generate the body text in batches, then pad it.
    content = batch_fill_content(outline, tender_text)
    content = expand_to_50000_words(content)

    # 4. Assemble the final Markdown.
    final_md = f"""# 【投标单位全称】

## {Path(INPUT_WORD).stem} - 投标文件

{outline}

{content}

## 附件清单
- 营业执照(副本)
- 法人授权委托书
- 资质证书扫描件
- 类似业绩证明材料
- 偏离表
"""
    final_md_path = "output/最终投标文件.md"
    Path(final_md_path).write_text(final_md, encoding="utf-8")
    print(f"\n最终 Markdown 生成成功!总计约 {len(final_md) // 2} 字")

    # 5. Convert to Word: pandoc first, python-docx as fallback.
    print("正在转换为 Word 文档...")
    success = False
    pandoc_cmd = which("pandoc") or which("pandoc.exe")
    if pandoc_cmd and os.path.exists(pandoc_cmd):
        cmd = [pandoc_cmd, final_md_path, "-o", OUTPUT_WORD]
        # Use a reference template for styling when one sits in the working directory.
        if os.path.exists("template.docx"):
            cmd.append("--reference-doc=template.docx")
        if subprocess.run(cmd, capture_output=True).returncode == 0:
            success = True

    if not success:
        print("Pandoc 失败,使用 python-docx 强制生成...")
        doc = Document()
        # Markdown heading prefix -> python-docx heading level. A "## " line
        # cannot match "# " because its second character is "#", not a space.
        heading_levels = (("# ", 0), ("## ", 1), ("### ", 2), ("#### ", 3))
        for raw_line in final_md.split('\n'):
            text = raw_line.strip()
            if not text:
                continue
            for prefix, level in heading_levels:
                if text.startswith(prefix):
                    doc.add_heading(text[len(prefix):], level)
                    break
            else:
                doc.add_paragraph(text)
        doc.save(OUTPUT_WORD)

    update_word_toc(OUTPUT_WORD)
    print("\n大功告成!投标文件已生成:")
    print(f" {OUTPUT_WORD}")
    print(f" 总字数约:{len(final_md) // 2} 字")
    # os.startfile exists only on Windows. Skip it elsewhere so a finished run
    # does not end with an AttributeError.
    if hasattr(os, "startfile"):
        os.startfile(OUTPUT_WORD)
|
|||
|
|
|
|||
|
|
|
|||
|
|
# Run the pipeline only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|