⚒️ 重大重构 LoveACE V2
引入了 mongodb 对数据库进行了一定程度的数据加密 性能改善 代码简化 统一错误模型和响应 使用 apifox 作为文档
This commit is contained in:
267
loveace/router/endpoint/jwc/utils/competition.py
Normal file
267
loveace/router/endpoint/jwc/utils/competition.py
Normal file
@@ -0,0 +1,267 @@
|
||||
from typing import Optional
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from loveace.router.endpoint.jwc.model.competition import (
|
||||
AwardProject,
|
||||
CompetitionAwardsResponse,
|
||||
CompetitionCreditsSummaryResponse,
|
||||
CompetitionFullResponse,
|
||||
CreditsSummary,
|
||||
)
|
||||
|
||||
|
||||
class CompetitionInfoParser:
|
||||
"""
|
||||
创新创业管理平台信息解析器
|
||||
|
||||
功能:
|
||||
- 解析获奖项目列表(表格数据)
|
||||
- 解析学分汇总信息
|
||||
- 提取学生基本信息
|
||||
"""
|
||||
|
||||
def __init__(self, html_content: str):
|
||||
"""
|
||||
初始化解析器
|
||||
|
||||
参数:
|
||||
html_content: HTML页面内容字符串
|
||||
"""
|
||||
self.soup = BeautifulSoup(html_content, "html.parser")
|
||||
|
||||
def parse_awards(self) -> CompetitionAwardsResponse:
|
||||
"""
|
||||
解析获奖项目列表
|
||||
|
||||
返回:
|
||||
CompetitionAwardsResponse: 包含获奖项目列表的响应对象
|
||||
"""
|
||||
# 解析学生ID
|
||||
student_id = self._parse_student_id()
|
||||
|
||||
# 解析项目列表
|
||||
projects = self._parse_projects()
|
||||
|
||||
response = CompetitionAwardsResponse(
|
||||
student_id=student_id,
|
||||
total_count=len(projects),
|
||||
awards=projects,
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def parse_credits_summary(self) -> CompetitionCreditsSummaryResponse:
|
||||
"""
|
||||
解析学分汇总信息
|
||||
|
||||
返回:
|
||||
CompetitionCreditsSummaryResponse: 包含学分汇总信息的响应对象
|
||||
"""
|
||||
# 解析学生ID
|
||||
student_id = self._parse_student_id()
|
||||
|
||||
# 解析学分汇总
|
||||
credits_summary = self._parse_credits_summary()
|
||||
|
||||
response = CompetitionCreditsSummaryResponse(
|
||||
student_id=student_id,
|
||||
credits_summary=credits_summary,
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def parse_full_competition_info(self) -> CompetitionFullResponse:
|
||||
"""
|
||||
解析完整的学科竞赛信息(获奖项目 + 学分汇总)
|
||||
|
||||
一次性解析HTML,同时提取获奖项目列表和学分汇总信息,
|
||||
减少网络IO和数据库查询次数
|
||||
|
||||
返回:
|
||||
CompetitionFullResponse: 包含完整竞赛信息的响应对象
|
||||
"""
|
||||
# 解析学生ID
|
||||
student_id = self._parse_student_id()
|
||||
|
||||
# 解析项目列表
|
||||
projects = self._parse_projects()
|
||||
|
||||
# 解析学分汇总
|
||||
credits_summary = self._parse_credits_summary()
|
||||
|
||||
response = CompetitionFullResponse(
|
||||
student_id=student_id,
|
||||
total_awards_count=len(projects),
|
||||
awards=projects,
|
||||
credits_summary=credits_summary,
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
def _parse_student_id(self) -> str:
|
||||
"""
|
||||
解析学生基本信息 - 学生ID/工号
|
||||
|
||||
返回:
|
||||
str: 学生ID,如果未找到返回空字符串
|
||||
"""
|
||||
student_span = self.soup.find("span", id="ContentPlaceHolder1_lblXM")
|
||||
if student_span:
|
||||
text = student_span.get_text(strip=True)
|
||||
# 格式: "欢迎您:20244787"
|
||||
if ":" in text:
|
||||
return text.split(":")[1].strip()
|
||||
return ""
|
||||
|
||||
def _parse_projects(self) -> list:
|
||||
"""
|
||||
解析获奖项目列表
|
||||
|
||||
数据来源: 页面中ID为 ContentPlaceHolder1_ContentPlaceHolder2_gvHj 的表格
|
||||
|
||||
表格结构:
|
||||
- 第一行为表头
|
||||
- 后续行为项目数据
|
||||
- 包含15列数据
|
||||
|
||||
返回:
|
||||
list[AwardProject]: 获奖项目列表
|
||||
"""
|
||||
projects = []
|
||||
|
||||
# 查找项目列表表格
|
||||
table = self.soup.find(
|
||||
"table", id="ContentPlaceHolder1_ContentPlaceHolder2_gvHj"
|
||||
)
|
||||
if not table:
|
||||
return projects
|
||||
|
||||
rows = table.find_all("tr")
|
||||
# 跳过表头行(第一行)
|
||||
for row in rows[1:]:
|
||||
cells = row.find_all("td")
|
||||
if len(cells) < 9: # 至少需要9列数据
|
||||
continue
|
||||
|
||||
try:
|
||||
project = AwardProject(
|
||||
project_id=cells[0].get_text(strip=True),
|
||||
project_name=cells[1].get_text(strip=True),
|
||||
level=cells[2].get_text(strip=True),
|
||||
grade=cells[3].get_text(strip=True),
|
||||
award_date=cells[4].get_text(strip=True),
|
||||
applicant_id=cells[5].get_text(strip=True),
|
||||
applicant_name=cells[6].get_text(strip=True),
|
||||
order=int(cells[7].get_text(strip=True)),
|
||||
credits=float(cells[8].get_text(strip=True)),
|
||||
bonus=float(cells[9].get_text(strip=True)),
|
||||
status=cells[10].get_text(strip=True),
|
||||
verification_status=cells[11].get_text(strip=True),
|
||||
)
|
||||
projects.append(project)
|
||||
except (ValueError, IndexError):
|
||||
# 数据格式异常,记录但继续处理
|
||||
continue
|
||||
|
||||
return projects
|
||||
|
||||
def _parse_credits_summary(self) -> Optional[CreditsSummary]:
|
||||
"""
|
||||
解析学分汇总信息
|
||||
|
||||
数据来源: 页面中的学分汇总表中的各类学分 span 元素
|
||||
|
||||
提取内容:
|
||||
- 学科竞赛学分
|
||||
- 科研项目学分
|
||||
- 可转竞赛类学分
|
||||
- 创新创业实践学分
|
||||
- 能力资格认证学分
|
||||
- 其他项目学分
|
||||
|
||||
返回:
|
||||
CreditsSummary: 学分汇总对象,如果无法解析则返回 None
|
||||
"""
|
||||
discipline_competition_credits = None
|
||||
scientific_research_credits = None
|
||||
transferable_competition_credits = None
|
||||
innovation_practice_credits = None
|
||||
ability_certification_credits = None
|
||||
other_project_credits = None
|
||||
|
||||
# 查找学科竞赛学分
|
||||
xkjs_span = self.soup.find(
|
||||
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblXkjsxf"
|
||||
)
|
||||
if xkjs_span:
|
||||
text = xkjs_span.get_text(strip=True)
|
||||
discipline_competition_credits = self._parse_credit_value(text)
|
||||
|
||||
# 查找科研项目学分
|
||||
ky_span = self.soup.find(
|
||||
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblKyxf"
|
||||
)
|
||||
if ky_span:
|
||||
text = ky_span.get_text(strip=True)
|
||||
scientific_research_credits = self._parse_credit_value(text)
|
||||
|
||||
# 查找可转竞赛类学分
|
||||
kzjsl_span = self.soup.find(
|
||||
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblKzjslxf"
|
||||
)
|
||||
if kzjsl_span:
|
||||
text = kzjsl_span.get_text(strip=True)
|
||||
transferable_competition_credits = self._parse_credit_value(text)
|
||||
|
||||
# 查找创新创业实践学分
|
||||
cxcy_span = self.soup.find(
|
||||
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblCxcyxf"
|
||||
)
|
||||
if cxcy_span:
|
||||
text = cxcy_span.get_text(strip=True)
|
||||
innovation_practice_credits = self._parse_credit_value(text)
|
||||
|
||||
# 查找能力资格认证学分
|
||||
nlzg_span = self.soup.find(
|
||||
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblNlzgxf"
|
||||
)
|
||||
if nlzg_span:
|
||||
text = nlzg_span.get_text(strip=True)
|
||||
ability_certification_credits = self._parse_credit_value(text)
|
||||
|
||||
# 查找其他项目学分
|
||||
qt_span = self.soup.find(
|
||||
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblQtxf"
|
||||
)
|
||||
if qt_span:
|
||||
text = qt_span.get_text(strip=True)
|
||||
other_project_credits = self._parse_credit_value(text)
|
||||
|
||||
return CreditsSummary(
|
||||
discipline_competition_credits=discipline_competition_credits,
|
||||
scientific_research_credits=scientific_research_credits,
|
||||
transferable_competition_credits=transferable_competition_credits,
|
||||
innovation_practice_credits=innovation_practice_credits,
|
||||
ability_certification_credits=ability_certification_credits,
|
||||
other_project_credits=other_project_credits,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _parse_credit_value(text: str) -> Optional[float]:
|
||||
"""
|
||||
解析学分值
|
||||
|
||||
参数:
|
||||
text: 文本值,可能为"0", "16.60", "无"等
|
||||
|
||||
返回:
|
||||
float: 学分值,如果为"无"或无法解析则返回 None
|
||||
"""
|
||||
text = text.strip()
|
||||
if text == "无" or text == "":
|
||||
return None
|
||||
try:
|
||||
return float(text)
|
||||
except ValueError:
|
||||
return None
|
||||
Reference in New Issue
Block a user