Files
LoveACE-EndF/loveace/router/endpoint/jwc/utils/competition.py

268 lines
8.4 KiB
Python
Raw Normal View History

from typing import Optional
from bs4 import BeautifulSoup
from loveace.router.endpoint.jwc.model.competition import (
AwardProject,
CompetitionAwardsResponse,
CompetitionCreditsSummaryResponse,
CompetitionFullResponse,
CreditsSummary,
)
class CompetitionInfoParser:
"""
创新创业管理平台信息解析器
功能
- 解析获奖项目列表表格数据
- 解析学分汇总信息
- 提取学生基本信息
"""
def __init__(self, html_content: str):
"""
初始化解析器
参数:
html_content: HTML页面内容字符串
"""
self.soup = BeautifulSoup(html_content, "html.parser")
def parse_awards(self) -> CompetitionAwardsResponse:
"""
解析获奖项目列表
返回:
CompetitionAwardsResponse: 包含获奖项目列表的响应对象
"""
# 解析学生ID
student_id = self._parse_student_id()
# 解析项目列表
projects = self._parse_projects()
response = CompetitionAwardsResponse(
student_id=student_id,
total_count=len(projects),
awards=projects,
)
return response
def parse_credits_summary(self) -> CompetitionCreditsSummaryResponse:
"""
解析学分汇总信息
返回:
CompetitionCreditsSummaryResponse: 包含学分汇总信息的响应对象
"""
# 解析学生ID
student_id = self._parse_student_id()
# 解析学分汇总
credits_summary = self._parse_credits_summary()
response = CompetitionCreditsSummaryResponse(
student_id=student_id,
credits_summary=credits_summary,
)
return response
def parse_full_competition_info(self) -> CompetitionFullResponse:
"""
解析完整的学科竞赛信息获奖项目 + 学分汇总
一次性解析HTML同时提取获奖项目列表和学分汇总信息
减少网络IO和数据库查询次数
返回:
CompetitionFullResponse: 包含完整竞赛信息的响应对象
"""
# 解析学生ID
student_id = self._parse_student_id()
# 解析项目列表
projects = self._parse_projects()
# 解析学分汇总
credits_summary = self._parse_credits_summary()
response = CompetitionFullResponse(
student_id=student_id,
total_awards_count=len(projects),
awards=projects,
credits_summary=credits_summary,
)
return response
def _parse_student_id(self) -> str:
"""
解析学生基本信息 - 学生ID/工号
返回:
str: 学生ID如果未找到返回空字符串
"""
student_span = self.soup.find("span", id="ContentPlaceHolder1_lblXM")
if student_span:
text = student_span.get_text(strip=True)
# 格式: "欢迎您20244787"
if "" in text:
return text.split("")[1].strip()
return ""
def _parse_projects(self) -> list:
"""
解析获奖项目列表
数据来源: 页面中ID为 ContentPlaceHolder1_ContentPlaceHolder2_gvHj 的表格
表格结构:
- 第一行为表头
- 后续行为项目数据
- 包含15列数据
返回:
list[AwardProject]: 获奖项目列表
"""
projects = []
# 查找项目列表表格
table = self.soup.find(
"table", id="ContentPlaceHolder1_ContentPlaceHolder2_gvHj"
)
if not table:
return projects
rows = table.find_all("tr")
# 跳过表头行(第一行)
for row in rows[1:]:
cells = row.find_all("td")
if len(cells) < 9: # 至少需要9列数据
continue
try:
project = AwardProject(
project_id=cells[0].get_text(strip=True),
project_name=cells[1].get_text(strip=True),
level=cells[2].get_text(strip=True),
grade=cells[3].get_text(strip=True),
award_date=cells[4].get_text(strip=True),
applicant_id=cells[5].get_text(strip=True),
applicant_name=cells[6].get_text(strip=True),
order=int(cells[7].get_text(strip=True)),
credits=float(cells[8].get_text(strip=True)),
bonus=float(cells[9].get_text(strip=True)),
status=cells[10].get_text(strip=True),
verification_status=cells[11].get_text(strip=True),
)
projects.append(project)
except (ValueError, IndexError):
# 数据格式异常,记录但继续处理
continue
return projects
def _parse_credits_summary(self) -> Optional[CreditsSummary]:
"""
解析学分汇总信息
数据来源: 页面中的学分汇总表中的各类学分 span 元素
提取内容:
- 学科竞赛学分
- 科研项目学分
- 可转竞赛类学分
- 创新创业实践学分
- 能力资格认证学分
- 其他项目学分
返回:
CreditsSummary: 学分汇总对象如果无法解析则返回 None
"""
discipline_competition_credits = None
scientific_research_credits = None
transferable_competition_credits = None
innovation_practice_credits = None
ability_certification_credits = None
other_project_credits = None
# 查找学科竞赛学分
xkjs_span = self.soup.find(
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblXkjsxf"
)
if xkjs_span:
text = xkjs_span.get_text(strip=True)
discipline_competition_credits = self._parse_credit_value(text)
# 查找科研项目学分
ky_span = self.soup.find(
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblKyxf"
)
if ky_span:
text = ky_span.get_text(strip=True)
scientific_research_credits = self._parse_credit_value(text)
# 查找可转竞赛类学分
kzjsl_span = self.soup.find(
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblKzjslxf"
)
if kzjsl_span:
text = kzjsl_span.get_text(strip=True)
transferable_competition_credits = self._parse_credit_value(text)
# 查找创新创业实践学分
cxcy_span = self.soup.find(
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblCxcyxf"
)
if cxcy_span:
text = cxcy_span.get_text(strip=True)
innovation_practice_credits = self._parse_credit_value(text)
# 查找能力资格认证学分
nlzg_span = self.soup.find(
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblNlzgxf"
)
if nlzg_span:
text = nlzg_span.get_text(strip=True)
ability_certification_credits = self._parse_credit_value(text)
# 查找其他项目学分
qt_span = self.soup.find(
"span", id="ContentPlaceHolder1_ContentPlaceHolder2_lblQtxf"
)
if qt_span:
text = qt_span.get_text(strip=True)
other_project_credits = self._parse_credit_value(text)
return CreditsSummary(
discipline_competition_credits=discipline_competition_credits,
scientific_research_credits=scientific_research_credits,
transferable_competition_credits=transferable_competition_credits,
innovation_practice_credits=innovation_practice_credits,
ability_certification_credits=ability_certification_credits,
other_project_credits=other_project_credits,
)
@staticmethod
def _parse_credit_value(text: str) -> Optional[float]:
"""
解析学分值
参数:
text: 文本值可能为"0", "16.60", ""
返回:
float: 学分值如果为""或无法解析则返回 None
"""
text = text.strip()
if text == "" or text == "":
return None
try:
return float(text)
except ValueError:
return None