Files
LoveACE-EndF/loveace/router/endpoint/jwc/utils/aspnet_form_parser.py
Sibuxiangx bbc86b8330 ⚒️ 重大重构 LoveACE V2
引入了 mongodb
对数据库进行了一定程度的数据加密
性能改善
代码简化
统一错误模型和响应
使用 apifox 作为文档
2025-11-20 20:44:25 +08:00

97 lines
2.6 KiB
Python

"""
ASP.NET 表单解析器
用于从 ASP.NET 页面中提取动态表单数据
"""
import re
from typing import Dict, Optional, Any
from bs4 import BeautifulSoup
class ASPNETFormParser:
"""ASP.NET 表单解析器"""
@staticmethod
def extract_form_data(html_content: str) -> Dict[str, str]:
"""
从 ASP.NET 页面 HTML 中提取表单数据
Args:
html_content: HTML 页面内容
Returns:
包含表单字段的字典
"""
return ASPNETFormParser._extract_with_beautifulsoup(html_content)
@staticmethod
def _extract_with_beautifulsoup(html_content: str) -> Dict[str, str]:
"""
使用 BeautifulSoup 提取表单数据
Args:
html_content: HTML 页面内容
Returns:
包含表单字段的字典
"""
form_data = {}
# 使用 BeautifulSoup 解析 HTML
soup = BeautifulSoup(html_content, "lxml")
# 查找表单
form = soup.find("form", {"method": "post"})
if not form:
raise ValueError("未找到 POST 表单")
# 提取隐藏字段
hidden_fields = [
"__EVENTTARGET",
"__EVENTARGUMENT",
"__LASTFOCUS",
"__VIEWSTATE",
"__VIEWSTATEGENERATOR",
"__EVENTVALIDATION",
]
for field_name in hidden_fields:
input_element = form.find("input", {"name": field_name})
if input_element and input_element.get("value"):
form_data[field_name] = input_element.get("value")
else:
form_data[field_name] = ""
# 添加其他表单字段的默认值
form_data.update(
{
"ctl00$ContentPlaceHolder1$ContentPlaceHolder2$ddlSslb": "%",
"ctl00$ContentPlaceHolder1$ContentPlaceHolder2$txtSsmc": "",
"ctl00$ContentPlaceHolder1$ContentPlaceHolder2$gvSb$ctl28$txtNewPageIndex": "1",
}
)
return form_data
@staticmethod
def get_awards_list_form_data(html_content: str) -> Dict[str, str]:
"""
获取已申报奖项列表页面的表单数据
Args:
html_content: HTML 页面内容
Returns:
用于请求已申报奖项的表单数据
"""
base_form_data = ASPNETFormParser.extract_form_data(html_content)
# 设置 EVENTTARGET 为"已申报奖项"选项卡
base_form_data["__EVENTTARGET"] = (
"ctl00$ContentPlaceHolder1$ContentPlaceHolder2$DataList1$ctl01$LinkButton1"
)
return base_form_data