97 lines
2.6 KiB
Python
97 lines
2.6 KiB
Python
"""
|
|
ASP.NET 表单解析器
|
|
用于从 ASP.NET 页面中提取动态表单数据
|
|
"""
|
|
|
|
import re
|
|
from typing import Dict, Optional, Any
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
class ASPNETFormParser:
|
|
"""ASP.NET 表单解析器"""
|
|
|
|
@staticmethod
|
|
def extract_form_data(html_content: str) -> Dict[str, str]:
|
|
"""
|
|
从 ASP.NET 页面 HTML 中提取表单数据
|
|
|
|
Args:
|
|
html_content: HTML 页面内容
|
|
|
|
Returns:
|
|
包含表单字段的字典
|
|
"""
|
|
|
|
return ASPNETFormParser._extract_with_beautifulsoup(html_content)
|
|
|
|
@staticmethod
|
|
def _extract_with_beautifulsoup(html_content: str) -> Dict[str, str]:
|
|
"""
|
|
使用 BeautifulSoup 提取表单数据
|
|
|
|
Args:
|
|
html_content: HTML 页面内容
|
|
|
|
Returns:
|
|
包含表单字段的字典
|
|
"""
|
|
form_data = {}
|
|
|
|
# 使用 BeautifulSoup 解析 HTML
|
|
soup = BeautifulSoup(html_content, "lxml")
|
|
|
|
# 查找表单
|
|
form = soup.find("form", {"method": "post"})
|
|
if not form:
|
|
raise ValueError("未找到 POST 表单")
|
|
|
|
# 提取隐藏字段
|
|
hidden_fields = [
|
|
"__EVENTTARGET",
|
|
"__EVENTARGUMENT",
|
|
"__LASTFOCUS",
|
|
"__VIEWSTATE",
|
|
"__VIEWSTATEGENERATOR",
|
|
"__EVENTVALIDATION",
|
|
]
|
|
|
|
for field_name in hidden_fields:
|
|
input_element = form.find("input", {"name": field_name})
|
|
if input_element and input_element.get("value"):
|
|
form_data[field_name] = input_element.get("value")
|
|
else:
|
|
form_data[field_name] = ""
|
|
|
|
# 添加其他表单字段的默认值
|
|
form_data.update(
|
|
{
|
|
"ctl00$ContentPlaceHolder1$ContentPlaceHolder2$ddlSslb": "%",
|
|
"ctl00$ContentPlaceHolder1$ContentPlaceHolder2$txtSsmc": "",
|
|
"ctl00$ContentPlaceHolder1$ContentPlaceHolder2$gvSb$ctl28$txtNewPageIndex": "1",
|
|
}
|
|
)
|
|
|
|
return form_data
|
|
|
|
@staticmethod
|
|
def get_awards_list_form_data(html_content: str) -> Dict[str, str]:
|
|
"""
|
|
获取已申报奖项列表页面的表单数据
|
|
|
|
Args:
|
|
html_content: HTML 页面内容
|
|
|
|
Returns:
|
|
用于请求已申报奖项的表单数据
|
|
"""
|
|
base_form_data = ASPNETFormParser.extract_form_data(html_content)
|
|
|
|
# 设置 EVENTTARGET 为"已申报奖项"选项卡
|
|
base_form_data["__EVENTTARGET"] = (
|
|
"ctl00$ContentPlaceHolder1$ContentPlaceHolder2$DataList1$ctl01$LinkButton1"
|
|
)
|
|
|
|
return base_form_data
|