Files
AutoJudge-Flutter/lib/services/questionnaire_parser.dart
2025-11-13 09:14:49 +08:00

413 lines
13 KiB
Dart

import 'package:flutter/foundation.dart';
import 'package:html/parser.dart' as html_parser;
import 'package:html/dom.dart';
import '../models/questionnaire.dart';
/// Parser for HTML questionnaire documents.
///
/// Extracts the questionnaire structure from raw page HTML: radio
/// (single-choice) questions, free-text questions, and metadata read from
/// form inputs and table cells.
class QuestionnaireParser {
  /// Maximum number of parsed questionnaires retained in [_cache].
  static const int _maxCacheEntries = 50;

  /// A table cell must contain more characters than this to be accepted as
  /// the text of a radio question.
  static const int _radioQuestionTextThreshold = 5;

  /// A previous-row cell must contain more characters than this to be
  /// accepted as the text of a free-text question.
  static const int _textQuestionTextThreshold = 3;

  /// Cache of parsed questionnaires, keyed by the full HTML content.
  ///
  /// The content itself is the key (not its `hashCode`): two different pages
  /// may share a hash code, and a hash-only key would then hand back the
  /// wrong questionnaire. The trade-off is that up to [_maxCacheEntries]
  /// HTML strings stay referenced until evicted or [clearCache] is called.
  static final Map<String, Questionnaire> _cache = {};

  /// Parses [htmlContent] via Flutter's `compute` so the work is kept off
  /// the calling isolate where the platform supports it.
  ///
  /// [htmlContent] - The HTML content of the questionnaire page.
  /// [useCache] - Whether to read from and write to the shared cache
  /// (default: true). On a cache hit the previously parsed instance is
  /// returned as-is, so all callers share the same object.
  ///
  /// Returns a [Questionnaire] object containing all parsed data.
  Future<Questionnaire> parseAsync(
    String htmlContent, {
    bool useCache = true,
  }) async {
    if (useCache) {
      final cached = _cache[htmlContent];
      if (cached != null) {
        return cached;
      }
    }

    final questionnaire = await compute(_parseInIsolate, htmlContent);

    if (useCache) {
      _cache[htmlContent] = questionnaire;
      // Map literals are insertion-ordered, so the first key is the oldest
      // entry: evicting from the front gives simple FIFO behaviour.
      while (_cache.length > _maxCacheEntries) {
        _cache.remove(_cache.keys.first);
      }
    }
    return questionnaire;
  }

  /// Removes every entry from the shared parser cache.
  static void clearCache() {
    _cache.clear();
  }

  /// Entry point handed to `compute`; must be static so it can be sent to
  /// another isolate.
  static Questionnaire _parseInIsolate(String htmlContent) {
    return QuestionnaireParser().parse(htmlContent);
  }

  /// Parses an HTML document synchronously and extracts the questionnaire
  /// structure.
  ///
  /// [htmlContent] - The HTML content of the questionnaire page.
  ///
  /// Returns a [Questionnaire] whose top-level token, code, content and
  /// people-number fields are copied from the extracted metadata.
  Questionnaire parse(String htmlContent) {
    final document = html_parser.parse(htmlContent);
    final metadata = _extractMetadata(document);

    return Questionnaire(
      metadata: metadata,
      radioQuestions: _extractRadioQuestions(document),
      textQuestions: _extractTextQuestions(document),
      tokenValue: metadata.tokenValue,
      questionnaireCode: metadata.questionnaireCode,
      evaluationContent: metadata.evaluationContent,
      evaluatedPeopleNumber: metadata.evaluatedPeopleNumber,
    );
  }

  /// Extracts questionnaire metadata from [document].
  ///
  /// Reads:
  /// - Title: first of `div.title`, `h1`, `h2`
  /// - Token value: `input[name="tokenValue"]`
  /// - Questionnaire code: `input[name="wjdm"]`
  /// - Evaluated people number: `input[name="bprdm"]`
  /// - Evaluation content: `input[name="pgnr"]`
  /// - Evaluated person: text of the cell following the first `td` whose
  ///   text mentions "被评人" or "教师" and that has a following sibling
  ///
  /// Any value that cannot be found is returned as an empty string.
  QuestionnaireMetadata _extractMetadata(Document document) {
    final titleElement = document.querySelector('div.title') ??
        document.querySelector('h1') ??
        document.querySelector('h2');

    // NOTE(review): this matches ANY cell mentioning "教师", including
    // question rows that merely talk about the teacher. Confirm against the
    // real page layout before relying on `evaluatedPerson`.
    var evaluatedPerson = '';
    for (final cell in document.querySelectorAll('td')) {
      final text = cell.text;
      if (!text.contains('被评人') && !text.contains('教师')) {
        continue;
      }
      final neighbour = cell.nextElementSibling;
      if (neighbour != null) {
        evaluatedPerson = neighbour.text.trim();
        break;
      }
    }

    return QuestionnaireMetadata(
      title: titleElement?.text.trim() ?? '',
      evaluatedPerson: evaluatedPerson,
      evaluationContent: _inputValue(document, 'pgnr'),
      tokenValue: _inputValue(document, 'tokenValue'),
      questionnaireCode: _inputValue(document, 'wjdm'),
      evaluatedPeopleNumber: _inputValue(document, 'bprdm'),
    );
  }

  /// Returns the `value` attribute of the first `input` named [name], or an
  /// empty string when the input or the attribute is missing.
  ///
  /// [name] is interpolated into a CSS selector, so only pass fixed,
  /// selector-safe names.
  static String _inputValue(Document document, String name) {
    final input = document.querySelector('input[name="$name"]');
    return input?.attributes['value'] ?? '';
  }

  /// Returns the nearest ancestor of [element] whose tag name is [tagName],
  /// or `null` when there is none. [element] itself is never returned.
  static Element? _closestAncestor(Element element, String tagName) {
    var current = element.parent;
    while (current != null && current.localName != tagName) {
      current = current.parent;
    }
    return current;
  }

  /// Extracts all radio questions from [document].
  ///
  /// Every `input[type="radio"]` with a non-empty `name` and `value` becomes
  /// a [RadioOption]; options sharing a `name` are grouped into one
  /// [RadioQuestion]. Questions are returned in order of first appearance.
  ///
  /// The `value` attribute is split on `_`; when at least two parts exist
  /// the first is parsed as the score and the second as the weight (for
  /// example `"5_1"`). Unparseable or missing parts yield `0.0`.
  List<RadioQuestion> _extractRadioQuestions(Document document) {
    // Index <label for="..."> text once instead of issuing one selector
    // query per radio input. Comparing attribute values directly also avoids
    // interpolating page-controlled ids into a CSS selector, which breaks on
    // ids containing quotes or brackets. The first label for an id wins,
    // matching what `querySelector` would have returned.
    final labelTextById = <String, String>{};
    for (final labelElement in document.querySelectorAll('label[for]')) {
      final targetId = labelElement.attributes['for'];
      if (targetId != null && targetId.isNotEmpty) {
        labelTextById.putIfAbsent(targetId, () => labelElement.text.trim());
      }
    }

    final questionsByName = <String, RadioQuestion>{};
    for (final input in document.querySelectorAll('input[type="radio"]')) {
      final name = input.attributes['name'];
      final value = input.attributes['value'];
      if (name == null || value == null || name.isEmpty || value.isEmpty) {
        continue;
      }

      final parts = value.split('_');
      final hasScoreAndWeight = parts.length >= 2;
      final option = RadioOption(
        label: _radioOptionLabel(input, labelTextById),
        value: value,
        score: hasScoreAndWeight ? double.tryParse(parts[0]) ?? 0.0 : 0.0,
        weight: hasScoreAndWeight ? double.tryParse(parts[1]) ?? 0.0 : 0.0,
      );

      final existing = questionsByName[name];
      if (existing != null) {
        // Later option of a known question: keep its text/category and
        // append the option.
        questionsByName[name] = RadioQuestion(
          key: existing.key,
          questionText: existing.questionText,
          options: [...existing.options, option],
          category: existing.category,
        );
        continue;
      }

      // First option of a new question: derive its text and category from
      // the table row that contains this input.
      final row = _closestAncestor(input, 'tr');
      // NOTE(review): the category is only found when the row of the FIRST
      // option itself holds a `td[rowspan]`; rows covered by an earlier
      // rowspan get an empty category. Confirm whether that is intended.
      final categoryCell = row?.querySelector('td[rowspan]');
      questionsByName[name] = RadioQuestion(
        key: name,
        questionText: row == null ? '' : _radioQuestionText(row),
        options: [option],
        category: categoryCell?.text.trim() ?? '',
      );
    }
    return questionsByName.values.toList();
  }

  /// Resolves the display label of a radio [input].
  ///
  /// Tries, in order: the `<label for>` registered in [labelTextById] for
  /// the input's `id`, the nearest enclosing `<label>`, and finally the full
  /// text of the nearest enclosing `<td>`. Returns an empty string when all
  /// three are missing or empty.
  static String _radioOptionLabel(
    Element input,
    Map<String, String> labelTextById,
  ) {
    final inputId = input.attributes['id'];
    var label = '';
    if (inputId != null && inputId.isNotEmpty) {
      label = labelTextById[inputId] ?? '';
    }
    if (label.isEmpty) {
      label = _closestAncestor(input, 'label')?.text.trim() ?? '';
    }
    if (label.isEmpty) {
      label = _closestAncestor(input, 'td')?.text.trim() ?? '';
    }
    return label;
  }

  /// Finds the question text for the radio group whose first option lives
  /// in [row].
  ///
  /// Prefers the first cell of [row] that has no radio input and whose
  /// trimmed text is longer than [_radioQuestionTextThreshold]. Failing
  /// that, walks backwards through preceding sibling rows and returns the
  /// first cell text that is long enough. Returns an empty string when
  /// nothing qualifies.
  static String _radioQuestionText(Element row) {
    for (final cell in row.querySelectorAll('td')) {
      final text = cell.text.trim();
      // `cell.text` never contains markup, so the radio check has to look
      // at the element tree rather than search the text for "input".
      if (text.length > _radioQuestionTextThreshold &&
          cell.querySelector('input[type="radio"]') == null) {
        return text;
      }
    }

    var earlierRow = row.previousElementSibling;
    while (earlierRow != null) {
      for (final cell in earlierRow.querySelectorAll('td')) {
        final text = cell.text.trim();
        if (text.length > _radioQuestionTextThreshold) {
          return text;
        }
      }
      earlierRow = earlierRow.previousElementSibling;
    }
    return '';
  }

  /// Extracts all free-text questions from [document].
  ///
  /// Every `textarea` with a non-empty `name` becomes a [TextQuestion]. The
  /// question type comes from [_analyzeQuestionType]; a question is marked
  /// required when its field name contains `zgpj`.
  List<TextQuestion> _extractTextQuestions(Document document) {
    final textQuestions = <TextQuestion>[];
    for (final textarea in document.querySelectorAll('textarea')) {
      final name = textarea.attributes['name'];
      if (name == null || name.isEmpty) {
        continue;
      }

      final questionText = _textQuestionText(textarea);
      textQuestions.add(
        TextQuestion(
          key: name,
          questionText: questionText,
          type: _analyzeQuestionType(questionText, name),
          isRequired: name.contains('zgpj'),
        ),
      );
    }
    return textQuestions;
  }

  /// Finds the prompt text belonging to [textarea].
  ///
  /// Tries, in order: the text of the cell immediately before the
  /// textarea's enclosing `<td>`; the enclosing cell's own text with any
  /// textarea content excluded; and the first cell of the directly
  /// preceding `<tr>` whose text is longer than
  /// [_textQuestionTextThreshold]. Returns an empty string when the
  /// textarea is not inside a `<td>` or nothing qualifies.
  static String _textQuestionText(Element textarea) {
    final cell = _closestAncestor(textarea, 'td');
    if (cell == null) {
      return '';
    }

    final precedingCellText = cell.previousElementSibling?.text.trim() ?? '';
    if (precedingCellText.isNotEmpty) {
      return precedingCellText;
    }

    final ownText = _textExcludingTextareas(cell);
    if (ownText.isNotEmpty) {
      return ownText;
    }

    // Only a <tr> that is the cell's direct parent is considered.
    final row = cell.parent;
    if (row == null || row.localName != 'tr') {
      return '';
    }
    final earlierRow = row.previousElementSibling;
    if (earlierRow == null) {
      return '';
    }
    for (final candidate in earlierRow.querySelectorAll('td')) {
      final text = candidate.text.trim();
      if (text.length > _textQuestionTextThreshold) {
        return text;
      }
    }
    return '';
  }

  /// Returns the trimmed text of [cell] with the content of every
  /// `<textarea>` inside it left out.
  ///
  /// A textarea's pre-filled value is a text node of the cell, so reading
  /// `cell.text` directly would report a saved answer as question text.
  /// Works on a deep copy so the parsed document is not modified.
  static String _textExcludingTextareas(Element cell) {
    final copy = cell.clone(true);
    for (final area in copy.querySelectorAll('textarea')) {
      area.remove();
    }
    return copy.text.trim();
  }

  /// Classifies a free-text question from its field name and prompt text.
  ///
  /// Checked in this order:
  /// - [QuestionType.overall] when [fieldName] contains `zgpj`
  /// - [QuestionType.inspiration] when the text contains "启发" or "启示"
  /// - [QuestionType.suggestion] when the text contains "建议", "意见" or
  ///   "改进"
  /// - [QuestionType.general] otherwise
  QuestionType _analyzeQuestionType(String questionText, String fieldName) {
    // The field name takes precedence over any keyword in the text.
    if (fieldName.contains('zgpj')) {
      return QuestionType.overall;
    }

    final lowerText = questionText.toLowerCase();
    if (lowerText.contains('启发') || lowerText.contains('启示')) {
      return QuestionType.inspiration;
    }
    if (lowerText.contains('建议') ||
        lowerText.contains('意见') ||
        lowerText.contains('改进')) {
      return QuestionType.suggestion;
    }
    return QuestionType.general;
  }
}