// Dart source file (413 lines, 13 KiB).
import 'package:flutter/foundation.dart';
|
|
import 'package:html/parser.dart' as html_parser;
|
|
import 'package:html/dom.dart';
|
|
import '../models/questionnaire.dart';
|
|
|
|
/// Parser for HTML questionnaire documents.
///
/// Extracts the questionnaire structure from a page: single-choice (radio)
/// questions, free-text (textarea) questions, and the metadata carried in
/// hidden form inputs.
class QuestionnaireParser {
  /// Maximum number of parsed questionnaires kept in [_cache].
  static const int _maxCacheEntries = 50;

  /// A candidate cell must be longer than this (after trimming) to be taken
  /// as the text of a radio question.
  static const int _minRadioQuestionTextLength = 5;

  /// A previous-row cell must be longer than this (after trimming) to be
  /// taken as the text of a textarea question.
  static const int _minTextQuestionTextLength = 3;

  /// Substring of a textarea `name` that marks the overall-evaluation field.
  static const String _overallFieldMarker = 'zgpj';

  /// Parsed questionnaires, keyed by the exact HTML they were parsed from.
  ///
  /// The key is the full content rather than its `hashCode`: distinct pages
  /// can share a hash code, and a collision would hand back the wrong
  /// questionnaire. The cost is that up to [_maxCacheEntries] HTML strings
  /// stay referenced by the cache. Map literals iterate in insertion order,
  /// which is what the first-in-first-out eviction in [parseAsync] relies on.
  static final Map<String, Questionnaire> _cache = {};

  /// Parses [htmlContent] via `compute`, so the work does not run on the
  /// calling isolate.
  ///
  /// When [useCache] is true (the default), a result previously parsed from
  /// identical content is returned without re-parsing, and a fresh result is
  /// stored for later calls. The cache holds at most [_maxCacheEntries]
  /// entries; the oldest inserted entries are evicted first.
  ///
  /// Returns the parsed [Questionnaire].
  Future<Questionnaire> parseAsync(
    String htmlContent, {
    bool useCache = true,
  }) async {
    if (useCache) {
      final cached = _cache[htmlContent];
      if (cached != null) return cached;
    }

    final questionnaire = await compute(_parseInIsolate, htmlContent);

    if (useCache) {
      _cache[htmlContent] = questionnaire;
      // Simple FIFO eviction: the first key is the oldest insertion.
      while (_cache.length > _maxCacheEntries) {
        _cache.remove(_cache.keys.first);
      }
    }

    return questionnaire;
  }

  /// Removes every entry from the parser cache.
  static void clearCache() => _cache.clear();

  /// Entry point handed to `compute`; it is static so it can be used as the
  /// isolate callback. Parses with a fresh parser instance.
  static Questionnaire _parseInIsolate(String htmlContent) =>
      QuestionnaireParser().parse(htmlContent);

  /// Parses [htmlContent] synchronously and returns the extracted
  /// [Questionnaire].
  ///
  /// The token, questionnaire code, evaluation content and evaluated-people
  /// number are passed both inside the metadata object and as top-level
  /// fields of the result.
  Questionnaire parse(String htmlContent) {
    final document = html_parser.parse(htmlContent);
    final metadata = _extractMetadata(document);

    return Questionnaire(
      metadata: metadata,
      radioQuestions: _extractRadioQuestions(document),
      textQuestions: _extractTextQuestions(document),
      tokenValue: metadata.tokenValue,
      questionnaireCode: metadata.questionnaireCode,
      evaluationContent: metadata.evaluationContent,
      evaluatedPeopleNumber: metadata.evaluatedPeopleNumber,
    );
  }

  /// Extracts the questionnaire metadata from [document].
  ///
  /// - Title: trimmed text of the first of `div.title`, `h1`, `h2` present.
  /// - Token value (CSRF token): hidden input `tokenValue`.
  /// - Questionnaire code: hidden input `wjdm`.
  /// - Evaluated people number: hidden input `bprdm`.
  /// - Evaluation content: hidden input `pgnr`.
  /// - Evaluated person (teacher name): text of the cell following the first
  ///   `td` that mentions '被评人' or '教师' and has a next sibling element.
  ///
  /// Anything that cannot be found is returned as an empty string.
  QuestionnaireMetadata _extractMetadata(Document document) {
    final titleElement = document.querySelector('div.title') ??
        document.querySelector('h1') ??
        document.querySelector('h2');

    var evaluatedPerson = '';
    for (final cell in document.querySelectorAll('td')) {
      final text = cell.text.trim();
      if (!text.contains('被评人') && !text.contains('教师')) continue;

      // A matching cell without a following sibling is skipped, and the
      // search continues with later cells.
      final valueCell = cell.nextElementSibling;
      if (valueCell != null) {
        evaluatedPerson = valueCell.text.trim();
        break;
      }
    }

    return QuestionnaireMetadata(
      title: titleElement?.text.trim() ?? '',
      evaluatedPerson: evaluatedPerson,
      evaluationContent: _inputValue(document, 'pgnr'),
      tokenValue: _inputValue(document, 'tokenValue'),
      questionnaireCode: _inputValue(document, 'wjdm'),
      evaluatedPeopleNumber: _inputValue(document, 'bprdm'),
    );
  }

  /// Returns the `value` attribute of the first `input` named [name] in
  /// [document], or an empty string when the input or attribute is missing.
  ///
  /// [name] is spliced into a selector unescaped, so it must be a plain
  /// identifier; every caller in this class passes a constant.
  String _inputValue(Document document, String name) =>
      document.querySelector('input[name="$name"]')?.attributes['value'] ?? '';

  /// Returns the nearest ancestor of [element] whose tag is [localName], or
  /// null when there is none. [element] itself is never returned.
  Element? _closestAncestor(Element element, String localName) {
    var ancestor = element.parent;
    while (ancestor != null && ancestor.localName != localName) {
      ancestor = ancestor.parent;
    }
    return ancestor;
  }

  /// Extracts all radio questions from [document].
  ///
  /// Every `input[type="radio"]` with a non-empty `name` and `value` becomes
  /// one option; options are grouped into questions by `name`, and questions
  /// are returned in the order their first option appears. The `value` is
  /// read as "score_weight" (e.g. "5_1"); when it has fewer than two
  /// '_'-separated parts, or a part is not a number, that number is 0.0.
  /// Question text and category are taken from the first option's row only.
  List<RadioQuestion> _extractRadioQuestions(Document document) {
    final labelsByTarget = _indexLabelsByTarget(document);
    final optionsByName = <String, List<RadioOption>>{};
    final questionTexts = <String, String>{};
    final categories = <String, String>{};

    for (final input in document.querySelectorAll('input[type="radio"]')) {
      final name = input.attributes['name'];
      final value = input.attributes['value'];
      if (name == null || value == null || name.isEmpty || value.isEmpty) {
        continue;
      }

      final parts = value.split('_');
      var score = 0.0;
      var weight = 0.0;
      if (parts.length >= 2) {
        score = double.tryParse(parts[0]) ?? 0.0;
        weight = double.tryParse(parts[1]) ?? 0.0;
      }

      final option = RadioOption(
        label: _optionLabel(input, labelsByTarget),
        value: value,
        score: score,
        weight: weight,
      );

      final existingOptions = optionsByName[name];
      if (existingOptions != null) {
        existingOptions.add(option);
        continue;
      }

      // First option of this question: record its text and category too.
      optionsByName[name] = [option];
      final row = _closestAncestor(input, 'tr');
      // A cell carrying a rowspan attribute is treated as the category cell.
      categories[name] = row?.querySelector('td[rowspan]')?.text.trim() ?? '';
      questionTexts[name] = row == null ? '' : _radioQuestionText(row);
    }

    return [
      for (final entry in optionsByName.entries)
        RadioQuestion(
          key: entry.key,
          questionText: questionTexts[entry.key] ?? '',
          options: entry.value,
          category: categories[entry.key] ?? '',
        ),
    ];
  }

  /// Maps each non-empty `for` attribute value to the first `label` element
  /// in [document] that carries it.
  ///
  /// Looking labels up by key avoids building a selector out of an input's
  /// `id` (which breaks when the id contains quotes or brackets) and avoids
  /// re-scanning the whole document for every radio input.
  Map<String, Element> _indexLabelsByTarget(Document document) {
    final labelsByTarget = <String, Element>{};
    for (final label in document.querySelectorAll('label')) {
      final target = label.attributes['for'];
      if (target != null && target.isNotEmpty) {
        labelsByTarget.putIfAbsent(target, () => label);
      }
    }
    return labelsByTarget;
  }

  /// Returns the display label for the radio [input].
  ///
  /// Tries, in order, and stops at the first non-empty text: the `label`
  /// whose `for` equals the input's `id`, the nearest enclosing `label`, and
  /// the nearest enclosing `td`. Returns an empty string when all are empty
  /// or absent.
  String _optionLabel(Element input, Map<String, Element> labelsByTarget) {
    var label = '';

    final inputId = input.attributes['id'];
    if (inputId != null && inputId.isNotEmpty) {
      label = labelsByTarget[inputId]?.text.trim() ?? '';
    }
    if (label.isEmpty) {
      label = _closestAncestor(input, 'label')?.text.trim() ?? '';
    }
    if (label.isEmpty) {
      label = _closestAncestor(input, 'td')?.text.trim() ?? '';
    }

    return label;
  }

  /// Finds the question text for a radio group whose first option sits in
  /// [row].
  ///
  /// First looks in [row] for a cell that has no radio input and whose text
  /// is longer than [_minRadioQuestionTextLength]; failing that, walks the
  /// preceding sibling rows (nearest first) and takes the first cell whose
  /// text is long enough. Returns an empty string when nothing qualifies.
  String _radioQuestionText(Element row) {
    for (final cell in row.querySelectorAll('td')) {
      final text = cell.text.trim();
      // NOTE(review): the 'input' check also rejects a genuine question
      // whose wording contains that word; kept as-is to preserve behaviour.
      if (text.length > _minRadioQuestionTextLength &&
          !text.contains('input') &&
          cell.querySelector('input[type="radio"]') == null) {
        return text;
      }
    }

    for (var previous = row.previousElementSibling;
        previous != null;
        previous = previous.previousElementSibling) {
      for (final cell in previous.querySelectorAll('td')) {
        final text = cell.text.trim();
        if (text.length > _minRadioQuestionTextLength) return text;
      }
    }

    return '';
  }

  /// Extracts all text questions from [document].
  ///
  /// Every `textarea` with a non-empty `name` becomes one question, in
  /// document order. A question is marked required when its name contains
  /// [_overallFieldMarker].
  List<TextQuestion> _extractTextQuestions(Document document) {
    final textQuestions = <TextQuestion>[];

    for (final textarea in document.querySelectorAll('textarea')) {
      final name = textarea.attributes['name'];
      if (name == null || name.isEmpty) continue;

      final questionText = _textQuestionText(textarea);
      textQuestions.add(
        TextQuestion(
          key: name,
          questionText: questionText,
          type: _analyzeQuestionType(questionText, name),
          isRequired: _isOverallField(name),
        ),
      );
    }

    return textQuestions;
  }

  /// Finds the question text that belongs to [textarea].
  ///
  /// Tries, in order, and stops at the first non-empty text: the cell before
  /// the textarea's enclosing `td`, that `td` itself, and, when the `td`'s
  /// parent is a `tr`, the first cell of the immediately preceding row whose
  /// text is longer than [_minTextQuestionTextLength]. Returns an empty
  /// string when the textarea is not inside a `td` or nothing qualifies.
  String _textQuestionText(Element textarea) {
    final cell = _closestAncestor(textarea, 'td');
    if (cell == null) return '';

    final previousCellText = cell.previousElementSibling?.text.trim() ?? '';
    if (previousCellText.isNotEmpty) return previousCellText;

    final ownCellText = cell.text.trim();
    if (ownCellText.isNotEmpty) return ownCellText;

    final row = cell.parent;
    if (row == null || row.localName != 'tr') return '';

    final previousRow = row.previousElementSibling;
    if (previousRow == null) return '';

    for (final candidate in previousRow.querySelectorAll('td')) {
      final text = candidate.text.trim();
      if (text.length > _minTextQuestionTextLength) return text;
    }
    return '';
  }

  /// Whether [fieldName] denotes the overall-evaluation field, i.e. contains
  /// [_overallFieldMarker].
  bool _isOverallField(String fieldName) =>
      fieldName.contains(_overallFieldMarker);

  /// Classifies a text question from its [fieldName] and [questionText].
  ///
  /// The field name is checked first, then keywords in the text:
  /// - overall: the field name contains "zgpj";
  /// - inspiration: the text contains "启发" or "启示";
  /// - suggestion: the text contains "建议", "意见" or "改进";
  /// - general: everything else.
  QuestionType _analyzeQuestionType(String questionText, String fieldName) {
    if (_isOverallField(fieldName)) {
      return QuestionType.overall;
    }

    if (questionText.contains('启发') || questionText.contains('启示')) {
      return QuestionType.inspiration;
    }

    if (questionText.contains('建议') ||
        questionText.contains('意见') ||
        questionText.contains('改进')) {
      return QuestionType.suggestion;
    }

    return QuestionType.general;
  }
}
|