😋 初始化仓库
This commit is contained in:
412
lib/services/questionnaire_parser.dart
Normal file
412
lib/services/questionnaire_parser.dart
Normal file
@@ -0,0 +1,412 @@
|
||||
import 'package:flutter/foundation.dart';
|
||||
import 'package:html/parser.dart' as html_parser;
|
||||
import 'package:html/dom.dart';
|
||||
import '../models/questionnaire.dart';
|
||||
|
||||
/// Parses HTML questionnaire pages into [Questionnaire] models.
///
/// Extracts the hidden-form metadata, the single-choice (radio) questions and
/// the free-text (textarea) questions of a page. Results of [parseAsync] are
/// memoised in a static cache that is shared by every instance.
class QuestionnaireParser {
  /// Maximum number of parsed questionnaires kept in [_cache].
  static const int _maxCacheEntries = 50;

  /// Parsed questionnaires keyed by the exact HTML they were parsed from.
  ///
  /// The full content is used as the key instead of its `hashCode`, so two
  /// different pages whose hashes happen to collide can never be handed each
  /// other's result. The map literal is insertion-ordered, which the FIFO
  /// eviction in [parseAsync] relies on.
  static final Map<String, Questionnaire> _cache = {};

  /// Parses [htmlContent] through Flutter's [compute] helper.
  ///
  /// When [useCache] is true (the default) a previously parsed result for the
  /// identical content is returned without parsing again, and a fresh result
  /// is stored for later calls. The cache holds at most [_maxCacheEntries]
  /// entries; the oldest-inserted entries are dropped first.
  ///
  /// Returns the parsed [Questionnaire].
  Future<Questionnaire> parseAsync(
    String htmlContent, {
    bool useCache = true,
  }) async {
    if (useCache) {
      final cached = _cache[htmlContent];
      if (cached != null) {
        return cached;
      }
    }

    // Delegate to compute() so the parse does not run inline in this call.
    final questionnaire = await compute(_parseInIsolate, htmlContent);

    if (useCache) {
      _cache[htmlContent] = questionnaire;

      // Simple FIFO eviction: the first key is the oldest insertion.
      while (_cache.length > _maxCacheEntries) {
        _cache.remove(_cache.keys.first);
      }
    }

    return questionnaire;
  }

  /// Removes every entry from the shared parser cache.
  static void clearCache() {
    _cache.clear();
  }

  /// Entry point handed to [compute]; it must be static so it can be used as
  /// the isolate callback.
  static Questionnaire _parseInIsolate(String htmlContent) {
    return QuestionnaireParser().parse(htmlContent);
  }

  /// Parses [htmlContent] synchronously and returns the [Questionnaire].
  ///
  /// The token, questionnaire code, evaluation content and evaluated people
  /// number are copied from the extracted metadata onto the questionnaire
  /// itself as well.
  Questionnaire parse(String htmlContent) {
    final document = html_parser.parse(htmlContent);

    final metadata = _extractMetadata(document);
    final radioQuestions = _extractRadioQuestions(document);
    final textQuestions = _extractTextQuestions(document);

    return Questionnaire(
      metadata: metadata,
      radioQuestions: radioQuestions,
      textQuestions: textQuestions,
      tokenValue: metadata.tokenValue,
      questionnaireCode: metadata.questionnaireCode,
      evaluationContent: metadata.evaluationContent,
      evaluatedPeopleNumber: metadata.evaluatedPeopleNumber,
    );
  }

  /// Extracts the questionnaire metadata from [document].
  ///
  /// - title: text of the first `div.title`, else the first `h1`, else the
  ///   first `h2`
  /// - token value: `input[name="tokenValue"]`
  /// - questionnaire code: `input[name="wjdm"]`
  /// - evaluated people number: `input[name="bprdm"]`
  /// - evaluation content: `input[name="pgnr"]`
  /// - evaluated person: see [_findEvaluatedPerson]
  ///
  /// Any value that cannot be found is the empty string.
  QuestionnaireMetadata _extractMetadata(Document document) {
    final titleElement = document.querySelector('div.title') ??
        document.querySelector('h1') ??
        document.querySelector('h2');

    return QuestionnaireMetadata(
      title: titleElement?.text.trim() ?? '',
      evaluatedPerson: _findEvaluatedPerson(document),
      evaluationContent: _hiddenInputValue(document, 'pgnr'),
      tokenValue: _hiddenInputValue(document, 'tokenValue'),
      questionnaireCode: _hiddenInputValue(document, 'wjdm'),
      evaluatedPeopleNumber: _hiddenInputValue(document, 'bprdm'),
    );
  }

  /// Returns the `value` attribute of the first `<input>` whose `name` is
  /// [name], or the empty string when the input or the attribute is missing.
  ///
  /// [name] is interpolated into a CSS selector, so only pass fixed,
  /// selector-safe names.
  static String _hiddenInputValue(Document document, String name) {
    return document.querySelector('input[name="$name"]')?.attributes['value'] ??
        '';
  }

  /// Finds the evaluated person's name in [document].
  ///
  /// Scans the `<td>` cells in document order for the first one whose text
  /// contains "被评人" or "教师" and that has a following sibling element, and
  /// returns that sibling's trimmed text. Returns the empty string when no
  /// such cell exists.
  static String _findEvaluatedPerson(Document document) {
    for (final cell in document.querySelectorAll('td')) {
      final text = cell.text;
      if (text.contains('被评人') || text.contains('教师')) {
        final valueCell = cell.nextElementSibling;
        if (valueCell != null) {
          return valueCell.text.trim();
        }
      }
    }
    return '';
  }

  /// Returns the nearest ancestor of [element] whose tag name is [tagName],
  /// or null when there is none. [element] itself is never returned.
  static Element? _findAncestor(Element element, String tagName) {
    var current = element.parent;
    while (current != null && current.localName != tagName) {
      current = current.parent;
    }
    return current;
  }

  /// Extracts all single-choice questions from [document].
  ///
  /// Every `input[type="radio"]` with a non-empty `name` and `value` becomes
  /// a [RadioOption]; options sharing a `name` are grouped into one
  /// [RadioQuestion]. Questions are returned in the order their first option
  /// appears, and options keep document order.
  ///
  /// The option `value` is expected to look like `score_weight` (for example
  /// `5_1`). When it has fewer than two `_`-separated parts, or a part is not
  /// numeric, the affected number falls back to `0.0`.
  List<RadioQuestion> _extractRadioQuestions(Document document) {
    // Index <label for="..."> once instead of running a document-wide query
    // per radio input. This also avoids building a selector from an id that
    // may contain characters that are not valid inside a quoted selector
    // string. The first label per target wins, matching querySelector.
    final labelsByTarget = <String, Element>{};
    for (final label in document.querySelectorAll('label')) {
      final target = label.attributes['for'];
      if (target != null) {
        labelsByTarget.putIfAbsent(target, () => label);
      }
    }

    final questionsByName = <String, RadioQuestion>{};

    for (final input in document.querySelectorAll('input[type="radio"]')) {
      final name = input.attributes['name'];
      final value = input.attributes['value'];
      if (name == null || value == null || name.isEmpty || value.isEmpty) {
        continue;
      }

      var score = 0.0;
      var weight = 0.0;
      final parts = value.split('_');
      if (parts.length >= 2) {
        score = double.tryParse(parts[0]) ?? 0.0;
        weight = double.tryParse(parts[1]) ?? 0.0;
      }

      final option = RadioOption(
        label: _findOptionLabel(input, labelsByTarget),
        value: value,
        score: score,
        weight: weight,
      );

      final existing = questionsByName[name];
      if (existing == null) {
        // First option of this group: resolve the question text and category
        // from the table row that contains the input.
        final row = _findAncestor(input, 'tr');
        questionsByName[name] = RadioQuestion(
          key: name,
          questionText: row == null ? '' : _findRadioQuestionText(row),
          options: [option],
          category: row == null ? '' : _findCategory(row),
        );
      } else {
        // Rebuild rather than mutate: the model's option list is not known
        // to be growable from here.
        questionsByName[name] = RadioQuestion(
          key: existing.key,
          questionText: existing.questionText,
          options: [...existing.options, option],
          category: existing.category,
        );
      }
    }

    return questionsByName.values.toList();
  }

  /// Resolves the display label of a radio [input].
  ///
  /// Tries, in order, and returns the first non-empty trimmed text:
  /// 1. the `<label>` whose `for` equals the input's `id`
  ///    (looked up in [labelsByTarget]),
  /// 2. the nearest enclosing `<label>`,
  /// 3. the nearest enclosing `<td>`.
  ///
  /// Returns the empty string when none of them yields text.
  static String _findOptionLabel(
    Element input,
    Map<String, Element> labelsByTarget,
  ) {
    final inputId = input.attributes['id'];
    if (inputId != null && inputId.isNotEmpty) {
      final text = labelsByTarget[inputId]?.text.trim() ?? '';
      if (text.isNotEmpty) {
        return text;
      }
    }

    final wrappingLabel = _findAncestor(input, 'label');
    if (wrappingLabel != null) {
      final text = wrappingLabel.text.trim();
      if (text.isNotEmpty) {
        return text;
      }
    }

    return _findAncestor(input, 'td')?.text.trim() ?? '';
  }

  /// Returns the trimmed text of the first `td[rowspan]` inside [row], which
  /// is treated as the question's category, or the empty string.
  static String _findCategory(Element row) {
    return row.querySelector('td[rowspan]')?.text.trim() ?? '';
  }

  /// Finds the question text belonging to the radio group in [row].
  ///
  /// Prefers the first cell of [row] that holds no radio input and whose
  /// trimmed text is longer than five characters. If the row has no such
  /// cell, walks backwards through the preceding sibling rows and returns the
  /// first cell text longer than five characters. Returns the empty string
  /// when nothing qualifies.
  static String _findRadioQuestionText(Element row) {
    for (final cell in row.querySelectorAll('td')) {
      final text = cell.text.trim();
      // Element.text never contains markup, so only the length and the
      // absence of a radio input decide whether this cell is the question.
      if (text.length > 5 &&
          cell.querySelector('input[type="radio"]') == null) {
        return text;
      }
    }

    var previousRow = row.previousElementSibling;
    while (previousRow != null) {
      for (final cell in previousRow.querySelectorAll('td')) {
        final text = cell.text.trim();
        if (text.length > 5) {
          return text;
        }
      }
      previousRow = previousRow.previousElementSibling;
    }

    return '';
  }

  /// Extracts all free-text questions from [document].
  ///
  /// Every `<textarea>` with a non-empty `name` becomes a [TextQuestion]
  /// keyed by that name. The question text is looked up around the enclosing
  /// `<td>` (see [_findTextQuestionText]), the type comes from
  /// [_analyzeQuestionType], and a question is marked required when its name
  /// contains `zgpj`.
  List<TextQuestion> _extractTextQuestions(Document document) {
    final textQuestions = <TextQuestion>[];

    for (final textarea in document.querySelectorAll('textarea')) {
      final name = textarea.attributes['name'];
      if (name == null || name.isEmpty) {
        continue;
      }

      final cell = _findAncestor(textarea, 'td');
      final questionText = cell == null ? '' : _findTextQuestionText(cell);

      textQuestions.add(
        TextQuestion(
          key: name,
          questionText: questionText,
          type: _analyzeQuestionType(questionText, name),
          isRequired: name.contains('zgpj'),
        ),
      );
    }

    return textQuestions;
  }

  /// Finds the question text for the textarea contained in [cell].
  ///
  /// Tries, in order, and returns the first non-empty trimmed text:
  /// 1. the previous sibling element of [cell],
  /// 2. [cell] itself (this includes any text inside the textarea),
  /// 3. the first cell of the previous table row whose text is longer than
  ///    three characters.
  ///
  /// Returns the empty string when none of them yields text.
  static String _findTextQuestionText(Element cell) {
    final previousCellText = cell.previousElementSibling?.text.trim() ?? '';
    if (previousCellText.isNotEmpty) {
      return previousCellText;
    }

    final ownText = cell.text.trim();
    if (ownText.isNotEmpty) {
      return ownText;
    }

    final row = cell.parent;
    if (row == null || row.localName != 'tr') {
      return '';
    }
    final previousRow = row.previousElementSibling;
    if (previousRow == null) {
      return '';
    }
    for (final candidate in previousRow.querySelectorAll('td')) {
      final text = candidate.text.trim();
      if (text.length > 3) {
        return text;
      }
    }
    return '';
  }

  /// Classifies a text question from its [fieldName] and [questionText].
  ///
  /// - [QuestionType.overall] when [fieldName] contains `zgpj`
  /// - [QuestionType.inspiration] when the text contains "启发" or "启示"
  /// - [QuestionType.suggestion] when the text contains "建议", "意见" or
  ///   "改进"
  /// - [QuestionType.general] otherwise
  ///
  /// The field name takes precedence over the text.
  QuestionType _analyzeQuestionType(String questionText, String fieldName) {
    if (fieldName.contains('zgpj')) {
      return QuestionType.overall;
    }

    // The keywords have no letter case, so the text is matched as-is.
    if (questionText.contains('启发') || questionText.contains('启示')) {
      return QuestionType.inspiration;
    }

    if (questionText.contains('建议') ||
        questionText.contains('意见') ||
        questionText.contains('改进')) {
      return QuestionType.suggestion;
    }

    return QuestionType.general;
  }
}
|
||||
Reference in New Issue
Block a user