import 'package:flutter/foundation.dart';
import 'package:html/parser.dart' as html_parser;
import 'package:html/dom.dart';
import '../models/questionnaire.dart';

/// Parser for HTML questionnaire documents.
///
/// Extracts the questionnaire structure from a page: hidden-input metadata,
/// single-choice (radio) questions and free-text (textarea) questions.
class QuestionnaireParser {
  /// Maximum number of parsed questionnaires kept in [_cache].
  static const int _maxCacheEntries = 50;

  /// Parsed questionnaires, keyed by the exact HTML content they came from.
  ///
  /// The full content is the key (rather than its hash code) so that two
  /// different pages whose hash codes collide can never be confused. The map
  /// literal is insertion-ordered, which the FIFO eviction relies on.
  static final Map<String, Questionnaire> _cache = {};

  /// Parses [htmlContent] through [compute] so the work can run off the
  /// calling isolate.
  ///
  /// When [useCache] is true (the default), a previous result for identical
  /// content is returned without re-parsing, and new results are stored for
  /// later calls. At most [_maxCacheEntries] results are retained; the oldest
  /// entries are evicted first.
  ///
  /// Returns the parsed [Questionnaire].
  Future<Questionnaire> parseAsync(
    String htmlContent, {
    bool useCache = true,
  }) async {
    if (useCache) {
      final cached = _cache[htmlContent];
      if (cached != null) {
        return cached;
      }
    }

    final questionnaire = await compute(_parseInIsolate, htmlContent);

    if (useCache) {
      _cache[htmlContent] = questionnaire;
      // Simple FIFO eviction: drop the oldest insertions until within limit.
      while (_cache.length > _maxCacheEntries) {
        _cache.remove(_cache.keys.first);
      }
    }

    return questionnaire;
  }

  /// Removes every cached parse result.
  static void clearCache() {
    _cache.clear();
  }

  /// Entry point handed to [compute]; must be static so it can be sent to
  /// another isolate.
  static Questionnaire _parseInIsolate(String htmlContent) {
    return QuestionnaireParser().parse(htmlContent);
  }

  /// Parses [htmlContent] synchronously and returns the extracted
  /// [Questionnaire].
  ///
  /// Token value, questionnaire code, evaluation content and evaluated people
  /// number are copied from the extracted metadata onto the questionnaire.
  Questionnaire parse(String htmlContent) {
    final document = html_parser.parse(htmlContent);

    final metadata = _extractMetadata(document);
    final radioQuestions = _extractRadioQuestions(document);
    final textQuestions = _extractTextQuestions(document);

    return Questionnaire(
      metadata: metadata,
      radioQuestions: radioQuestions,
      textQuestions: textQuestions,
      tokenValue: metadata.tokenValue,
      questionnaireCode: metadata.questionnaireCode,
      evaluationContent: metadata.evaluationContent,
      evaluatedPeopleNumber: metadata.evaluatedPeopleNumber,
    );
  }

  /// Extracts questionnaire metadata from [document].
  ///
  /// Reads:
  /// - the title from the first of `div.title`, `h1`, `h2` that exists;
  /// - the token value from the `tokenValue` input;
  /// - the questionnaire code from the `wjdm` input;
  /// - the evaluated people number from the `bprdm` input;
  /// - the evaluation content from the `pgnr` input;
  /// - the evaluated person from the cell following a `td` whose text
  ///   contains '被评人' or '教师'.
  ///
  /// Any value that cannot be found is left as an empty string.
  QuestionnaireMetadata _extractMetadata(Document document) {
    final titleElement = document.querySelector('div.title') ??
        document.querySelector('h1') ??
        document.querySelector('h2');

    return QuestionnaireMetadata(
      title: titleElement?.text.trim() ?? '',
      evaluatedPerson: _findEvaluatedPerson(document),
      evaluationContent: _inputValue(document, 'pgnr'),
      tokenValue: _inputValue(document, 'tokenValue'),
      questionnaireCode: _inputValue(document, 'wjdm'),
      evaluatedPeopleNumber: _inputValue(document, 'bprdm'),
    );
  }

  /// Returns the `value` attribute of the first `input` named [name], or an
  /// empty string when the input or the attribute is missing.
  ///
  /// [name] must be a plain identifier; it is interpolated into a selector.
  String _inputValue(Document document, String name) {
    final input = document.querySelector('input[name="$name"]');
    return input?.attributes['value'] ?? '';
  }

  /// Returns the trimmed text of the element that follows the first `td`
  /// labelled '被评人' or '教师', or an empty string if there is none.
  String _findEvaluatedPerson(Document document) {
    for (final cell in document.querySelectorAll('td')) {
      final text = cell.text.trim();
      if (!text.contains('被评人') && !text.contains('教师')) {
        continue;
      }
      // A matching cell without a following sibling is skipped so that a
      // later match can still provide the name.
      final valueCell = cell.nextElementSibling;
      if (valueCell != null) {
        return valueCell.text.trim();
      }
    }
    return '';
  }

  /// Extracts all radio questions from [document].
  ///
  /// Every `input[type="radio"]` with a non-empty `name` and `value` becomes
  /// a [RadioOption]; options sharing a `name` are grouped into one
  /// [RadioQuestion], in document order. The question text and category are
  /// determined from the table row of the first option seen for each name.
  List<RadioQuestion> _extractRadioQuestions(Document document) {
    final Map<String, RadioQuestion> questionsMap = {};

    // Index <label for="..."> elements once instead of querying the whole
    // document per input. This also avoids building a CSS selector from an
    // id taken from the document, which fails for ids containing quotes or
    // other selector syntax.
    final labelsByTarget = _indexLabelsByTarget(document);

    for (final input in document.querySelectorAll('input[type="radio"]')) {
      final name = input.attributes['name'];
      final value = input.attributes['value'];
      if (name == null || value == null || name.isEmpty || value.isEmpty) {
        continue;
      }

      // Value format is "score_weight" (e.g. "5_1"); anything that does not
      // split into at least two parts, or does not parse, yields 0.0.
      final parts = value.split('_');
      var score = 0.0;
      var weight = 0.0;
      if (parts.length >= 2) {
        score = double.tryParse(parts[0]) ?? 0.0;
        weight = double.tryParse(parts[1]) ?? 0.0;
      }

      final option = RadioOption(
        label: _findOptionLabel(input, labelsByTarget),
        value: value,
        score: score,
        weight: weight,
      );

      final existing = questionsMap[name];
      if (existing == null) {
        final row = _closestAncestor(input, 'tr');
        questionsMap[name] = RadioQuestion(
          key: name,
          questionText: row == null ? '' : _findRadioQuestionText(row),
          options: [option],
          category: row == null ? '' : _findCategory(row),
        );
      } else {
        // Build a new question rather than mutating the existing options
        // list, so no assumption is made about that list being growable.
        questionsMap[name] = RadioQuestion(
          key: existing.key,
          questionText: existing.questionText,
          options: [...existing.options, option],
          category: existing.category,
        );
      }
    }

    return questionsMap.values.toList();
  }

  /// Maps each `for` attribute value to the trimmed text of the first
  /// `label` element in [document] that carries it.
  Map<String, String> _indexLabelsByTarget(Document document) {
    final labels = <String, String>{};
    for (final label in document.querySelectorAll('label')) {
      final target = label.attributes['for'];
      if (target != null && target.isNotEmpty) {
        labels.putIfAbsent(target, () => label.text.trim());
      }
    }
    return labels;
  }

  /// Returns the display label for the radio [input].
  ///
  /// Tries, in order: the label whose `for` matches the input's `id` (looked
  /// up in [labelsByTarget]), the nearest enclosing `label`, then the text of
  /// the nearest enclosing `td`. Returns an empty string if all are empty.
  String _findOptionLabel(Element input, Map<String, String> labelsByTarget) {
    final inputId = input.attributes['id'];
    if (inputId != null && inputId.isNotEmpty) {
      final linked = labelsByTarget[inputId] ?? '';
      if (linked.isNotEmpty) {
        return linked;
      }
    }

    final wrapping = _closestAncestor(input, 'label')?.text.trim() ?? '';
    if (wrapping.isNotEmpty) {
      return wrapping;
    }

    return _closestAncestor(input, 'td')?.text.trim() ?? '';
  }

  /// Returns the trimmed text of the first `td[rowspan]` inside [row], which
  /// is treated as the category indicator, or an empty string if none exists.
  String _findCategory(Element row) {
    return row.querySelector('td[rowspan]')?.text.trim() ?? '';
  }

  /// Returns the question text for a radio group whose first option lives in
  /// [row].
  ///
  /// Prefers the first cell of [row] that has more than 5 characters of text,
  /// does not contain the literal text 'input' and holds no radio button.
  /// Otherwise walks the preceding siblings, nearest first, and returns the
  /// first cell text longer than 5 characters. Returns an empty string when
  /// nothing qualifies.
  String _findRadioQuestionText(Element row) {
    for (final cell in row.querySelectorAll('td')) {
      final text = cell.text.trim();
      if (text.length > 5 &&
          !text.contains('input') &&
          cell.querySelector('input[type="radio"]') == null) {
        return text;
      }
    }

    var previousRow = row.previousElementSibling;
    while (previousRow != null) {
      for (final cell in previousRow.querySelectorAll('td')) {
        final text = cell.text.trim();
        if (text.length > 5) {
          return text;
        }
      }
      previousRow = previousRow.previousElementSibling;
    }

    return '';
  }

  /// Extracts all text questions from [document].
  ///
  /// Every `textarea` with a non-empty `name` becomes a [TextQuestion]. The
  /// question type comes from [_analyzeQuestionType]; a question is marked
  /// required when its name contains 'zgpj'.
  List<TextQuestion> _extractTextQuestions(Document document) {
    final List<TextQuestion> textQuestions = [];

    for (final textarea in document.querySelectorAll('textarea')) {
      final name = textarea.attributes['name'];
      if (name == null || name.isEmpty) {
        continue;
      }

      final questionText = _findTextQuestionText(textarea);

      textQuestions.add(
        TextQuestion(
          key: name,
          questionText: questionText,
          type: _analyzeQuestionType(questionText, name),
          isRequired: name.contains('zgpj'),
        ),
      );
    }

    return textQuestions;
  }

  /// Returns the question text associated with [textarea].
  ///
  /// Looks at the nearest enclosing `td` and tries, in order: the text of the
  /// element just before that cell, the cell's own text, then the first cell
  /// with more than 3 characters in the row immediately above. Returns an
  /// empty string when the textarea is not inside a `td` or nothing is found.
  String _findTextQuestionText(Element textarea) {
    final cell = _closestAncestor(textarea, 'td');
    if (cell == null) {
      return '';
    }

    final previousCellText = cell.previousElementSibling?.text.trim() ?? '';
    if (previousCellText.isNotEmpty) {
      return previousCellText;
    }

    final ownText = cell.text.trim();
    if (ownText.isNotEmpty) {
      return ownText;
    }

    final row = cell.parent;
    if (row == null || row.localName != 'tr') {
      return '';
    }
    final previousRow = row.previousElementSibling;
    if (previousRow == null) {
      return '';
    }
    for (final candidate in previousRow.querySelectorAll('td')) {
      final text = candidate.text.trim();
      if (text.length > 3) {
        return text;
      }
    }
    return '';
  }

  /// Returns the nearest ancestor of [start] whose local name is [tagName],
  /// or null when there is none. [start] itself is never returned.
  Element? _closestAncestor(Element start, String tagName) {
    var current = start.parent;
    while (current != null && current.localName != tagName) {
      current = current.parent;
    }
    return current;
  }

  /// Classifies a text question from its [questionText] and [fieldName].
  ///
  /// - [QuestionType.overall] when [fieldName] contains 'zgpj';
  /// - [QuestionType.inspiration] when the text contains '启发' or '启示';
  /// - [QuestionType.suggestion] when the text contains '建议', '意见' or
  ///   '改进';
  /// - [QuestionType.general] otherwise.
  ///
  /// The field name takes precedence over the text keywords.
  QuestionType _analyzeQuestionType(String questionText, String fieldName) {
    if (fieldName.contains('zgpj')) {
      return QuestionType.overall;
    }

    if (questionText.contains('启发') || questionText.contains('启示')) {
      return QuestionType.inspiration;
    }

    if (questionText.contains('建议') ||
        questionText.contains('意见') ||
        questionText.contains('改进')) {
      return QuestionType.suggestion;
    }

    return QuestionType.general;
  }
}