From a3cff924ba94a0c4161b98d959f8e8c9c7554413 Mon Sep 17 00:00:00 2001
From: CaffeineFueled
Date: Tue, 15 Apr 2025 07:11:30 +0200
Subject: [PATCH] CHANGE import of multiple log files

---
 main.py | 238 ++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 163 insertions(+), 75 deletions(-)

diff --git a/main.py b/main.py
index 9831d45..9fd21c9 100644
--- a/main.py
+++ b/main.py
@@ -136,12 +136,15 @@ async def combined_view(
     # Parse all log files and collect all rows
     for filename in log_files:
         log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
-
-        if columns:
-            common_columns.update(columns)
-
-        all_rows.extend(rows)
+        try:
+            columns, rows = parse_log_file(log_path)
+
+            if columns:
+                common_columns.update(columns)
+
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in combined view: {e}")
 
     # Apply gateway filter if specified
     if gateway:
@@ -290,11 +293,14 @@ async def api_all_entries(
     reference_columns = []
     for filename in log_files:
         log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
-        if columns and not reference_columns:
-            # Save column order from first file with columns
-            reference_columns = columns
-        all_rows.extend(rows)
+        try:
+            columns, rows = parse_log_file(log_path)
+            if columns and not reference_columns:
+                # Save column order from first file with columns
+                reference_columns = columns
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in api_all_entries: {e}")
 
     # Apply gateway filter if specified
     if gateway:
@@ -355,13 +361,51 @@ class LogRow(BaseModel):
 @app.get("/view/{filename}", response_class=HTMLResponse)
 async def view_log(request: Request, filename: str):
     log_path = os.path.join(os.getcwd(), "logs", filename)
-    raw_content = ""
+    raw_content = None
     parsed_rows = []
     header_columns = []
 
     try:
-        with open(log_path, "r") as file:
-            raw_content = file.read()
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
+
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        raw_content = None
+
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                raw_content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+
+        # Check for UTF-16 BE BOM
+        if raw_content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                raw_content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+
+        # Try UTF-8
+        if raw_content is None:
+            try:
+                raw_content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+
+        # Try common encodings if we still don't have content
+        if raw_content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    raw_content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if raw_content is None:
+            raw_content = binary_content.decode('latin1', errors='replace')
 
         header_columns, parsed_dict_rows = parse_log_file(log_path)
 
@@ -427,13 +471,18 @@ def get_all_logs() -> List[LogEntry]:
     result = []
 
     for filename in log_files:
-        gateway, timestamp = parse_filename(filename)
-        if gateway and timestamp:
-            result.append(LogEntry(
-                gateway=gateway,
-                timestamp=timestamp,
-                filename=filename
-            ))
+        try:
+            gateway, timestamp = parse_filename(filename)
+            if gateway and timestamp:
+                result.append(LogEntry(
+                    gateway=gateway,
+                    timestamp=timestamp,
+                    filename=filename
+                ))
+            else:
+                print(f"Could not parse filename: {filename}")
+        except Exception as e:
+            print(f"Error processing log file {filename}: {e}")
 
     # Sort by timestamp descending (newest first)
     result.sort(key=lambda x: x.timestamp, reverse=True)
@@ -459,65 +508,104 @@ def parse_log_file(log_path):
     header_columns = []
 
     try:
-        with open(log_path, "r") as file:
-            content = file.read()
-            lines = content.splitlines()
-
-            # Find the "SSL-VPN sessions:" section
-            session_section_start = None
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
+
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        content = None
+
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+
+        # Check for UTF-16 BE BOM
+        if content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+
+        # Try UTF-8
+        if content is None:
+            try:
+                content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+
+        # Try common encodings if we still don't have content
+        if content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if content is None:
+            content = binary_content.decode('latin1', errors='replace')
+
+        lines = content.splitlines()
+
+        # Find the "SSL-VPN sessions:" section
+        session_section_start = None
+        for i, line in enumerate(lines):
+            if "SSL-VPN sessions:" in line:
+                session_section_start = i
+                break
+
+        if session_section_start is None:
+            # If SSL-VPN sessions section not found, fall back to the login users section
             for i, line in enumerate(lines):
-                if "SSL-VPN sessions:" in line:
+                if "SSL-VPN Login Users:" in line:
                     session_section_start = i
                     break
+
+        if session_section_start is None:
+            # No recognized sections found
+            return header_columns, parsed_rows
+
+        # Find header line with column names (it should be right after the section title)
+        header_line_idx = session_section_start + 1
+        if header_line_idx < len(lines):
+            header_line = lines[header_line_idx]
+            if "Index" in header_line and "User" in header_line and "Group" in header_line:
+                # Preserve exact order of columns from file
+                header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
-            if session_section_start is None:
-                # If SSL-VPN sessions section not found, fall back to the login users section
-                for i, line in enumerate(lines):
-                    if "SSL-VPN Login Users:" in line:
-                        session_section_start = i
+        # Parse data rows
+        for line in lines[header_line_idx+1:]:
+            # Stop parsing when we hit an empty line or a new section
+            if not line.strip() or line.strip().endswith("#"):
                 break
-
-            if session_section_start is None:
-                # No recognized sections found
-                return header_columns, parsed_rows
-
-            # Find header line with column names (it should be right after the section title)
-            header_line_idx = session_section_start + 1
-            if header_line_idx < len(lines):
-                header_line = lines[header_line_idx]
-                if "Index" in header_line and "User" in header_line and "Group" in header_line:
-                    # Preserve exact order of columns from file
-                    header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
-
-            # Parse data rows
-            for line in lines[header_line_idx+1:]:
-                # Stop parsing when we hit an empty line or a new section
-                if not line.strip() or line.strip().endswith("#"):
-                    break
-
-                if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
-                    columns = [col.strip() for col in line.split("\t") if col]
-                    row_data = {}
-
-                    # Map columns to dictionary in original order with extra whitespace handling
-                    for i, col in enumerate(columns):
-                        if i < len(header_columns):
-                            column_name = header_columns[i]
-                            # Triple strip to ensure all possible whitespace is removed
-                            clean_value = col.strip() if col else ""
-                            # Special handling for Tunnel/Dest IP which may have extra spaces
-                            if column_name == "Tunnel/Dest IP":
-                                clean_value = clean_value.strip()
-                            row_data[column_name] = clean_value
-
-                    # Add source filename metadata
-                    filename = os.path.basename(log_path)
-                    gateway, timestamp = parse_filename(filename)
-                    row_data["_source_file"] = filename
-                    row_data["_gateway"] = gateway
-                    row_data["_timestamp"] = timestamp
-
-                    parsed_rows.append(row_data)
+
+            if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
+                columns = [col.strip() for col in line.split("\t") if col]
+                row_data = {}
+
+                # Map columns to dictionary in original order with extra whitespace handling
+                for i, col in enumerate(columns):
+                    if i < len(header_columns):
+                        column_name = header_columns[i]
+                        # Triple strip to ensure all possible whitespace is removed
+                        clean_value = col.strip() if col else ""
+                        # Special handling for Tunnel/Dest IP which may have extra spaces
+                        if column_name == "Tunnel/Dest IP":
+                            clean_value = clean_value.strip()
+                        row_data[column_name] = clean_value
+
+                # Add source filename metadata
+                filename = os.path.basename(log_path)
+                gateway, timestamp = parse_filename(filename)
+                row_data["_source_file"] = filename
+                row_data["_gateway"] = gateway
+                row_data["_timestamp"] = timestamp
+
+                parsed_rows.append(row_data)
 
     except Exception as e:
         print(f"Error parsing log file {log_path}: {e}")
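
Note: the BOM-sniffing decode cascade above is duplicated nearly verbatim in view_log() and
parse_log_file(). A follow-up commit could factor it into a single helper; below is a minimal
sketch under that assumption (decode_log_bytes is a hypothetical name, not part of this patch).
It keeps the patch's order of BOM checks, UTF-8, then common fallbacks, but drops 'latin1' and
'iso-8859-1' from the trial loop, since latin1 maps every possible byte and therefore never
raises UnicodeDecodeError, which would make the loop's later entries and the final fallback
unreachable.

    # Hypothetical helper consolidating the decode cascade duplicated in this patch.
    def decode_log_bytes(binary_content: bytes) -> str:
        """Decode raw log bytes: BOM detection first, then UTF-8 and common
        fallbacks, then latin1 with replacement so decoding never fails."""
        # BOM checks: UTF-16 LE, then UTF-16 BE
        if binary_content.startswith(b'\xff\xfe'):
            try:
                return binary_content.decode('utf-16-le')
            except UnicodeDecodeError:
                pass
        elif binary_content.startswith(b'\xfe\xff'):
            try:
                return binary_content.decode('utf-16-be')
            except UnicodeDecodeError:
                pass

        # UTF-8 first, then the remaining fallbacks the patch tries strictly
        for encoding in ('utf-8', 'utf-16', 'cp1252'):
            try:
                return binary_content.decode(encoding)
            except UnicodeDecodeError:
                continue

        # Last resort: latin1 decodes any byte string, so this always succeeds
        return binary_content.decode('latin1', errors='replace')

Both call sites could then reduce their forty-line cascades to
content = decode_log_bytes(binary_content), keeping the two code paths from drifting apart.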