From a3cff924ba94a0c4161b98d959f8e8c9c7554413 Mon Sep 17 00:00:00 2001
From: CaffeineFueled
Date: Tue, 15 Apr 2025 07:11:30 +0200
Subject: [PATCH] CHANGE import of multiple log files

---
 main.py | 238 ++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 163 insertions(+), 75 deletions(-)

diff --git a/main.py b/main.py
index 9831d45..9fd21c9 100644
--- a/main.py
+++ b/main.py
@@ -136,12 +136,15 @@ async def combined_view(
     # Parse all log files and collect all rows
     for filename in log_files:
         log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
-
-        if columns:
-            common_columns.update(columns)
-
-        all_rows.extend(rows)
+        try:
+            columns, rows = parse_log_file(log_path)
+
+            if columns:
+                common_columns.update(columns)
+
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in combined view: {e}")
 
     # Apply gateway filter if specified
     if gateway:
@@ -290,11 +293,14 @@ async def api_all_entries(
     reference_columns = []
     for filename in log_files:
         log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
-        if columns and not reference_columns:
-            # Save column order from first file with columns
-            reference_columns = columns
-        all_rows.extend(rows)
+        try:
+            columns, rows = parse_log_file(log_path)
+            if columns and not reference_columns:
+                # Save column order from first file with columns
+                reference_columns = columns
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in api_all_entries: {e}")
 
     # Apply gateway filter if specified
     if gateway:
@@ -355,13 +361,51 @@ class LogRow(BaseModel):
 @app.get("/view/{filename}", response_class=HTMLResponse)
 async def view_log(request: Request, filename: str):
     log_path = os.path.join(os.getcwd(), "logs", filename)
-    raw_content = ""
+    raw_content = None
     parsed_rows = []
     header_columns = []
 
     try:
-        with open(log_path, "r") as file:
-            raw_content = file.read()
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
+
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        raw_content = None
+
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                raw_content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+
+        # Check for UTF-16 BE BOM
+        if raw_content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                raw_content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+
+        # Try UTF-8
+        if raw_content is None:
+            try:
+                raw_content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+
+        # Try common encodings if we still don't have content
+        if raw_content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    raw_content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if raw_content is None:
+            raw_content = binary_content.decode('latin1', errors='replace')
 
         header_columns, parsed_dict_rows = parse_log_file(log_path)
 
@@ -427,13 +471,18 @@ def get_all_logs() -> List[LogEntry]:
     result = []
 
     for filename in log_files:
-        gateway, timestamp = parse_filename(filename)
-        if gateway and timestamp:
-            result.append(LogEntry(
-                gateway=gateway,
-                timestamp=timestamp,
-                filename=filename
-            ))
+        try:
+            gateway, timestamp = parse_filename(filename)
+            if gateway and timestamp:
+                result.append(LogEntry(
+                    gateway=gateway,
+                    timestamp=timestamp,
+                    filename=filename
+                ))
+            else:
+                print(f"Could not parse filename: {filename}")
+        except Exception as e:
+            print(f"Error processing log file {filename}: {e}")
 
     # Sort by timestamp descending (newest first)
     result.sort(key=lambda x: x.timestamp, reverse=True)
@@ -459,65 +508,104 @@ def parse_log_file(log_path):
     header_columns = []
 
     try:
-        with open(log_path, "r") as file:
-            content = file.read()
-            lines = content.splitlines()
-
-            # Find the "SSL-VPN sessions:" section
-            session_section_start = None
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
+
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        content = None
+
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+
+        # Check for UTF-16 BE BOM
+        if content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+
+        # Try UTF-8
+        if content is None:
+            try:
+                content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+
+        # Try common encodings if we still don't have content
+        if content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if content is None:
+            content = binary_content.decode('latin1', errors='replace')
+
+        lines = content.splitlines()
+
+        # Find the "SSL-VPN sessions:" section
+        session_section_start = None
+        for i, line in enumerate(lines):
+            if "SSL-VPN sessions:" in line:
+                session_section_start = i
+                break
+
+        if session_section_start is None:
+            # If SSL-VPN sessions section not found, fall back to the login users section
             for i, line in enumerate(lines):
-                if "SSL-VPN sessions:" in line:
+                if "SSL-VPN Login Users:" in line:
                     session_section_start = i
                     break
+
+        if session_section_start is None:
+            # No recognized sections found
+            return header_columns, parsed_rows
+
+        # Find header line with column names (it should be right after the section title)
+        header_line_idx = session_section_start + 1
+        if header_line_idx < len(lines):
+            header_line = lines[header_line_idx]
+            if "Index" in header_line and "User" in header_line and "Group" in header_line:
+                # Preserve exact order of columns from file
+                header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
-            if session_section_start is None:
-                # If SSL-VPN sessions section not found, fall back to the login users section
-                for i, line in enumerate(lines):
-                    if "SSL-VPN Login Users:" in line:
-                        session_section_start = i
+        # Parse data rows
+        for line in lines[header_line_idx+1:]:
+            # Stop parsing when we hit an empty line or a new section
+            if not line.strip() or line.strip().endswith("#"):
                 break
-
-            if session_section_start is None:
-                # No recognized sections found
-                return header_columns, parsed_rows
-
-            # Find header line with column names (it should be right after the section title)
-            header_line_idx = session_section_start + 1
-            if header_line_idx < len(lines):
-                header_line = lines[header_line_idx]
-                if "Index" in header_line and "User" in header_line and "Group" in header_line:
-                    # Preserve exact order of columns from file
-                    header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
-
-            # Parse data rows
-            for line in lines[header_line_idx+1:]:
-                # Stop parsing when we hit an empty line or a new section
-                if not line.strip() or line.strip().endswith("#"):
-                    break
-
-                if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
-                    columns = [col.strip() for col in line.split("\t") if col]
-                    row_data = {}
-
-                    # Map columns to dictionary in original order with extra whitespace handling
-                    for i, col in enumerate(columns):
-                        if i < len(header_columns):
-                            column_name = header_columns[i]
-                            # Triple strip to ensure all possible whitespace is removed
-                            clean_value = col.strip() if col else ""
-                            # Special handling for Tunnel/Dest IP which may have extra spaces
-                            if column_name == "Tunnel/Dest IP":
-                                clean_value = clean_value.strip()
-                            row_data[column_name] = clean_value
-
-                    # Add source filename metadata
-                    filename = os.path.basename(log_path)
-                    gateway, timestamp = parse_filename(filename)
-                    row_data["_source_file"] = filename
-                    row_data["_gateway"] = gateway
-                    row_data["_timestamp"] = timestamp
-
-                    parsed_rows.append(row_data)
+
+            if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
+                columns = [col.strip() for col in line.split("\t") if col]
+                row_data = {}
+
+                # Map columns to dictionary in original order with extra whitespace handling
+                for i, col in enumerate(columns):
+                    if i < len(header_columns):
+                        column_name = header_columns[i]
+                        # Triple strip to ensure all possible whitespace is removed
+                        clean_value = col.strip() if col else ""
+                        # Special handling for Tunnel/Dest IP which may have extra spaces
+                        if column_name == "Tunnel/Dest IP":
+                            clean_value = clean_value.strip()
+                        row_data[column_name] = clean_value
+
+                # Add source filename metadata
+                filename = os.path.basename(log_path)
+                gateway, timestamp = parse_filename(filename)
+                row_data["_source_file"] = filename
+                row_data["_gateway"] = gateway
+                row_data["_timestamp"] = timestamp
+
+                parsed_rows.append(row_data)
 
     except Exception as e:
         print(f"Error parsing log file {log_path}: {e}")
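
Note: the BOM-sniffing decode cascade above is duplicated nearly verbatim in view_log() and
parse_log_file(). A follow-up commit could factor it into a single helper; below is a minimal
sketch under that assumption (decode_log_bytes is a hypothetical name, not part of this patch).
It keeps the patch's order of BOM checks, UTF-8, then common fallbacks, but drops 'latin1' and
'iso-8859-1' from the trial loop, since latin1 maps every possible byte and therefore never
raises UnicodeDecodeError, which would make the loop's later entries and the final fallback
unreachable.

    # Hypothetical helper consolidating the decode cascade duplicated in this patch.
    def decode_log_bytes(binary_content: bytes) -> str:
        """Decode raw log bytes: BOM detection first, then UTF-8 and common
        fallbacks, then latin1 with replacement so decoding never fails."""
        # BOM checks: UTF-16 LE, then UTF-16 BE
        if binary_content.startswith(b'\xff\xfe'):
            try:
                return binary_content.decode('utf-16-le')
            except UnicodeDecodeError:
                pass
        elif binary_content.startswith(b'\xfe\xff'):
            try:
                return binary_content.decode('utf-16-be')
            except UnicodeDecodeError:
                pass

        # UTF-8 first, then the remaining fallbacks the patch tries strictly
        for encoding in ('utf-8', 'utf-16', 'cp1252'):
            try:
                return binary_content.decode(encoding)
            except UnicodeDecodeError:
                continue

        # Last resort: latin1 decodes any byte string, so this always succeeds
        return binary_content.decode('latin1', errors='replace')

Both call sites could then reduce their forty-line cascades to
content = decode_log_bytes(binary_content), keeping the two code paths from drifting apart.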