CHANGE import of multiple log files

2025-04-15 07:11:30 +02:00 · 2025-04-15 07:11:30 +02:00 · a3cff924ba
commit a3cff924ba
parent 0e3323b7ab
1 changed file with 163 additions and 75 deletions
--- a/main.py
+++ b/main.py
@@ -136,12 +136,15 @@ async def combined_view(
    # Parse all log files and collect all rows
    for filename in log_files:
        log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
+        try:
+            columns, rows = parse_log_file(log_path)
            
-        if columns:
-            common_columns.update(columns)
+            if columns:
+                common_columns.update(columns)
            
-        all_rows.extend(rows)
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in combined view: {e}")
    
    # Apply gateway filter if specified
    if gateway:
@@ -290,11 +293,14 @@ async def api_all_entries(
    reference_columns = []
    for filename in log_files:
        log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
-        if columns and not reference_columns:
-            # Save column order from first file with columns
-            reference_columns = columns
-        all_rows.extend(rows)
+        try:
+            columns, rows = parse_log_file(log_path)
+            if columns and not reference_columns:
+                # Save column order from first file with columns
+                reference_columns = columns
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in api_all_entries: {e}")
    
    # Apply gateway filter if specified
    if gateway:
@@ -355,13 +361,51 @@ class LogRow(BaseModel):
@app.get("/view/{filename}", response_class=HTMLResponse)
 async def view_log(request: Request, filename: str):
    log_path = os.path.join(os.getcwd(), "logs", filename)
-    raw_content = ""
+    raw_content = None
    parsed_rows = []
    header_columns = []
    
    try:
-        with open(log_path, "r") as file:
-            raw_content = file.read()
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
+        
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        raw_content = None
+        
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                raw_content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+        
+        # Check for UTF-16 BE BOM
+        if raw_content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                raw_content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+                
+        # Try UTF-8
+        if raw_content is None:
+            try:
+                raw_content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+                
+        # Try common encodings if we still don't have content
+        if raw_content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    raw_content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+        
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if raw_content is None:
+            raw_content = binary_content.decode('latin1', errors='replace')
        
        header_columns, parsed_dict_rows = parse_log_file(log_path)
        
@@ -427,13 +471,18 @@ def get_all_logs() -> List[LogEntry]:
    result = []
    
    for filename in log_files:
-        gateway, timestamp = parse_filename(filename)
-        if gateway and timestamp:
-            result.append(LogEntry(
-                gateway=gateway,
-                timestamp=timestamp,
-                filename=filename
-            ))
+        try:
+            gateway, timestamp = parse_filename(filename)
+            if gateway and timestamp:
+                result.append(LogEntry(
+                    gateway=gateway,
+                    timestamp=timestamp,
+                    filename=filename
+                ))
+            else:
+                print(f"Could not parse filename: {filename}")
+        except Exception as e:
+            print(f"Error processing log file {filename}: {e}")
    
    # Sort by timestamp descending (newest first)
    result.sort(key=lambda x: x.timestamp, reverse=True)
@@ -459,65 +508,104 @@ def parse_log_file(log_path):
    header_columns = []
    
    try:
-        with open(log_path, "r") as file:
-            content = file.read()
-            lines = content.splitlines()
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
        
-            # Find the "SSL-VPN sessions:" section
-            session_section_start = None
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        content = None
+        
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+        
+        # Check for UTF-16 BE BOM
+        if content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+                
+        # Try UTF-8
+        if content is None:
+            try:
+                content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+                
+        # Try common encodings if we still don't have content
+        if content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+        
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if content is None:
+            content = binary_content.decode('latin1', errors='replace')
+        
+        lines = content.splitlines()
+        
+        # Find the "SSL-VPN sessions:" section
+        session_section_start = None
+        for i, line in enumerate(lines):
+            if "SSL-VPN sessions:" in line:
+                session_section_start = i
+                break
+        
+        if session_section_start is None:
+            # If SSL-VPN sessions section not found, fall back to the login users section
            for i, line in enumerate(lines):
-                if "SSL-VPN sessions:" in line:
+                if "SSL-VPN Login Users:" in line:
                    session_section_start = i
                    break
        
-            if session_section_start is None:
-                # If SSL-VPN sessions section not found, fall back to the login users section
-                for i, line in enumerate(lines):
-                    if "SSL-VPN Login Users:" in line:
-                        session_section_start = i
+        if session_section_start is None:
+            # No recognized sections found
+            return header_columns, parsed_rows
+        
+        # Find header line with column names (it should be right after the section title)
+        header_line_idx = session_section_start + 1
+        if header_line_idx < len(lines):
+            header_line = lines[header_line_idx]
+            if "Index" in header_line and "User" in header_line and "Group" in header_line:
+                # Preserve exact order of columns from file
+                header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
+            
+                # Parse data rows
+                for line in lines[header_line_idx+1:]:
+                    # Stop parsing when we hit an empty line or a new section
+                    if not line.strip() or line.strip().endswith("#"):
                        break
                        
-            if session_section_start is None:
-                # No recognized sections found
-                return header_columns, parsed_rows
+                    if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
+                        columns = [col.strip() for col in line.split("\t") if col]
+                        row_data = {}
                        
-            # Find header line with column names (it should be right after the section title)
-            header_line_idx = session_section_start + 1
-            if header_line_idx < len(lines):
-                header_line = lines[header_line_idx]
-                if "Index" in header_line and "User" in header_line and "Group" in header_line:
-                    # Preserve exact order of columns from file
-                    header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
+                        # Map columns to dictionary in original order with extra whitespace handling
+                        for i, col in enumerate(columns):
+                            if i < len(header_columns):
+                                column_name = header_columns[i]
+                                # Triple strip to ensure all possible whitespace is removed
+                                clean_value = col.strip() if col else ""
+                                # Special handling for Tunnel/Dest IP which may have extra spaces
+                                if column_name == "Tunnel/Dest IP":
+                                    clean_value = clean_value.strip()
+                                row_data[column_name] = clean_value
                        
-                    # Parse data rows
-                    for line in lines[header_line_idx+1:]:
-                        # Stop parsing when we hit an empty line or a new section
-                        if not line.strip() or line.strip().endswith("#"):
-                            break
+                        # Add source filename metadata
+                        filename = os.path.basename(log_path)
+                        gateway, timestamp = parse_filename(filename)
+                        row_data["_source_file"] = filename
+                        row_data["_gateway"] = gateway
+                        row_data["_timestamp"] = timestamp
                        
-                        if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
-                            columns = [col.strip() for col in line.split("\t") if col]
-                            row_data = {}
-                            
-                            # Map columns to dictionary in original order with extra whitespace handling
-                            for i, col in enumerate(columns):
-                                if i < len(header_columns):
-                                    column_name = header_columns[i]
-                                    # Triple strip to ensure all possible whitespace is removed
-                                    clean_value = col.strip() if col else ""
-                                    # Special handling for Tunnel/Dest IP which may have extra spaces
-                                    if column_name == "Tunnel/Dest IP":
-                                        clean_value = clean_value.strip()
-                                    row_data[column_name] = clean_value
-                            
-                            # Add source filename metadata
-                            filename = os.path.basename(log_path)
-                            gateway, timestamp = parse_filename(filename)
-                            row_data["_source_file"] = filename
-                            row_data["_gateway"] = gateway
-                            row_data["_timestamp"] = timestamp
-                            
-                            parsed_rows.append(row_data)
+                        parsed_rows.append(row_data)
    except Exception as e:
        print(f"Error parsing log file {log_path}: {e}")