CHANGE import of multiple log files

CaffeineFueled 2025-04-15 07:11:30 +02:00
parent 0e3323b7ab
commit a3cff924ba

main.py (224 changed lines)

@@ -136,12 +136,15 @@ async def combined_view(
     # Parse all log files and collect all rows
     for filename in log_files:
         log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
-        if columns:
-            common_columns.update(columns)
-        all_rows.extend(rows)
+        try:
+            columns, rows = parse_log_file(log_path)
+            if columns:
+                common_columns.update(columns)
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in combined view: {e}")

     # Apply gateway filter if specified
     if gateway:
@@ -290,11 +293,14 @@ async def api_all_entries(
     reference_columns = []
     for filename in log_files:
         log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
-        if columns and not reference_columns:
-            # Save column order from first file with columns
-            reference_columns = columns
-        all_rows.extend(rows)
+        try:
+            columns, rows = parse_log_file(log_path)
+            if columns and not reference_columns:
+                # Save column order from first file with columns
+                reference_columns = columns
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in api_all_entries: {e}")

     # Apply gateway filter if specified
     if gateway:
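Both hunks above apply the same guard: a single unreadable or malformed log file is logged and skipped instead of aborting the whole aggregation. A minimal standalone sketch of that shared pattern, assuming the rest of the app; the collect_rows name and signature are hypothetical and not part of this commit:

import os
from typing import Dict, List, Set, Tuple

def collect_rows(logs_dir: str, log_files: List[str],
                 parse_log_file) -> Tuple[Set[str], List[Dict[str, str]]]:
    """Aggregate rows from many log files, skipping files that fail to parse."""
    common_columns: Set[str] = set()
    all_rows: List[Dict[str, str]] = []
    for filename in log_files:
        log_path = os.path.join(logs_dir, filename)
        try:
            columns, rows = parse_log_file(log_path)
            if columns:
                common_columns.update(columns)
            all_rows.extend(rows)
        except Exception as e:
            # One bad file is reported and skipped; the request still succeeds
            print(f"Error processing file {filename}: {e}")
    return common_columns, all_rows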
@@ -355,13 +361,51 @@ class LogRow(BaseModel):
 @app.get("/view/{filename}", response_class=HTMLResponse)
 async def view_log(request: Request, filename: str):
     log_path = os.path.join(os.getcwd(), "logs", filename)
-    raw_content = ""
+    raw_content = None
     parsed_rows = []
     header_columns = []

     try:
-        with open(log_path, "r") as file:
-            raw_content = file.read()
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
+
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        raw_content = None
+
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                raw_content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+
+        # Check for UTF-16 BE BOM
+        if raw_content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                raw_content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+
+        # Try UTF-8
+        if raw_content is None:
+            try:
+                raw_content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+
+        # Try common encodings if we still don't have content
+        if raw_content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    raw_content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if raw_content is None:
+            raw_content = binary_content.decode('latin1', errors='replace')

         header_columns, parsed_dict_rows = parse_log_file(log_path)
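The BOM sniffing and decode chain added here is repeated again in parse_log_file further down. A hedged sketch of how that same order of attempts could live in one reusable helper; the decode_log_bytes name is hypothetical and not part of this commit:

def decode_log_bytes(binary_content: bytes) -> str:
    """Decode raw log bytes in the same priority order as the diff:
    UTF-16 BOMs first, then UTF-8, then common single-byte encodings,
    and finally latin1 with replacement so decoding never raises."""
    if binary_content.startswith(b'\xff\xfe'):
        candidates = ['utf-16-le']
    elif binary_content.startswith(b'\xfe\xff'):
        candidates = ['utf-16-be']
    else:
        candidates = []
    candidates += ['utf-8', 'utf-16', 'latin1', 'cp1252', 'iso-8859-1']
    for encoding in candidates:
        try:
            return binary_content.decode(encoding)
        except UnicodeDecodeError:
            continue
    # latin1 maps every byte value, so the loop normally returns before this;
    # kept to mirror the commit's defensive last-resort fallback
    return binary_content.decode('latin1', errors='replace')

As committed, view_log and parse_log_file each inline this logic; a helper like this would be one way to avoid the duplication.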
@@ -427,13 +471,18 @@ def get_all_logs() -> List[LogEntry]:
     result = []
     for filename in log_files:
-        gateway, timestamp = parse_filename(filename)
-        if gateway and timestamp:
-            result.append(LogEntry(
-                gateway=gateway,
-                timestamp=timestamp,
-                filename=filename
-            ))
+        try:
+            gateway, timestamp = parse_filename(filename)
+            if gateway and timestamp:
+                result.append(LogEntry(
+                    gateway=gateway,
+                    timestamp=timestamp,
+                    filename=filename
+                ))
+            else:
+                print(f"Could not parse filename: {filename}")
+        except Exception as e:
+            print(f"Error processing log file {filename}: {e}")

     # Sort by timestamp descending (newest first)
     result.sort(key=lambda x: x.timestamp, reverse=True)
@@ -459,65 +508,104 @@ def parse_log_file(log_path):
     header_columns = []

     try:
-        with open(log_path, "r") as file:
-            content = file.read()
-            lines = content.splitlines()
-
-            # Find the "SSL-VPN sessions:" section
-            session_section_start = None
-            for i, line in enumerate(lines):
-                if "SSL-VPN sessions:" in line:
-                    session_section_start = i
-                    break
-
-            if session_section_start is None:
-                # If SSL-VPN sessions section not found, fall back to the login users section
-                for i, line in enumerate(lines):
-                    if "SSL-VPN Login Users:" in line:
-                        session_section_start = i
-                        break
-
-            if session_section_start is None:
-                # No recognized sections found
-                return header_columns, parsed_rows
-
-            # Find header line with column names (it should be right after the section title)
-            header_line_idx = session_section_start + 1
-            if header_line_idx < len(lines):
-                header_line = lines[header_line_idx]
-                if "Index" in header_line and "User" in header_line and "Group" in header_line:
-                    # Preserve exact order of columns from file
-                    header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
-
-                    # Parse data rows
-                    for line in lines[header_line_idx+1:]:
-                        # Stop parsing when we hit an empty line or a new section
-                        if not line.strip() or line.strip().endswith("#"):
-                            break
-
-                        if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
-                            columns = [col.strip() for col in line.split("\t") if col]
-                            row_data = {}
-
-                            # Map columns to dictionary in original order with extra whitespace handling
-                            for i, col in enumerate(columns):
-                                if i < len(header_columns):
-                                    column_name = header_columns[i]
-                                    # Triple strip to ensure all possible whitespace is removed
-                                    clean_value = col.strip() if col else ""
-                                    # Special handling for Tunnel/Dest IP which may have extra spaces
-                                    if column_name == "Tunnel/Dest IP":
-                                        clean_value = clean_value.strip()
-                                    row_data[column_name] = clean_value
-
-                            # Add source filename metadata
-                            filename = os.path.basename(log_path)
-                            gateway, timestamp = parse_filename(filename)
-                            row_data["_source_file"] = filename
-                            row_data["_gateway"] = gateway
-                            row_data["_timestamp"] = timestamp
-
-                            parsed_rows.append(row_data)
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
+
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        content = None
+
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+
+        # Check for UTF-16 BE BOM
+        if content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+
+        # Try UTF-8
+        if content is None:
+            try:
+                content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+
+        # Try common encodings if we still don't have content
+        if content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if content is None:
+            content = binary_content.decode('latin1', errors='replace')
+
+        lines = content.splitlines()
+
+        # Find the "SSL-VPN sessions:" section
+        session_section_start = None
+        for i, line in enumerate(lines):
+            if "SSL-VPN sessions:" in line:
+                session_section_start = i
+                break
+
+        if session_section_start is None:
+            # If SSL-VPN sessions section not found, fall back to the login users section
+            for i, line in enumerate(lines):
+                if "SSL-VPN Login Users:" in line:
+                    session_section_start = i
+                    break
+
+        if session_section_start is None:
+            # No recognized sections found
+            return header_columns, parsed_rows
+
+        # Find header line with column names (it should be right after the section title)
+        header_line_idx = session_section_start + 1
+        if header_line_idx < len(lines):
+            header_line = lines[header_line_idx]
+            if "Index" in header_line and "User" in header_line and "Group" in header_line:
+                # Preserve exact order of columns from file
+                header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
+
+                # Parse data rows
+                for line in lines[header_line_idx+1:]:
+                    # Stop parsing when we hit an empty line or a new section
+                    if not line.strip() or line.strip().endswith("#"):
+                        break
+
+                    if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
+                        columns = [col.strip() for col in line.split("\t") if col]
+                        row_data = {}
+
+                        # Map columns to dictionary in original order with extra whitespace handling
+                        for i, col in enumerate(columns):
+                            if i < len(header_columns):
+                                column_name = header_columns[i]
+                                # Triple strip to ensure all possible whitespace is removed
+                                clean_value = col.strip() if col else ""
+                                # Special handling for Tunnel/Dest IP which may have extra spaces
+                                if column_name == "Tunnel/Dest IP":
+                                    clean_value = clean_value.strip()
+                                row_data[column_name] = clean_value
+
+                        # Add source filename metadata
+                        filename = os.path.basename(log_path)
+                        gateway, timestamp = parse_filename(filename)
+                        row_data["_source_file"] = filename
+                        row_data["_gateway"] = gateway
+                        row_data["_timestamp"] = timestamp
+
+                        parsed_rows.append(row_data)
     except Exception as e:
         print(f"Error parsing log file {log_path}: {e}")