CHANGE import of multiple log files

CaffeineFueled 2025-04-15 07:11:30 +02:00
parent 0e3323b7ab
commit a3cff924ba

main.py (224 changed lines)

@@ -136,12 +136,15 @@ async def combined_view(
     # Parse all log files and collect all rows
     for filename in log_files:
         log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
-        if columns:
-            common_columns.update(columns)
-        all_rows.extend(rows)
+        try:
+            columns, rows = parse_log_file(log_path)
+            if columns:
+                common_columns.update(columns)
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in combined view: {e}")

     # Apply gateway filter if specified
     if gateway:
@@ -290,11 +293,14 @@ async def api_all_entries(
     reference_columns = []
     for filename in log_files:
         log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
-        if columns and not reference_columns:
-            # Save column order from first file with columns
-            reference_columns = columns
-        all_rows.extend(rows)
+        try:
+            columns, rows = parse_log_file(log_path)
+            if columns and not reference_columns:
+                # Save column order from first file with columns
+                reference_columns = columns
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in api_all_entries: {e}")

     # Apply gateway filter if specified
     if gateway:
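Both hunks above apply the same guard: a single unreadable or malformed log file is logged and skipped instead of aborting the whole aggregation. A minimal standalone sketch of that shared pattern, assuming the rest of the app; the collect_rows name and signature are hypothetical and not part of this commit:

import os
from typing import Dict, List, Set, Tuple

def collect_rows(logs_dir: str, log_files: List[str],
                 parse_log_file) -> Tuple[Set[str], List[Dict[str, str]]]:
    """Aggregate rows from many log files, skipping files that fail to parse."""
    common_columns: Set[str] = set()
    all_rows: List[Dict[str, str]] = []
    for filename in log_files:
        log_path = os.path.join(logs_dir, filename)
        try:
            columns, rows = parse_log_file(log_path)
            if columns:
                common_columns.update(columns)
            all_rows.extend(rows)
        except Exception as e:
            # One bad file is reported and skipped; the request still succeeds
            print(f"Error processing file {filename}: {e}")
    return common_columns, all_rows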
@@ -355,13 +361,51 @@ class LogRow(BaseModel):
 @app.get("/view/{filename}", response_class=HTMLResponse)
 async def view_log(request: Request, filename: str):
     log_path = os.path.join(os.getcwd(), "logs", filename)
-    raw_content = ""
+    raw_content = None
     parsed_rows = []
     header_columns = []

     try:
-        with open(log_path, "r") as file:
-            raw_content = file.read()
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
+
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        raw_content = None
+
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                raw_content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+
+        # Check for UTF-16 BE BOM
+        if raw_content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                raw_content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+
+        # Try UTF-8
+        if raw_content is None:
+            try:
+                raw_content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+
+        # Try common encodings if we still don't have content
+        if raw_content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    raw_content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if raw_content is None:
+            raw_content = binary_content.decode('latin1', errors='replace')

         header_columns, parsed_dict_rows = parse_log_file(log_path)
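The BOM sniffing and decode chain added here is repeated again in parse_log_file further down. A hedged sketch of how that same order of attempts could live in one reusable helper; the decode_log_bytes name is hypothetical and not part of this commit:

def decode_log_bytes(binary_content: bytes) -> str:
    """Decode raw log bytes in the same priority order as the diff:
    UTF-16 BOMs first, then UTF-8, then common single-byte encodings,
    and finally latin1 with replacement so decoding never raises."""
    if binary_content.startswith(b'\xff\xfe'):
        candidates = ['utf-16-le']
    elif binary_content.startswith(b'\xfe\xff'):
        candidates = ['utf-16-be']
    else:
        candidates = []
    candidates += ['utf-8', 'utf-16', 'latin1', 'cp1252', 'iso-8859-1']
    for encoding in candidates:
        try:
            return binary_content.decode(encoding)
        except UnicodeDecodeError:
            continue
    # latin1 maps every byte value, so the loop normally returns before this;
    # kept to mirror the commit's defensive last-resort fallback
    return binary_content.decode('latin1', errors='replace')

As committed, view_log and parse_log_file each inline this logic; a helper like this would be one way to avoid the duplication.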
@@ -427,13 +471,18 @@ def get_all_logs() -> List[LogEntry]:
     result = []
     for filename in log_files:
-        gateway, timestamp = parse_filename(filename)
-        if gateway and timestamp:
-            result.append(LogEntry(
-                gateway=gateway,
-                timestamp=timestamp,
-                filename=filename
-            ))
+        try:
+            gateway, timestamp = parse_filename(filename)
+            if gateway and timestamp:
+                result.append(LogEntry(
+                    gateway=gateway,
+                    timestamp=timestamp,
+                    filename=filename
+                ))
+            else:
+                print(f"Could not parse filename: {filename}")
+        except Exception as e:
+            print(f"Error processing log file {filename}: {e}")

     # Sort by timestamp descending (newest first)
     result.sort(key=lambda x: x.timestamp, reverse=True)
@@ -459,65 +508,104 @@ def parse_log_file(log_path):
     header_columns = []

     try:
-        with open(log_path, "r") as file:
-            content = file.read()
-            lines = content.splitlines()
-
-            # Find the "SSL-VPN sessions:" section
-            session_section_start = None
-            for i, line in enumerate(lines):
-                if "SSL-VPN sessions:" in line:
-                    session_section_start = i
-                    break
-
-            if session_section_start is None:
-                # If SSL-VPN sessions section not found, fall back to the login users section
-                for i, line in enumerate(lines):
-                    if "SSL-VPN Login Users:" in line:
-                        session_section_start = i
-                        break
-
-            if session_section_start is None:
-                # No recognized sections found
-                return header_columns, parsed_rows
-
-            # Find header line with column names (it should be right after the section title)
-            header_line_idx = session_section_start + 1
-            if header_line_idx < len(lines):
-                header_line = lines[header_line_idx]
-                if "Index" in header_line and "User" in header_line and "Group" in header_line:
-                    # Preserve exact order of columns from file
-                    header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
-
-                    # Parse data rows
-                    for line in lines[header_line_idx+1:]:
-                        # Stop parsing when we hit an empty line or a new section
-                        if not line.strip() or line.strip().endswith("#"):
-                            break
-
-                        if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
-                            columns = [col.strip() for col in line.split("\t") if col]
-                            row_data = {}
-
-                            # Map columns to dictionary in original order with extra whitespace handling
-                            for i, col in enumerate(columns):
-                                if i < len(header_columns):
-                                    column_name = header_columns[i]
-                                    # Triple strip to ensure all possible whitespace is removed
-                                    clean_value = col.strip() if col else ""
-                                    # Special handling for Tunnel/Dest IP which may have extra spaces
-                                    if column_name == "Tunnel/Dest IP":
-                                        clean_value = clean_value.strip()
-                                    row_data[column_name] = clean_value
-
-                            # Add source filename metadata
-                            filename = os.path.basename(log_path)
-                            gateway, timestamp = parse_filename(filename)
-                            row_data["_source_file"] = filename
-                            row_data["_gateway"] = gateway
-                            row_data["_timestamp"] = timestamp
-
-                            parsed_rows.append(row_data)
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
+
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        content = None
+
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+
+        # Check for UTF-16 BE BOM
+        if content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+
+        # Try UTF-8
+        if content is None:
+            try:
+                content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+
+        # Try common encodings if we still don't have content
+        if content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if content is None:
+            content = binary_content.decode('latin1', errors='replace')
+
+        lines = content.splitlines()
+
+        # Find the "SSL-VPN sessions:" section
+        session_section_start = None
+        for i, line in enumerate(lines):
+            if "SSL-VPN sessions:" in line:
+                session_section_start = i
+                break
+
+        if session_section_start is None:
+            # If SSL-VPN sessions section not found, fall back to the login users section
+            for i, line in enumerate(lines):
+                if "SSL-VPN Login Users:" in line:
+                    session_section_start = i
+                    break
+
+        if session_section_start is None:
+            # No recognized sections found
+            return header_columns, parsed_rows
+
+        # Find header line with column names (it should be right after the section title)
+        header_line_idx = session_section_start + 1
+        if header_line_idx < len(lines):
+            header_line = lines[header_line_idx]
+            if "Index" in header_line and "User" in header_line and "Group" in header_line:
+                # Preserve exact order of columns from file
+                header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
+
+                # Parse data rows
+                for line in lines[header_line_idx+1:]:
+                    # Stop parsing when we hit an empty line or a new section
+                    if not line.strip() or line.strip().endswith("#"):
+                        break
+
+                    if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
+                        columns = [col.strip() for col in line.split("\t") if col]
+                        row_data = {}
+
+                        # Map columns to dictionary in original order with extra whitespace handling
+                        for i, col in enumerate(columns):
+                            if i < len(header_columns):
+                                column_name = header_columns[i]
+                                # Triple strip to ensure all possible whitespace is removed
+                                clean_value = col.strip() if col else ""
+                                # Special handling for Tunnel/Dest IP which may have extra spaces
+                                if column_name == "Tunnel/Dest IP":
+                                    clean_value = clean_value.strip()
+                                row_data[column_name] = clean_value
+
+                        # Add source filename metadata
+                        filename = os.path.basename(log_path)
+                        gateway, timestamp = parse_filename(filename)
+                        row_data["_source_file"] = filename
+                        row_data["_gateway"] = gateway
+                        row_data["_timestamp"] = timestamp
+
+                        parsed_rows.append(row_data)
     except Exception as e:
         print(f"Error parsing log file {log_path}: {e}")