CHANGE import of multiple log files

This commit is contained in:
CaffeineFueled 2025-04-15 07:11:30 +02:00
parent 0e3323b7ab
commit a3cff924ba

238
main.py
View file

@ -136,12 +136,15 @@ async def combined_view(
# Parse all log files and collect all rows
for filename in log_files:
log_path = os.path.join(logs_dir, filename)
columns, rows = parse_log_file(log_path)
if columns:
common_columns.update(columns)
all_rows.extend(rows)
try:
columns, rows = parse_log_file(log_path)
if columns:
common_columns.update(columns)
all_rows.extend(rows)
except Exception as e:
print(f"Error processing file {filename} in combined view: {e}")
# Apply gateway filter if specified
if gateway:
@ -290,11 +293,14 @@ async def api_all_entries(
reference_columns = []
for filename in log_files:
log_path = os.path.join(logs_dir, filename)
columns, rows = parse_log_file(log_path)
if columns and not reference_columns:
# Save column order from first file with columns
reference_columns = columns
all_rows.extend(rows)
try:
columns, rows = parse_log_file(log_path)
if columns and not reference_columns:
# Save column order from first file with columns
reference_columns = columns
all_rows.extend(rows)
except Exception as e:
print(f"Error processing file {filename} in api_all_entries: {e}")
# Apply gateway filter if specified
if gateway:
@ -355,13 +361,51 @@ class LogRow(BaseModel):
@app.get("/view/{filename}", response_class=HTMLResponse)
async def view_log(request: Request, filename: str):
log_path = os.path.join(os.getcwd(), "logs", filename)
raw_content = ""
raw_content = None
parsed_rows = []
header_columns = []
try:
with open(log_path, "r") as file:
raw_content = file.read()
# Read the file in binary mode first to check for encodings
with open(log_path, "rb") as file:
binary_content = file.read()
# Check for BOM (Byte Order Mark) at the beginning of the file
raw_content = None
# Check for UTF-16 LE BOM
if binary_content.startswith(b'\xff\xfe'):
try:
raw_content = binary_content.decode('utf-16-le')
except UnicodeDecodeError:
pass
# Check for UTF-16 BE BOM
if raw_content is None and binary_content.startswith(b'\xfe\xff'):
try:
raw_content = binary_content.decode('utf-16-be')
except UnicodeDecodeError:
pass
# Try UTF-8
if raw_content is None:
try:
raw_content = binary_content.decode('utf-8')
except UnicodeDecodeError:
pass
# Try common encodings if we still don't have content
if raw_content is None:
for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
try:
raw_content = binary_content.decode(encoding)
break
except UnicodeDecodeError:
continue
# If all decodings fail, use latin1 as a fallback with replacement
if raw_content is None:
raw_content = binary_content.decode('latin1', errors='replace')
header_columns, parsed_dict_rows = parse_log_file(log_path)
@ -427,13 +471,18 @@ def get_all_logs() -> List[LogEntry]:
result = []
for filename in log_files:
gateway, timestamp = parse_filename(filename)
if gateway and timestamp:
result.append(LogEntry(
gateway=gateway,
timestamp=timestamp,
filename=filename
))
try:
gateway, timestamp = parse_filename(filename)
if gateway and timestamp:
result.append(LogEntry(
gateway=gateway,
timestamp=timestamp,
filename=filename
))
else:
print(f"Could not parse filename: {filename}")
except Exception as e:
print(f"Error processing log file {filename}: {e}")
# Sort by timestamp descending (newest first)
result.sort(key=lambda x: x.timestamp, reverse=True)
@ -459,65 +508,104 @@ def parse_log_file(log_path):
header_columns = []
try:
with open(log_path, "r") as file:
content = file.read()
lines = content.splitlines()
# Find the "SSL-VPN sessions:" section
session_section_start = None
# Read the file in binary mode first to check for encodings
with open(log_path, "rb") as file:
binary_content = file.read()
# Check for BOM (Byte Order Mark) at the beginning of the file
content = None
# Check for UTF-16 LE BOM
if binary_content.startswith(b'\xff\xfe'):
try:
content = binary_content.decode('utf-16-le')
except UnicodeDecodeError:
pass
# Check for UTF-16 BE BOM
if content is None and binary_content.startswith(b'\xfe\xff'):
try:
content = binary_content.decode('utf-16-be')
except UnicodeDecodeError:
pass
# Try UTF-8
if content is None:
try:
content = binary_content.decode('utf-8')
except UnicodeDecodeError:
pass
# Try common encodings if we still don't have content
if content is None:
for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
try:
content = binary_content.decode(encoding)
break
except UnicodeDecodeError:
continue
# If all decodings fail, use latin1 as a fallback with replacement
if content is None:
content = binary_content.decode('latin1', errors='replace')
lines = content.splitlines()
# Find the "SSL-VPN sessions:" section
session_section_start = None
for i, line in enumerate(lines):
if "SSL-VPN sessions:" in line:
session_section_start = i
break
if session_section_start is None:
# If SSL-VPN sessions section not found, fall back to the login users section
for i, line in enumerate(lines):
if "SSL-VPN sessions:" in line:
if "SSL-VPN Login Users:" in line:
session_section_start = i
break
if session_section_start is None:
# No recognized sections found
return header_columns, parsed_rows
# Find header line with column names (it should be right after the section title)
header_line_idx = session_section_start + 1
if header_line_idx < len(lines):
header_line = lines[header_line_idx]
if "Index" in header_line and "User" in header_line and "Group" in header_line:
# Preserve exact order of columns from file
header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
if session_section_start is None:
# If SSL-VPN sessions section not found, fall back to the login users section
for i, line in enumerate(lines):
if "SSL-VPN Login Users:" in line:
session_section_start = i
# Parse data rows
for line in lines[header_line_idx+1:]:
# Stop parsing when we hit an empty line or a new section
if not line.strip() or line.strip().endswith("#"):
break
if session_section_start is None:
# No recognized sections found
return header_columns, parsed_rows
# Find header line with column names (it should be right after the section title)
header_line_idx = session_section_start + 1
if header_line_idx < len(lines):
header_line = lines[header_line_idx]
if "Index" in header_line and "User" in header_line and "Group" in header_line:
# Preserve exact order of columns from file
header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
# Parse data rows
for line in lines[header_line_idx+1:]:
# Stop parsing when we hit an empty line or a new section
if not line.strip() or line.strip().endswith("#"):
break
if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
columns = [col.strip() for col in line.split("\t") if col]
row_data = {}
# Map columns to dictionary in original order with extra whitespace handling
for i, col in enumerate(columns):
if i < len(header_columns):
column_name = header_columns[i]
# Triple strip to ensure all possible whitespace is removed
clean_value = col.strip() if col else ""
# Special handling for Tunnel/Dest IP which may have extra spaces
if column_name == "Tunnel/Dest IP":
clean_value = clean_value.strip()
row_data[column_name] = clean_value
# Add source filename metadata
filename = os.path.basename(log_path)
gateway, timestamp = parse_filename(filename)
row_data["_source_file"] = filename
row_data["_gateway"] = gateway
row_data["_timestamp"] = timestamp
parsed_rows.append(row_data)
if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
columns = [col.strip() for col in line.split("\t") if col]
row_data = {}
# Map columns to dictionary in original order with extra whitespace handling
for i, col in enumerate(columns):
if i < len(header_columns):
column_name = header_columns[i]
# Triple strip to ensure all possible whitespace is removed
clean_value = col.strip() if col else ""
# Special handling for Tunnel/Dest IP which may have extra spaces
if column_name == "Tunnel/Dest IP":
clean_value = clean_value.strip()
row_data[column_name] = clean_value
# Add source filename metadata
filename = os.path.basename(log_path)
gateway, timestamp = parse_filename(filename)
row_data["_source_file"] = filename
row_data["_gateway"] = gateway
row_data["_timestamp"] = timestamp
parsed_rows.append(row_data)
except Exception as e:
print(f"Error parsing log file {log_path}: {e}")