CHANGE import of multiple log files
parent 0e3323b7ab
commit a3cff924ba
1 changed file with 163 additions and 75 deletions
main.py
@@ -136,12 +136,15 @@ async def combined_view(
     # Parse all log files and collect all rows
     for filename in log_files:
         log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
+        try:
+            columns, rows = parse_log_file(log_path)
 
-        if columns:
-            common_columns.update(columns)
+            if columns:
+                common_columns.update(columns)
 
-        all_rows.extend(rows)
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in combined view: {e}")
 
     # Apply gateway filter if specified
     if gateway:
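Note: the hunk above, and the two loops changed below, all apply the same per-file error isolation so that one unreadable log no longer aborts the whole multi-file import. A minimal standalone sketch of that pattern, assuming only the parse_log_file signature visible in this diff (the collect_rows helper name is illustrative, not part of main.py):

import os

def collect_rows(logs_dir, log_files, parse):
    """Collect rows from every parseable log file; report and skip the rest."""
    all_rows = []
    for filename in log_files:
        log_path = os.path.join(logs_dir, filename)
        try:
            _columns, rows = parse(log_path)  # e.g. parse_log_file from main.py
            all_rows.extend(rows)
        except Exception as e:
            # One bad file should not stop the import of the others.
            print(f"Error processing file {filename}: {e}")
    return all_rows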
@@ -290,11 +293,14 @@ async def api_all_entries(
     reference_columns = []
     for filename in log_files:
         log_path = os.path.join(logs_dir, filename)
-        columns, rows = parse_log_file(log_path)
-        if columns and not reference_columns:
-            # Save column order from first file with columns
-            reference_columns = columns
-        all_rows.extend(rows)
+        try:
+            columns, rows = parse_log_file(log_path)
+            if columns and not reference_columns:
+                # Save column order from first file with columns
+                reference_columns = columns
+            all_rows.extend(rows)
+        except Exception as e:
+            print(f"Error processing file {filename} in api_all_entries: {e}")
 
     # Apply gateway filter if specified
     if gateway:
@@ -355,13 +361,51 @@ class LogRow(BaseModel):
 @app.get("/view/{filename}", response_class=HTMLResponse)
 async def view_log(request: Request, filename: str):
     log_path = os.path.join(os.getcwd(), "logs", filename)
-    raw_content = ""
+    raw_content = None
     parsed_rows = []
     header_columns = []
 
     try:
-        with open(log_path, "r") as file:
-            raw_content = file.read()
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
 
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        raw_content = None
+
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                raw_content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+
+        # Check for UTF-16 BE BOM
+        if raw_content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                raw_content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+
+        # Try UTF-8
+        if raw_content is None:
+            try:
+                raw_content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+
+        # Try common encodings if we still don't have content
+        if raw_content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    raw_content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if raw_content is None:
+            raw_content = binary_content.decode('latin1', errors='replace')
+
         header_columns, parsed_dict_rows = parse_log_file(log_path)
 
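For reference, the encoding fallback this hunk adds to view_log, pulled out as a standalone helper. This is a sketch only, assuming nothing beyond the logic shown above; the decode_log_bytes name is illustrative and not part of the commit. The same BOM-then-fallback sequence reappears in parse_log_file further down, so a shared helper like this would avoid the duplication:

def decode_log_bytes(binary_content: bytes) -> str:
    """Decode raw log bytes: BOM sniffing first, then common encodings,
    finally latin1 with replacement so decoding never fails outright."""
    if binary_content.startswith(b'\xff\xfe'):  # UTF-16 LE BOM
        try:
            return binary_content.decode('utf-16-le')
        except UnicodeDecodeError:
            pass
    if binary_content.startswith(b'\xfe\xff'):  # UTF-16 BE BOM
        try:
            return binary_content.decode('utf-16-be')
        except UnicodeDecodeError:
            pass
    for encoding in ('utf-8', 'utf-16', 'latin1', 'cp1252', 'iso-8859-1'):
        try:
            return binary_content.decode(encoding)
        except UnicodeDecodeError:
            continue
    return binary_content.decode('latin1', errors='replace')

# Usage (mirroring the hunk above):
#     with open(log_path, "rb") as file:
#         raw_content = decode_log_bytes(file.read())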
@@ -427,13 +471,18 @@ def get_all_logs() -> List[LogEntry]:
     result = []
 
     for filename in log_files:
-        gateway, timestamp = parse_filename(filename)
-        if gateway and timestamp:
-            result.append(LogEntry(
-                gateway=gateway,
-                timestamp=timestamp,
-                filename=filename
-            ))
+        try:
+            gateway, timestamp = parse_filename(filename)
+            if gateway and timestamp:
+                result.append(LogEntry(
+                    gateway=gateway,
+                    timestamp=timestamp,
+                    filename=filename
+                ))
+            else:
+                print(f"Could not parse filename: {filename}")
+        except Exception as e:
+            print(f"Error processing log file {filename}: {e}")
 
     # Sort by timestamp descending (newest first)
     result.sort(key=lambda x: x.timestamp, reverse=True)
@@ -459,65 +508,104 @@ def parse_log_file(log_path):
     header_columns = []
 
     try:
-        with open(log_path, "r") as file:
-            content = file.read()
-            lines = content.splitlines()
-
-            # Find the "SSL-VPN sessions:" section
-            session_section_start = None
-            for i, line in enumerate(lines):
-                if "SSL-VPN sessions:" in line:
-                    session_section_start = i
-                    break
-
-            if session_section_start is None:
-                # If SSL-VPN sessions section not found, fall back to the login users section
-                for i, line in enumerate(lines):
-                    if "SSL-VPN Login Users:" in line:
-                        session_section_start = i
-                        break
-
-            if session_section_start is None:
-                # No recognized sections found
-                return header_columns, parsed_rows
-
-            # Find header line with column names (it should be right after the section title)
-            header_line_idx = session_section_start + 1
-            if header_line_idx < len(lines):
-                header_line = lines[header_line_idx]
-                if "Index" in header_line and "User" in header_line and "Group" in header_line:
-                    # Preserve exact order of columns from file
-                    header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
-
-            # Parse data rows
-            for line in lines[header_line_idx+1:]:
-                # Stop parsing when we hit an empty line or a new section
-                if not line.strip() or line.strip().endswith("#"):
-                    break
-
-                if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
-                    columns = [col.strip() for col in line.split("\t") if col]
-                    row_data = {}
-
-                    # Map columns to dictionary in original order with extra whitespace handling
-                    for i, col in enumerate(columns):
-                        if i < len(header_columns):
-                            column_name = header_columns[i]
-                            # Triple strip to ensure all possible whitespace is removed
-                            clean_value = col.strip() if col else ""
-                            # Special handling for Tunnel/Dest IP which may have extra spaces
-                            if column_name == "Tunnel/Dest IP":
-                                clean_value = clean_value.strip()
-                            row_data[column_name] = clean_value
-
-                    # Add source filename metadata
-                    filename = os.path.basename(log_path)
-                    gateway, timestamp = parse_filename(filename)
-                    row_data["_source_file"] = filename
-                    row_data["_gateway"] = gateway
-                    row_data["_timestamp"] = timestamp
-
-                    parsed_rows.append(row_data)
+        # Read the file in binary mode first to check for encodings
+        with open(log_path, "rb") as file:
+            binary_content = file.read()
+
+        # Check for BOM (Byte Order Mark) at the beginning of the file
+        content = None
+
+        # Check for UTF-16 LE BOM
+        if binary_content.startswith(b'\xff\xfe'):
+            try:
+                content = binary_content.decode('utf-16-le')
+            except UnicodeDecodeError:
+                pass
+
+        # Check for UTF-16 BE BOM
+        if content is None and binary_content.startswith(b'\xfe\xff'):
+            try:
+                content = binary_content.decode('utf-16-be')
+            except UnicodeDecodeError:
+                pass
+
+        # Try UTF-8
+        if content is None:
+            try:
+                content = binary_content.decode('utf-8')
+            except UnicodeDecodeError:
+                pass
+
+        # Try common encodings if we still don't have content
+        if content is None:
+            for encoding in ['utf-16', 'latin1', 'cp1252', 'iso-8859-1']:
+                try:
+                    content = binary_content.decode(encoding)
+                    break
+                except UnicodeDecodeError:
+                    continue
+
+        # If all decodings fail, use latin1 as a fallback with replacement
+        if content is None:
+            content = binary_content.decode('latin1', errors='replace')
+
+        lines = content.splitlines()
+
+        # Find the "SSL-VPN sessions:" section
+        session_section_start = None
+        for i, line in enumerate(lines):
+            if "SSL-VPN sessions:" in line:
+                session_section_start = i
+                break
+
+        if session_section_start is None:
+            # If SSL-VPN sessions section not found, fall back to the login users section
+            for i, line in enumerate(lines):
+                if "SSL-VPN Login Users:" in line:
+                    session_section_start = i
+                    break
+
+        if session_section_start is None:
+            # No recognized sections found
+            return header_columns, parsed_rows
+
+        # Find header line with column names (it should be right after the section title)
+        header_line_idx = session_section_start + 1
+        if header_line_idx < len(lines):
+            header_line = lines[header_line_idx]
+            if "Index" in header_line and "User" in header_line and "Group" in header_line:
+                # Preserve exact order of columns from file
+                header_columns = [col.strip() for col in header_line.split("\t") if col.strip()]
+
+        # Parse data rows
+        for line in lines[header_line_idx+1:]:
+            # Stop parsing when we hit an empty line or a new section
+            if not line.strip() or line.strip().endswith("#"):
+                break
+
+            if line.strip() and not line.startswith("FBI-HQ-SSLVPN #"):
+                columns = [col.strip() for col in line.split("\t") if col]
+                row_data = {}
+
+                # Map columns to dictionary in original order with extra whitespace handling
+                for i, col in enumerate(columns):
+                    if i < len(header_columns):
+                        column_name = header_columns[i]
+                        # Triple strip to ensure all possible whitespace is removed
+                        clean_value = col.strip() if col else ""
+                        # Special handling for Tunnel/Dest IP which may have extra spaces
+                        if column_name == "Tunnel/Dest IP":
+                            clean_value = clean_value.strip()
+                        row_data[column_name] = clean_value
+
+                # Add source filename metadata
+                filename = os.path.basename(log_path)
+                gateway, timestamp = parse_filename(filename)
+                row_data["_source_file"] = filename
+                row_data["_gateway"] = gateway
+                row_data["_timestamp"] = timestamp
+
+                parsed_rows.append(row_data)
 
     except Exception as e:
         print(f"Error parsing log file {log_path}: {e}")
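Based only on what this diff shows, a short usage sketch of the reworked parse_log_file: it returns the tab-separated header columns plus one dict per session row, each annotated with _source_file, _gateway and _timestamp metadata. The import path and the log filename below are assumptions for illustration; the metadata keys and the "User"/"Tunnel/Dest IP" columns come from the hunk above.

from main import parse_log_file  # assumes the module is importable as "main"

columns, rows = parse_log_file("logs/example-gateway-20240101.log")  # hypothetical filename
print(columns)  # e.g. ['Index', 'User', 'Group', ...] in the order found in the file
for row in rows:
    # Each row maps header columns to cleaned values, plus source metadata.
    print(row.get("User"), row.get("Tunnel/Dest IP"), row["_source_file"], row["_timestamp"])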