From 25cafa604425cfa59a7cb9dc618469e53ebe2e4d Mon Sep 17 00:00:00 2001
From: Srujan N <srujan.00123@gmail.com>
Date: Mon, 22 Sep 2025 23:49:39 +0000
Subject: [PATCH] fix: remove whitelist from internal MT940 helper function

---
 .../bank_statement_import.py                  | 18 +++++++++++------
 .../test_bank_statement_import.py             | 20 ++++++++++++++++++-
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/erpnext/accounts/doctype/bank_statement_import/bank_statement_import.py b/erpnext/accounts/doctype/bank_statement_import/bank_statement_import.py
index ab393da9caa..f7380afc961 100644
--- a/erpnext/accounts/doctype/bank_statement_import/bank_statement_import.py
+++ b/erpnext/accounts/doctype/bank_statement_import/bank_statement_import.py
@@ -118,22 +118,27 @@ def preprocess_mt940_content(content: str) -> str:
 	but some banks provide longer statement numbers that cause parsing errors.
 	This function truncates statement numbers longer than 5 digits to the last 5 digits.
 	"""
-	# Pattern to match :28C: field with statement number and optional sequence
-	pattern = r'(:28C:)(\d{6,})(/\d+)?'
+	# Fast-path: bail if no :28C: tag exists
+	if ":28C:" not in content:
+		return content
+
+	# Match a line that is only a :28C: field: tag at line start, 6+ digits, optional /seq, then trailing whitespace (preserved) up to line end
+	pattern = re.compile(r'(?m)^(:28C:)(\d{6,})(/\d+)?(\s*)$')
 
 	def replace_statement_number(match):
 		prefix = match.group(1)  # ':28C:'
 		statement_num = match.group(2)  # The statement number
 		sequence_part = match.group(3) or ''  # The sequence part like '/1'
+		trailing_space = match.group(4) or ''  # Preserve trailing whitespace
 
 		# If statement number is longer than 5 digits, truncate to last 5 digits
 		if len(statement_num) > 5:
 			statement_num = statement_num[-5:]
 
-		return prefix + statement_num + sequence_part
+		return prefix + statement_num + sequence_part + trailing_space
 
 	# Apply the replacement
-	processed_content = re.sub(pattern, replace_statement_number, content)
+	processed_content = pattern.sub(replace_statement_number, content)
 	return processed_content
 
 
@@ -142,10 +147,11 @@ def convert_mt940_to_csv(data_import, mt940_file_path):
 
 	file_doc, content = get_file(mt940_file_path)
 
-	if not is_mt940_format(content):
+	is_mt940 = is_mt940_format(content)
+	if not is_mt940:
 		frappe.throw(_("The uploaded file does not appear to be in valid MT940 format."))
 
-	if is_mt940_format(content) and not doc.import_mt940_fromat:
+	if is_mt940 and not doc.import_mt940_fromat:
 		frappe.throw(_("MT940 file detected. Please enable 'Import MT940 Format' to proceed."))
 
 	try:
diff --git a/erpnext/accounts/doctype/bank_statement_import/test_bank_statement_import.py b/erpnext/accounts/doctype/bank_statement_import/test_bank_statement_import.py
index 5e8a3646c48..f0b97480331 100644
--- a/erpnext/accounts/doctype/bank_statement_import/test_bank_statement_import.py
+++ b/erpnext/accounts/doctype/bank_statement_import/test_bank_statement_import.py
@@ -2,7 +2,6 @@
 # See license.txt
 
 import unittest
-import frappe
 
 from erpnext.accounts.doctype.bank_statement_import.bank_statement_import import (
 	preprocess_mt940_content,
@@ -189,3 +188,22 @@ class TestBankStatementImport(unittest.TestCase):
 		# Verify that other content remains unchanged
 		self.assertIn(":20:STMTREF167619", result)  # Reference should remain unchanged
 		self.assertIn("UPI/TEST USER/123456789/PaidViaTestApp", result)
+
+	def test_preprocess_mt940_content_whitespace_variants(self):
+		"""Test handling of whitespace and different line endings"""
+		# Test with trailing spaces
+		mt940_content = ":28C:167619/1   \n"
+		expected_content = ":28C:67619/1   \n"
+		result = preprocess_mt940_content(mt940_content)
+		self.assertEqual(result, expected_content)
+
+		# Test with Windows line endings (CRLF)
+		mt940_content = ":28C:167619/1\r\n"
+		expected_content = ":28C:67619/1\r\n"
+		result = preprocess_mt940_content(mt940_content)
+		self.assertEqual(result, expected_content)
+
+		# Test with leading spaces (should not match because the tag is not at the start of the line)
+		mt940_content = "   :28C:167619/1\n"
+		result = preprocess_mt940_content(mt940_content)
+		self.assertEqual(result, mt940_content)  # Should remain unchanged