From 25cafa604425cfa59a7cb9dc618469e53ebe2e4d Mon Sep 17 00:00:00 2001 From: Srujan N Date: Mon, 22 Sep 2025 23:49:39 +0000 Subject: [PATCH] fix: remove whitelist from internal MT940 helper function --- .../bank_statement_import.py | 18 +++++++++++------ .../test_bank_statement_import.py | 20 ++++++++++++++++++- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/erpnext/accounts/doctype/bank_statement_import/bank_statement_import.py b/erpnext/accounts/doctype/bank_statement_import/bank_statement_import.py index ab393da9caa..f7380afc961 100644 --- a/erpnext/accounts/doctype/bank_statement_import/bank_statement_import.py +++ b/erpnext/accounts/doctype/bank_statement_import/bank_statement_import.py @@ -118,22 +118,27 @@ def preprocess_mt940_content(content: str) -> str: but some banks provide longer statement numbers that cause parsing errors. This function truncates statement numbers longer than 5 digits to the last 5 digits. """ - # Pattern to match :28C: field with statement number and optional sequence - pattern = r'(:28C:)(\d{6,})(/\d+)?' + # Fast-path: bail if no :28C: tag exists + if ":28C:" not in content: + return content + + # Match :28C: at start of line, capture digits and optional /seq, preserve whitespace + pattern = re.compile(r'(?m)^(:28C:)(\d{6,})(/\d+)?(\s*)$') def replace_statement_number(match): prefix = match.group(1) # ':28C:' statement_num = match.group(2) # The statement number sequence_part = match.group(3) or '' # The sequence part like '/1' + trailing_space = match.group(4) or '' # Preserve trailing whitespace # If statement number is longer than 5 digits, truncate to last 5 digits if len(statement_num) > 5: statement_num = statement_num[-5:] - return prefix + statement_num + sequence_part + return prefix + statement_num + sequence_part + trailing_space # Apply the replacement - processed_content = re.sub(pattern, replace_statement_number, content) + processed_content = pattern.sub(replace_statement_number, content) return processed_content @@ -142,10 +147,11 @@ def convert_mt940_to_csv(data_import, mt940_file_path): file_doc, content = get_file(mt940_file_path) - if not is_mt940_format(content): + is_mt940 = is_mt940_format(content) + if not is_mt940: frappe.throw(_("The uploaded file does not appear to be in valid MT940 format.")) - if is_mt940_format(content) and not doc.import_mt940_fromat: + if is_mt940 and not doc.import_mt940_fromat: frappe.throw(_("MT940 file detected. Please enable 'Import MT940 Format' to proceed.")) try: diff --git a/erpnext/accounts/doctype/bank_statement_import/test_bank_statement_import.py b/erpnext/accounts/doctype/bank_statement_import/test_bank_statement_import.py index 5e8a3646c48..f0b97480331 100644 --- a/erpnext/accounts/doctype/bank_statement_import/test_bank_statement_import.py +++ b/erpnext/accounts/doctype/bank_statement_import/test_bank_statement_import.py @@ -2,7 +2,6 @@ # See license.txt import unittest -import frappe from erpnext.accounts.doctype.bank_statement_import.bank_statement_import import ( preprocess_mt940_content, @@ -189,3 +188,22 @@ class TestBankStatementImport(unittest.TestCase): # Verify that other content remains unchanged self.assertIn(":20:STMTREF167619", result) # Reference should remain unchanged self.assertIn("UPI/TEST USER/123456789/PaidViaTestApp", result) + + def test_preprocess_mt940_content_whitespace_variants(self): + """Test handling of whitespace and different line endings""" + # Test with trailing spaces + mt940_content = ":28C:167619/1 \n" + expected_content = ":28C:67619/1 \n" + result = preprocess_mt940_content(mt940_content) + self.assertEqual(result, expected_content) + + # Test with Windows line endings (CRLF) + mt940_content = ":28C:167619/1\r\n" + expected_content = ":28C:67619/1\r\n" + result = preprocess_mt940_content(mt940_content) + self.assertEqual(result, expected_content) + + # Test with leading spaces (should not match as it's not line start) + mt940_content = " :28C:167619/1\n" + result = preprocess_mt940_content(mt940_content) + self.assertEqual(result, mt940_content) # Should remain unchanged