From d7902d0477c6002b63ef02bf57baffe15b06ce04 Mon Sep 17 00:00:00 2001 From: Shllokkk Date: Tue, 10 Mar 2026 18:18:58 +0530 Subject: [PATCH] fix: sanitize genericode import inputs and secure XML parser (cherry picked from commit 17eb983c4020a0e90159a5d997215bc4df90a8fd) --- erpnext/edi/doctype/code_list/code_list.py | 11 +++++++---- .../edi/doctype/code_list/code_list_import.py | 16 +++++++++++----- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/erpnext/edi/doctype/code_list/code_list.py b/erpnext/edi/doctype/code_list/code_list.py index 8957c6565b9..e723157e7a0 100644 --- a/erpnext/edi/doctype/code_list/code_list.py +++ b/erpnext/edi/doctype/code_list/code_list.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING import frappe from frappe.model.document import Document +from frappe.utils import escape_html if TYPE_CHECKING: from lxml.etree import Element @@ -63,14 +64,16 @@ class CodeList(Document): def from_genericode(self, root: "Element"): """Extract Code List details from genericode XML""" - self.title = root.find(".//Identification/ShortName").text + self.title = escape_html(root.find(".//Identification/ShortName").text) self.version = root.find(".//Identification/Version").text self.canonical_uri = root.find(".//CanonicalUri").text # optionals - self.description = getattr(root.find(".//Identification/LongName"), "text", None) - self.publisher = getattr(root.find(".//Identification/Agency/ShortName"), "text", None) + self.description = escape_html(getattr(root.find(".//Identification/LongName"), "text", None)) + self.publisher = escape_html(getattr(root.find(".//Identification/Agency/ShortName"), "text", None)) if not self.publisher: - self.publisher = getattr(root.find(".//Identification/Agency/LongName"), "text", None) + self.publisher = escape_html( + getattr(root.find(".//Identification/Agency/LongName"), "text", None) + ) self.publisher_id = getattr(root.find(".//Identification/Agency/Identifier"), "text", None) self.url = getattr(root.find(".//Identification/LocationUri"), "text", None) diff --git a/erpnext/edi/doctype/code_list/code_list_import.py b/erpnext/edi/doctype/code_list/code_list_import.py index 3909eb22766..71cb7d0f82d 100644 --- a/erpnext/edi/doctype/code_list/code_list_import.py +++ b/erpnext/edi/doctype/code_list/code_list_import.py @@ -3,6 +3,7 @@ import json import frappe import requests from frappe import _ +from frappe.utils import escape_html from lxml import etree URL_PREFIXES = ("http://", "https://") @@ -32,7 +33,12 @@ def import_genericode(): content = f.read() # Parse the xml content - parser = etree.XMLParser(remove_blank_text=True) + parser = etree.XMLParser( + remove_blank_text=True, + resolve_entities=False, + load_dtd=False, + no_network=True, + ) try: root = etree.fromstring(content, parser=parser) except Exception as e: @@ -104,7 +110,7 @@ def get_genericode_columns_and_examples(root): # Get column names for column in root.findall(".//Column"): - column_id = column.get("Id") + column_id = escape_html(column.get("Id")) columns.append(column_id) example_values[column_id] = [] filterable_columns[column_id] = set() @@ -112,7 +118,7 @@ def get_genericode_columns_and_examples(root): # Get all values and count unique occurrences for row in root.findall(".//SimpleCodeList/Row"): for value in row.findall("Value"): - column_id = value.get("ColumnRef") + column_id = escape_html(value.get("ColumnRef")) if column_id not in columns: # Handle undeclared column columns.append(column_id) @@ -123,7 +129,7 @@ def get_genericode_columns_and_examples(root): if simple_value is None: continue - filterable_columns[column_id].add(simple_value.text) + filterable_columns[column_id].add(escape_html(simple_value.text)) # Get example values (up to 3) and filter columns with cardinality <= 5 for row in root.findall(".//SimpleCodeList/Row")[:3]: @@ -133,7 +139,7 @@ def get_genericode_columns_and_examples(root): if simple_value is None: continue - example_values[column_id].append(simple_value.text) + example_values[column_id].append(escape_html(simple_value.text)) filterable_columns = {k: list(v) for k, v in filterable_columns.items() if len(v) <= 5}