fix: sanitize genericode import inputs and secure XML parser

(cherry picked from commit 17eb983c40)
This commit is contained in:
Shllokkk
2026-03-10 18:18:58 +05:30
committed by Mergify
parent e78386f49a
commit d7902d0477
2 changed files with 18 additions and 9 deletions

View File

@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING
import frappe
from frappe.model.document import Document
from frappe.utils import escape_html
if TYPE_CHECKING:
from lxml.etree import Element
@@ -63,14 +64,16 @@ class CodeList(Document):
def from_genericode(self, root: "Element"):
"""Extract Code List details from genericode XML"""
self.title = root.find(".//Identification/ShortName").text
self.title = escape_html(root.find(".//Identification/ShortName").text)
self.version = root.find(".//Identification/Version").text
self.canonical_uri = root.find(".//CanonicalUri").text
# optionals
self.description = getattr(root.find(".//Identification/LongName"), "text", None)
self.publisher = getattr(root.find(".//Identification/Agency/ShortName"), "text", None)
self.description = escape_html(getattr(root.find(".//Identification/LongName"), "text", None))
self.publisher = escape_html(getattr(root.find(".//Identification/Agency/ShortName"), "text", None))
if not self.publisher:
self.publisher = getattr(root.find(".//Identification/Agency/LongName"), "text", None)
self.publisher = escape_html(
getattr(root.find(".//Identification/Agency/LongName"), "text", None)
)
self.publisher_id = getattr(root.find(".//Identification/Agency/Identifier"), "text", None)
self.url = getattr(root.find(".//Identification/LocationUri"), "text", None)

View File

@@ -3,6 +3,7 @@ import json
import frappe
import requests
from frappe import _
from frappe.utils import escape_html
from lxml import etree
URL_PREFIXES = ("http://", "https://")
@@ -32,7 +33,12 @@ def import_genericode():
content = f.read()
# Parse the xml content
parser = etree.XMLParser(remove_blank_text=True)
parser = etree.XMLParser(
remove_blank_text=True,
resolve_entities=False,
load_dtd=False,
no_network=True,
)
try:
root = etree.fromstring(content, parser=parser)
except Exception as e:
@@ -104,7 +110,7 @@ def get_genericode_columns_and_examples(root):
# Get column names
for column in root.findall(".//Column"):
column_id = column.get("Id")
column_id = escape_html(column.get("Id"))
columns.append(column_id)
example_values[column_id] = []
filterable_columns[column_id] = set()
@@ -112,7 +118,7 @@ def get_genericode_columns_and_examples(root):
# Get all values and count unique occurrences
for row in root.findall(".//SimpleCodeList/Row"):
for value in row.findall("Value"):
column_id = value.get("ColumnRef")
column_id = escape_html(value.get("ColumnRef"))
if column_id not in columns:
# Handle undeclared column
columns.append(column_id)
@@ -123,7 +129,7 @@ def get_genericode_columns_and_examples(root):
if simple_value is None:
continue
filterable_columns[column_id].add(simple_value.text)
filterable_columns[column_id].add(escape_html(simple_value.text))
# Get example values (up to 3) and filter columns with cardinality <= 5
for row in root.findall(".//SimpleCodeList/Row")[:3]:
@@ -133,7 +139,7 @@ def get_genericode_columns_and_examples(root):
if simple_value is None:
continue
example_values[column_id].append(simple_value.text)
example_values[column_id].append(escape_html(simple_value.text))
filterable_columns = {k: list(v) for k, v in filterable_columns.items() if len(v) <= 5}