mirror of
https://github.com/frappe/erpnext.git
synced 2026-04-02 06:29:54 +00:00
fix: sanitize genericode import inputs and secure XML parser
(cherry picked from commit 17eb983c40)
This commit is contained in:
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING
|
||||
|
||||
import frappe
|
||||
from frappe.model.document import Document
|
||||
from frappe.utils import escape_html
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from lxml.etree import Element
|
||||
@@ -63,14 +64,16 @@ class CodeList(Document):
|
||||
|
||||
def from_genericode(self, root: "Element"):
|
||||
"""Extract Code List details from genericode XML"""
|
||||
self.title = root.find(".//Identification/ShortName").text
|
||||
self.title = escape_html(root.find(".//Identification/ShortName").text)
|
||||
self.version = root.find(".//Identification/Version").text
|
||||
self.canonical_uri = root.find(".//CanonicalUri").text
|
||||
# optionals
|
||||
self.description = getattr(root.find(".//Identification/LongName"), "text", None)
|
||||
self.publisher = getattr(root.find(".//Identification/Agency/ShortName"), "text", None)
|
||||
self.description = escape_html(getattr(root.find(".//Identification/LongName"), "text", None))
|
||||
self.publisher = escape_html(getattr(root.find(".//Identification/Agency/ShortName"), "text", None))
|
||||
if not self.publisher:
|
||||
self.publisher = getattr(root.find(".//Identification/Agency/LongName"), "text", None)
|
||||
self.publisher = escape_html(
|
||||
getattr(root.find(".//Identification/Agency/LongName"), "text", None)
|
||||
)
|
||||
self.publisher_id = getattr(root.find(".//Identification/Agency/Identifier"), "text", None)
|
||||
self.url = getattr(root.find(".//Identification/LocationUri"), "text", None)
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ import json
|
||||
import frappe
|
||||
import requests
|
||||
from frappe import _
|
||||
from frappe.utils import escape_html
|
||||
from lxml import etree
|
||||
|
||||
URL_PREFIXES = ("http://", "https://")
|
||||
@@ -32,7 +33,12 @@ def import_genericode():
|
||||
content = f.read()
|
||||
|
||||
# Parse the xml content
|
||||
parser = etree.XMLParser(remove_blank_text=True)
|
||||
parser = etree.XMLParser(
|
||||
remove_blank_text=True,
|
||||
resolve_entities=False,
|
||||
load_dtd=False,
|
||||
no_network=True,
|
||||
)
|
||||
try:
|
||||
root = etree.fromstring(content, parser=parser)
|
||||
except Exception as e:
|
||||
@@ -104,7 +110,7 @@ def get_genericode_columns_and_examples(root):
|
||||
|
||||
# Get column names
|
||||
for column in root.findall(".//Column"):
|
||||
column_id = column.get("Id")
|
||||
column_id = escape_html(column.get("Id"))
|
||||
columns.append(column_id)
|
||||
example_values[column_id] = []
|
||||
filterable_columns[column_id] = set()
|
||||
@@ -112,7 +118,7 @@ def get_genericode_columns_and_examples(root):
|
||||
# Get all values and count unique occurrences
|
||||
for row in root.findall(".//SimpleCodeList/Row"):
|
||||
for value in row.findall("Value"):
|
||||
column_id = value.get("ColumnRef")
|
||||
column_id = escape_html(value.get("ColumnRef"))
|
||||
if column_id not in columns:
|
||||
# Handle undeclared column
|
||||
columns.append(column_id)
|
||||
@@ -123,7 +129,7 @@ def get_genericode_columns_and_examples(root):
|
||||
if simple_value is None:
|
||||
continue
|
||||
|
||||
filterable_columns[column_id].add(simple_value.text)
|
||||
filterable_columns[column_id].add(escape_html(simple_value.text))
|
||||
|
||||
# Get example values (up to 3) and filter columns with cardinality <= 5
|
||||
for row in root.findall(".//SimpleCodeList/Row")[:3]:
|
||||
@@ -133,7 +139,7 @@ def get_genericode_columns_and_examples(root):
|
||||
if simple_value is None:
|
||||
continue
|
||||
|
||||
example_values[column_id].append(simple_value.text)
|
||||
example_values[column_id].append(escape_html(simple_value.text))
|
||||
|
||||
filterable_columns = {k: list(v) for k, v in filterable_columns.items() if len(v) <= 5}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user