diff --git a/erpnext/edi/doctype/code_list/code_list_import.js b/erpnext/edi/doctype/code_list/code_list_import.js
index 4a33f3e2fe6..917e815fc97 100644
--- a/erpnext/edi/doctype/code_list/code_list_import.js
+++ b/erpnext/edi/doctype/code_list/code_list_import.js
@@ -10,6 +10,7 @@ erpnext.edi.import_genericode = function (listview_or_form) {
 		method: "erpnext.edi.doctype.code_list.code_list_import.import_genericode",
 		doctype: doctype,
 		docname: docname,
+		allow_web_link: false,
 		allow_toggle_private: false,
 		allow_take_photo: false,
 		on_success: function (_file_doc, r) {
diff --git a/erpnext/edi/doctype/code_list/code_list_import.py b/erpnext/edi/doctype/code_list/code_list_import.py
index 7368d3c012e..20fa7c453b4 100644
--- a/erpnext/edi/doctype/code_list/code_list_import.py
+++ b/erpnext/edi/doctype/code_list/code_list_import.py
@@ -1,48 +1,118 @@
 import json
+from urllib.parse import urlsplit
 
 import frappe
 import requests
 from frappe import _
 from frappe.utils import escape_html
+from frappe.utils.file_manager import save_file
 from lxml import etree
 
-URL_PREFIXES = ("http://", "https://")
+GENERICODE_FETCH_TIMEOUT = 15  # passed as timeout= to requests.get in fetch_genericode_from_url
+LOCAL_FILE_PREFIXES = ("/files/", "/private/files/")  # path prefixes is_local_file_url accepts
+
+
+class RemoteGenericodeUrlNotAllowedError(Exception):  # raised by get_uploaded_genericode_file when the upload URL is not a local file path
+	pass
+
+
+class CodeListSelectionMismatchError(Exception):  # raised by import_genericode_content when the parsed name differs from the selected Code List
+	pass
 
 
 @frappe.whitelist()
 def import_genericode():
-	doctype = "Code List"
-	docname = frappe.form_dict.docname
-	content = frappe.local.uploaded_file
-
-	# recover the content, if it's a link
-	if (file_url := frappe.local.uploaded_file_url) and file_url.startswith(URL_PREFIXES):
-		try:
-			# If it's a URL, fetch the content and make it a local file (for durable audit)
-			response = requests.get(frappe.local.uploaded_file_url)
-			response.raise_for_status()
-			frappe.local.uploaded_file = content = response.content
-			frappe.local.uploaded_filename = frappe.local.uploaded_file_url.split("/")[-1]
-			frappe.local.uploaded_file_url = None
-		except Exception as e:
-			frappe.throw(f"{e!s}", title=_("Fetching Error"))
-
-	if file_url := frappe.local.uploaded_file_url:
-		file_path = frappe.utils.file_manager.get_file_path(file_url)
-		with open(file_path.encode(), mode="rb") as f:
-			content = f.read()
-
-	# Parse the xml content
-	parser = etree.XMLParser(
-		remove_blank_text=True,
-		resolve_entities=False,
-		load_dtd=False,
-		no_network=True,
-	)
 	try:
-		root = etree.fromstring(content, parser=parser)
-	except Exception as e:
-		frappe.throw(f"{e!s}", title=_("Parsing Error"))
+		content, file_name = get_uploaded_genericode_file()  # bytes and file name taken from the current request's upload state
+
+		return import_genericode_content(
+			doctype="Code List",  # this endpoint always imports into the Code List doctype
+			docname=frappe.form_dict.docname,
+			content=content,
+			file_name=file_name,
+		)
+	except RemoteGenericodeUrlNotAllowedError:  # the internal errors below are reported to the user through translated frappe.throw messages
+		frappe.throw(
+			_("Importing Code Lists from remote URLs is not allowed."),
+			title=_("Invalid Upload"),
+		)
+	except CodeListSelectionMismatchError:
+		frappe.throw(_("The uploaded file does not match the selected Code List."))
+	except etree.XMLSyntaxError:  # a fixed message is shown here; the parser's own error text is not included
+		frappe.throw(
+			_("The uploaded file could not be parsed as a genericode XML document."),
+			title=_("Parsing Error"),
+		)
+
+
+def import_genericode_from_url(
+	url: str,
+	doctype: str = "Code List",
+	docname: str | None = None,
+):
+	"""Fetch a genericode document from a trusted backend URL and import it.
+
+	The function carries no ``@frappe.whitelist()`` decorator and fetches the
+	URL exactly as given, so the caller is responsible for trusting it.
+
+	Args:
+		url: Location of the genericode XML document.
+		doctype: DocType forwarded to ``import_genericode_content``.
+		docname: Document name forwarded to ``import_genericode_content``.
+
+	Returns:
+		Whatever ``import_genericode_content`` returns for the fetched document.
+	"""
+	payload = fetch_genericode_from_url(url)
+
+	# Name the attachment after the last URL path segment; use a fixed
+	# fallback when that segment is empty (no path, or a trailing slash).
+	last_segment = urlsplit(url).path.rsplit("/", 1)[-1]
+	attachment_name = last_segment if last_segment else "genericode.xml"
+
+	return import_genericode_content(
+		doctype=doctype,
+		docname=docname,
+		content=payload,
+		file_name=attachment_name,
+	)
+
+
+def get_uploaded_genericode_file() -> tuple[bytes, str | None]:
+	"""Return the content and file name of the genericode file for this request.
+
+	The upload state is read from ``frappe.local``. Inline content is used when
+	both the content and a file name are present. Otherwise
+	``uploaded_file_url`` must point at an existing File document under one of
+	the local file prefixes, and the current user must be allowed to read it.
+
+	Returns:
+		A ``(content, file_name)`` tuple.
+
+	Raises:
+		frappe.ValidationError: Neither inline content (with a file name) nor a
+			file URL was supplied.
+		RemoteGenericodeUrlNotAllowedError: The file URL is not a local file path.
+	"""
+	uploaded_data = frappe.local.uploaded_file
+	file_name = frappe.local.uploaded_filename
+	if uploaded_data and file_name:
+		return uploaded_data, file_name
+
+	file_url = frappe.local.uploaded_file_url
+	if not file_url:
+		raise frappe.ValidationError(_("No file uploaded or URL provided."))
+
+	if not is_local_file_url(file_url):
+		raise RemoteGenericodeUrlNotAllowedError
+
+	file_doc = frappe.get_doc("File", {"file_url": file_url})
+	file_doc.check_permission("read")
+
+	# The request may carry no file name when an existing File is referenced by
+	# URL; fall back to the File's own name so the attachment created later is
+	# not saved with a missing name.
+	return read_file_bytes(file_doc), file_name or file_doc.file_name
+
+
+def read_file_bytes(file_doc) -> bytes:
+	"""Return the content of a File document as bytes.
+
+	On v15, `File.get_content` decodes text files to utf-8 and hands back `str`,
+	while `lxml.etree.fromstring` needs bytes when the XML declares an encoding.
+	A `str` result is therefore re-encoded as utf-8; anything else is returned
+	unchanged.
+	"""
+	raw = file_doc.get_content()
+	return raw.encode("utf-8") if isinstance(raw, str) else raw
+
+
+def is_local_file_url(file_url: str | None) -> bool:
+	"""Return True when ``file_url`` is a site-local file path.
+
+	A URL counts as local when, after stripping surrounding whitespace, it has
+	no scheme, no network location, and its path starts with one of
+	``LOCAL_FILE_PREFIXES``. Empty, missing, or unparseable values are not local.
+	"""
+	if not file_url:
+		return False
+
+	try:
+		parsed = urlsplit(file_url.strip())
+	except ValueError:
+		# urlsplit rejects malformed authorities such as "//[" (unbalanced IPv6
+		# brackets). Such input is certainly not a local path, so reject it
+		# instead of letting the error escape to the caller.
+		return False
+
+	return not parsed.scheme and not parsed.netloc and parsed.path.startswith(LOCAL_FILE_PREFIXES)
+
+
+def fetch_genericode_from_url(url: str) -> bytes:
+	"""Download ``url`` and return the raw response body.
+
+	The request is bounded by ``GENERICODE_FETCH_TIMEOUT``, and
+	``raise_for_status`` is called before the body is returned, so an error
+	status raises instead of yielding content.
+	"""
+	reply = requests.get(url, timeout=GENERICODE_FETCH_TIMEOUT)
+	reply.raise_for_status()
+	body = reply.content
+	return body
+
+
+def import_genericode_content(
+ doctype: str,
+ docname: str | None,
+ content: bytes,
+ file_name: str | None,
+):
+ root = parse_genericode_content(content)
# Extract the name (CanonicalVersionUri) from the parsed XML
name = root.find(".//CanonicalVersionUri").text
@@ -51,7 +121,7 @@ def import_genericode():
if frappe.db.exists(doctype, docname):
code_list = frappe.get_doc(doctype, docname)
if code_list.name != name:
- frappe.throw(_("The uploaded file does not match the selected Code List."))
+ raise CodeListSelectionMismatchError
else:
# Create a new Code List document with the extracted name
code_list = frappe.new_doc(doctype)
@@ -60,19 +130,13 @@ def import_genericode():
code_list.from_genericode(root)
code_list.save()
- # Attach the file and provide a recoverable identifier
- file_doc = frappe.get_doc(
- {
- "doctype": "File",
- "attached_to_doctype": "Code List",
- "attached_to_name": code_list.name,
- "folder": frappe.db.get_value("File", {"is_attachments_folder": 1}),
- "file_name": frappe.local.uploaded_filename,
- "file_url": frappe.local.uploaded_file_url,
- "is_private": 1,
- "content": content,
- }
- ).save()
+ file_doc = save_file(
+ fname=file_name,
+ content=content,
+ dt=doctype,
+ dn=code_list.name,
+ is_private=1,
+ )
# Get available columns and example values
columns, example_values, filterable_columns = get_genericode_columns_and_examples(root)
@@ -87,6 +151,16 @@ def import_genericode():
}
+def parse_genericode_content(content: bytes):
+	"""Parse raw genericode XML and return the root element.
+
+	The parser is built with entity resolution, DTD loading and network access
+	switched off, and with blank text removal enabled. Parse errors are not
+	handled here; ``import_genericode`` catches ``etree.XMLSyntaxError``.
+	"""
+	hardened_parser = etree.XMLParser(
+		no_network=True,
+		load_dtd=False,
+		resolve_entities=False,
+		remove_blank_text=True,
+	)
+	root = etree.fromstring(content, parser=hardened_parser)
+	return root
+
+
@frappe.whitelist()
def process_genericode_import(
code_list_name: str,
diff --git a/erpnext/edi/doctype/code_list/test_code_list_import.py b/erpnext/edi/doctype/code_list/test_code_list_import.py
new file mode 100644
index 00000000000..a8eb721ea1f
--- /dev/null
+++ b/erpnext/edi/doctype/code_list/test_code_list_import.py
@@ -0,0 +1,200 @@
+# Copyright (c) 2024, Frappe Technologies Pvt. Ltd. and Contributors
+# See license.txt
+
+from unittest.mock import Mock, patch
+
+import frappe
+import requests
+from frappe.tests.utils import FrappeTestCase
+
+from erpnext.edi.doctype.code_list import code_list_import
+
+SAMPLE_GENERICODE = b"""
+