From 329449004017d7ce7c6566510a13756d43e17d43 Mon Sep 17 00:00:00 2001 From: Nikhil Kothari Date: Wed, 3 Jun 2026 19:48:14 +0530 Subject: [PATCH] feat(banking): PDF statement importer and overriding column mapping (#55559) * feat(banking): PDF statement importer * feat(banking): allow users to override column mapping * fix: store pending page images in flags --- .../BankStatementImporter/CSV/CSVImport.tsx | 6 +- .../CSV/CSVRawDataPreview.tsx | 223 ++-- .../CSV/StatementDetails.tsx | 15 +- .../BankStatementImporter/PDF/BBoxOverlay.tsx | 129 ++ .../BankStatementImporter/PDF/PDFImport.tsx | 23 + .../PDF/PDFTableEditor.tsx | 362 ++++++ .../BankStatementImporter/RawTableGrid.tsx | 222 ++++ .../BankStatementImporter/import_utils.ts | 114 +- banking/src/components/ui/file-dropzone.tsx | 2 +- banking/src/pages/BankStatementImporter.tsx | 48 +- .../src/pages/ViewBankStatementImportLog.tsx | 11 +- banking/src/types/Accounts/BankAccount.ts | 2 + .../types/Accounts/BankStatementImportLog.ts | 2 + .../doctype/bank_account/bank_account.json | 7 + .../doctype/bank_account/bank_account.py | 1 + .../bank_statement_import_log.json | 10 +- .../bank_statement_import_log.py | 1125 +++++++++++++---- .../test_bank_statement_import_log.py | 351 +++++ pyproject.toml | 5 + 19 files changed, 2238 insertions(+), 420 deletions(-) create mode 100644 banking/src/components/features/BankStatementImporter/PDF/BBoxOverlay.tsx create mode 100644 banking/src/components/features/BankStatementImporter/PDF/PDFImport.tsx create mode 100644 banking/src/components/features/BankStatementImporter/PDF/PDFTableEditor.tsx create mode 100644 banking/src/components/features/BankStatementImporter/RawTableGrid.tsx diff --git a/banking/src/components/features/BankStatementImporter/CSV/CSVImport.tsx b/banking/src/components/features/BankStatementImporter/CSV/CSVImport.tsx index 63b7df824d0..174f1401b1c 100644 --- a/banking/src/components/features/BankStatementImporter/CSV/CSVImport.tsx +++ b/banking/src/components/features/BankStatementImporter/CSV/CSVImport.tsx @@ -2,9 +2,7 @@ import CSVRawDataPreview from './CSVRawDataPreview' import StatementDetails from './StatementDetails' import { GetStatementDetailsResponse } from '../import_utils' -const CSVImport = ({ data }: { data: { message: GetStatementDetailsResponse } }) => { - - +const CSVImport = ({ data, mutate }: { data: { message: GetStatementDetailsResponse }, mutate: () => void }) => { return (
@@ -12,7 +10,7 @@ const CSVImport = ({ data }: { data: { message: GetStatementDetailsResponse } })
- +
) diff --git a/banking/src/components/features/BankStatementImporter/CSV/CSVRawDataPreview.tsx b/banking/src/components/features/BankStatementImporter/CSV/CSVRawDataPreview.tsx index 31a00a90694..d910707e9af 100644 --- a/banking/src/components/features/BankStatementImporter/CSV/CSVRawDataPreview.tsx +++ b/banking/src/components/features/BankStatementImporter/CSV/CSVRawDataPreview.tsx @@ -1,151 +1,104 @@ -import { Table, TableBody, TableCell, TableHead, TableRow } from "@/components/ui/table" -import { cn } from "@/lib/utils" -import { ArrowDownRightIcon, ArrowUpDownIcon, ArrowUpRightIcon, BanknoteIcon, CalendarIcon, DollarSignIcon, FileTextIcon, ListIcon, ReceiptIcon } from "lucide-react" -import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip" +import { useEffect, useRef, useState } from "react" +import { toast } from "sonner" import _ from "@/lib/translate" -import { GetStatementDetailsResponse } from "../import_utils" -import { useMemo } from "react" +import RawTableGrid from "../RawTableGrid" +import { + applyColumnMappingChange, + ColumnMapsTo, + GetStatementDetailsResponse, + useSetHeaderIndex, + useUpdateColumnMapping, +} from "../import_utils" import { BankStatementImportLogColumnMap } from "@/types/Accounts/BankStatementImportLogColumnMap" +type Mapping = Pick -const CSVRawDataPreview = ({ data }: { data: GetStatementDetailsResponse }) => { +const toMapping = (columns?: BankStatementImportLogColumnMap[]): Mapping[] => + (columns ?? []).map((c) => ({ + index: c.index, + maps_to: c.maps_to, + header_text: c.header_text, + variable: c.variable, + })) - const column_mapping: Record = useMemo(() => { +const headerToState = (index?: number) => (index != null && index >= 0 ? index : null) - const col_map: Record = {} +const CSVRawDataPreview = ({ + data, + mutate, +}: { + data: GetStatementDetailsResponse + mutate: () => void +}) => { + const isCompleted = data.doc.status === "Completed" - data.doc.column_mapping?.forEach(col => { - if (col.maps_to && col.maps_to !== "Do not import") { - col_map[col.maps_to] = col.index; - } - }) + const [mapping, setMapping] = useState(() => toMapping(data.doc.column_mapping)) + const [headerIndex, setHeaderIndex] = useState(() => + headerToState(data.doc.detected_header_index), + ) - return col_map + const { call: updateMapping, loading: savingMapping } = useUpdateColumnMapping() + const { call: setHeader, loading: savingHeader } = useSetHeaderIndex() - }, [data]) + const mappingRef = useRef(mapping) + const saveTimer = useRef>(undefined) - const validColumns = Object.values(column_mapping) + useEffect(() => () => clearTimeout(saveTimer.current), []) - // Reverse the column mapping to get a map of column index to variable name - const columnIndexMap: Record = Object.fromEntries(Object.entries(column_mapping).map(([variable, columnIndex]) => [columnIndex, variable as StandardColumnTypes])) + const columnMappingRecord: Record = {} + mapping.forEach((c) => { + if (c.maps_to) columnMappingRecord[c.index] = c.maps_to as ColumnMapsTo + }) + + const commitMapping = (next: Mapping[]) => { + mappingRef.current = next + setMapping(next) + } + + // Persist mapping edits (debounced) so the transaction preview updates in realtime. + const scheduleSaveMapping = () => { + if (isCompleted) return + clearTimeout(saveTimer.current) + saveTimer.current = setTimeout(() => { + updateMapping({ statement_import_id: data.doc.name, column_mapping: mappingRef.current }) + .then(() => mutate()) + .catch(() => toast.error(_("Could not save the column mapping."))) + }, 500) + } + + const onChangeMapping = (columnIndex: number, mapsTo: ColumnMapsTo) => { + if (isCompleted) return + commitMapping(applyColumnMappingChange(mappingRef.current, columnIndex, mapsTo)) + scheduleSaveMapping() + } + + const onSetHeader = (rowIndex: number | null) => { + if (isCompleted) return + setHeaderIndex(rowIndex) + setHeader({ statement_import_id: data.doc.name, header_index: rowIndex ?? -1 }) + .then((res) => { + // The backend re-derives the mapping for the new header; sync local state. + const doc = res?.message?.doc + if (doc) { + commitMapping(toMapping(doc.column_mapping)) + setHeaderIndex(headerToState(doc.detected_header_index)) + } + mutate() + }) + .catch(() => toast.error(_("Could not update the header row."))) + } - // Loop over the contents of the CSV file and show a preview - highlight the header row and the transaction rows return ( - - - {data.raw_data.map((row, index) => { - - const isHeaderRow = index === data.doc.detected_header_index; - const isTransactionRow = index >= (data.doc.detected_transaction_starting_index ?? 0) && index <= (data.doc.detected_transaction_ending_index ?? 0); - - return - {isHeaderRow ? - {index + 1} - : - - {index + 1} - - } - {row.map((cell, cellIndex) => { - - const isValidColumn = validColumns.includes(cellIndex); - const columnType = columnIndexMap[cellIndex]; - const isAmountColumn = ["Amount", "Withdrawal", "Deposit", "Balance"].includes(columnType); - - if (isHeaderRow) { - return -
- {columnType && - - - - - {_(columnType)} - - - } - {cell} -
-
- } else { - return -
- {cell} -
-
- } - } - - )} -
- })} -
-
+ ) } -type StandardColumnTypes = BankStatementImportLogColumnMap['maps_to']; - -const ColumnHeaderIcon = ({ columnType }: { columnType?: StandardColumnTypes }) => { - if (!columnType) { - return null - } - - if (columnType === 'Amount') { - return - } - - if (columnType === 'Withdrawal') { - return - } - - if (columnType === 'Deposit') { - return - } - - if (columnType === 'Balance') { - return - } - - if (columnType === 'Date') { - return - } - - if (columnType === 'Description') { - return - } - - if (columnType === 'Reference') { - return - } - - if (columnType === 'Transaction Type') { - return - } - - if (columnType === 'Debit/Credit') { - return - } - - return null -} - -export default CSVRawDataPreview \ No newline at end of file +export default CSVRawDataPreview diff --git a/banking/src/components/features/BankStatementImporter/CSV/StatementDetails.tsx b/banking/src/components/features/BankStatementImporter/CSV/StatementDetails.tsx index 74f40eb7e33..588527ed9df 100644 --- a/banking/src/components/features/BankStatementImporter/CSV/StatementDetails.tsx +++ b/banking/src/components/features/BankStatementImporter/CSV/StatementDetails.tsx @@ -142,11 +142,16 @@ const StatementDetails = ({ data }: Props) => {
- {bank?.account_name} - {bank?.account} + {bank?.account_name}
+ + {_("Account")} + + {bank?.account} + + {_("Statement File")} @@ -158,7 +163,11 @@ const StatementDetails = ({ data }: Props) => { {_("Transaction Dates")} - {_("{0} to {1}", [formatDate(data.doc.start_date, "Do MMMM YYYY"), formatDate(data.doc.end_date, "Do MMMM YYYY")])} + {data.doc.start_date && data.doc.end_date ? ( + {_("{0} to {1}", [formatDate(data.doc.start_date, "Do MMMM YYYY"), formatDate(data.doc.end_date, "Do MMMM YYYY")])} + ) : ( + - + )} {_("Number of Transactions")} diff --git a/banking/src/components/features/BankStatementImporter/PDF/BBoxOverlay.tsx b/banking/src/components/features/BankStatementImporter/PDF/BBoxOverlay.tsx new file mode 100644 index 00000000000..8da19caa666 --- /dev/null +++ b/banking/src/components/features/BankStatementImporter/PDF/BBoxOverlay.tsx @@ -0,0 +1,129 @@ +import { RefObject, useEffect, useRef, useState } from 'react' +import { cn } from '@/lib/utils' + +type Bbox = [number, number, number, number] + +const MIN_SIZE = 8 // PDF points + +// Keep the box valid: normalise flipped edges, enforce a min size, clamp to the page. +const clampBbox = (bbox: Bbox, pageWidth: number, pageHeight: number): Bbox => { + let [x0, top, x1, bottom] = bbox + if (x1 < x0) [x0, x1] = [x1, x0] + if (bottom < top) [top, bottom] = [bottom, top] + x0 = Math.max(0, Math.min(x0, pageWidth - MIN_SIZE)) + top = Math.max(0, Math.min(top, pageHeight - MIN_SIZE)) + x1 = Math.min(pageWidth, Math.max(x1, x0 + MIN_SIZE)) + bottom = Math.min(pageHeight, Math.max(bottom, top + MIN_SIZE)) + return [x0, top, x1, bottom] +} + +const HANDLES = [ + { id: 'nw', className: 'left-0 top-0 -translate-x-1/2 -translate-y-1/2 cursor-nwse-resize' }, + { id: 'ne', className: 'right-0 top-0 translate-x-1/2 -translate-y-1/2 cursor-nesw-resize' }, + { id: 'sw', className: 'left-0 bottom-0 -translate-x-1/2 translate-y-1/2 cursor-nesw-resize' }, + { id: 'se', className: 'right-0 bottom-0 translate-x-1/2 translate-y-1/2 cursor-nwse-resize' }, +] + +type Props = { + bbox: Bbox + pageWidth: number + pageHeight: number + color: { border: string; bg: string; swatch: string } + label: string + included: boolean + disabled?: boolean + containerRef: RefObject + onCommit: (bbox: Bbox) => void +} + +/** A draggable + corner-resizable rectangle over a rendered PDF page. Coordinates are in PDF + * points (top-left origin); pixel deltas are converted using the container's rendered size. */ +const BBoxOverlay = ({ bbox, pageWidth, pageHeight, color, label, included, disabled, containerRef, onCommit }: Props) => { + const [draft, setDraft] = useState(bbox) + const draftRef = useRef(bbox) + const drag = useRef<{ mode: string; startX: number; startY: number; start: Bbox } | null>(null) + + // Reset to the authoritative bbox whenever it changes (e.g. after a server re-extract). + useEffect(() => { + setDraft(bbox) + draftRef.current = bbox + }, [bbox]) + + const apply = (next: Bbox) => { + draftRef.current = next + setDraft(next) + } + + const onPointerDown = (e: React.PointerEvent) => { + if (disabled) return + e.preventDefault() + e.stopPropagation() + const mode = (e.target as HTMLElement).dataset.handle ?? 'move' + ;(e.currentTarget as HTMLElement).setPointerCapture(e.pointerId) + drag.current = { mode, startX: e.clientX, startY: e.clientY, start: draftRef.current } + } + + const onPointerMove = (e: React.PointerEvent) => { + if (!drag.current || !containerRef.current) return + const rect = containerRef.current.getBoundingClientRect() + const dx = ((e.clientX - drag.current.startX) / rect.width) * pageWidth + const dy = ((e.clientY - drag.current.startY) / rect.height) * pageHeight + let [x0, top, x1, bottom] = drag.current.start + const m = drag.current.mode + if (m === 'move') { + x0 += dx + x1 += dx + top += dy + bottom += dy + } else { + if (m.includes('w')) x0 += dx + if (m.includes('e')) x1 += dx + if (m.includes('n')) top += dy + if (m.includes('s')) bottom += dy + } + apply(clampBbox([x0, top, x1, bottom], pageWidth, pageHeight)) + } + + const onPointerUp = (e: React.PointerEvent) => { + if (!drag.current) return + ;(e.currentTarget as HTMLElement).releasePointerCapture(e.pointerId) + drag.current = null + onCommit(draftRef.current) + } + + const [x0, top, x1, bottom] = draft + + return ( +
+ + {label} + + {!disabled && + HANDLES.map((handle) => ( + + ))} +
+ ) +} + +export default BBoxOverlay diff --git a/banking/src/components/features/BankStatementImporter/PDF/PDFImport.tsx b/banking/src/components/features/BankStatementImporter/PDF/PDFImport.tsx new file mode 100644 index 00000000000..13a322d0d5c --- /dev/null +++ b/banking/src/components/features/BankStatementImporter/PDF/PDFImport.tsx @@ -0,0 +1,23 @@ +import StatementDetails from '../CSV/StatementDetails' +import PDFTableEditor from './PDFTableEditor' +import { GetStatementDetailsResponse } from '../import_utils' + +type Props = { + data: { message: GetStatementDetailsResponse } + mutate: () => void +} + +const PDFImport = ({ data, mutate }: Props) => { + return ( +
+
+ +
+
+ +
+
+ ) +} + +export default PDFImport diff --git a/banking/src/components/features/BankStatementImporter/PDF/PDFTableEditor.tsx b/banking/src/components/features/BankStatementImporter/PDF/PDFTableEditor.tsx new file mode 100644 index 00000000000..396dad8788c --- /dev/null +++ b/banking/src/components/features/BankStatementImporter/PDF/PDFTableEditor.tsx @@ -0,0 +1,362 @@ +import { useEffect, useMemo, useRef, useState } from 'react' +import { toast } from 'sonner' +import { ChevronDownIcon, ChevronLeftIcon, ChevronRightIcon, FileTextIcon, Loader2Icon, TableIcon } from 'lucide-react' +import _ from '@/lib/translate' +import { cn } from '@/lib/utils' +import { Button } from '@/components/ui/button' +import { Switch } from '@/components/ui/switch' +import { Label } from '@/components/ui/label' +import { H3, Paragraph } from '@/components/ui/typography' +import { Tabs, TabsList, TabsTrigger } from '@/components/ui/tabs' +import ErrorBanner from '@/components/ui/error-banner' +import RawTableGrid from '../RawTableGrid' +import BBoxOverlay from './BBoxOverlay' +import { + applyColumnMappingChange, + ColumnMapsTo, + GetStatementDetailsResponse, + PDFTable, + useReextractPDFTable, + useSetPDFTableHeader, + useUpdatePDFTables, +} from '../import_utils' + +type Props = { + data: GetStatementDetailsResponse + mutate: () => void +} + +// Distinct overlay colours per table on a page. +const OVERLAY_COLORS = [ + { border: 'border-blue-500', bg: 'bg-blue-500/10', swatch: 'bg-blue-500' }, + { border: 'border-purple-500', bg: 'bg-purple-500/10', swatch: 'bg-purple-500' }, + { border: 'border-amber-500', bg: 'bg-amber-500/10', swatch: 'bg-amber-500' }, + { border: 'border-teal-500', bg: 'bg-teal-500/10', swatch: 'bg-teal-500' }, +] + +const columnMappingRecord = (table: PDFTable): Record => { + const map: Record = {} + table.column_mapping?.forEach((col) => { + map[col.index] = col.maps_to + }) + return map +} + +const PDFTableEditor = ({ data, mutate }: Props) => { + const isCompleted = data.doc.status === 'Completed' + + const [tables, setTables] = useState(() => data.pdf_tables ?? []) + const [viewMode, setViewMode] = useState<'pdf' | 'table'>('pdf') + const [pageIndex, setPageIndex] = useState(0) + const [collapsed, setCollapsed] = useState>(new Set()) + + const toggleCollapsed = (tableIndex: number) => + setCollapsed((prev) => { + const next = new Set(prev) + if (next.has(tableIndex)) { + next.delete(tableIndex) + } else { + next.add(tableIndex) + } + return next + }) + + const { call, loading, error } = useUpdatePDFTables() + const { call: reextract, loading: reextracting } = useReextractPDFTable() + const { call: setHeaderCall, loading: settingHeader } = useSetPDFTableHeader() + const busy = loading || reextracting || settingHeader + + // Persist edits automatically (debounced) so the transaction preview updates in realtime. + const tablesRef = useRef(tables) + const saveTimer = useRef>(undefined) + const reextractTimer = useRef>(undefined) + + const scheduleSave = () => { + if (isCompleted) return + clearTimeout(saveTimer.current) + saveTimer.current = setTimeout(() => { + call({ statement_import_id: data.doc.name, tables: tablesRef.current }) + .then(() => mutate()) + .catch(() => toast.error(_('Could not save the table settings.'))) + }, 500) + } + + // After a bbox change, re-extract that table's rows from the new region (debounced). + // The target is read inside the timeout so it always reflects the committed bbox. + const scheduleReextract = (tableIndex: number) => { + if (isCompleted) return + clearTimeout(reextractTimer.current) + reextractTimer.current = setTimeout(() => { + const target = tablesRef.current[tableIndex] + reextract({ + statement_import_id: data.doc.name, + page: target.page, + table_index: target.table_index, + bbox: target.bbox, + }) + .then((res) => { + commitTables(res?.message?.pdf_tables ?? []) + mutate() + }) + .catch(() => toast.error(_('Could not re-extract the table.'))) + }, 500) + } + + useEffect(() => () => { + clearTimeout(saveTimer.current) + clearTimeout(reextractTimer.current) + }, []) + + const pages = useMemo(() => Array.from(new Set(tables.map((t) => t.page))).sort((a, b) => a - b), [tables]) + const currentPage = pages[pageIndex] + // Keep the table's position in the flat array so edits target the right one. + const pageTables = useMemo( + () => tables.map((table, index) => ({ table, index })).filter((t) => t.table.page === currentPage), + [tables, currentPage], + ) + + // Keep tablesRef in sync synchronously so the debounced save/re-extract never read stale state. + const commitTables = (next: PDFTable[]) => { + tablesRef.current = next + setTables(next) + } + + const updateTable = (tableIndex: number, updater: (table: PDFTable) => PDFTable) => { + commitTables(tablesRef.current.map((t, i) => (i === tableIndex ? updater(t) : t))) + scheduleSave() + } + + const onChangeMapping = (tableIndex: number, columnIndex: number, mapsTo: ColumnMapsTo) => { + updateTable(tableIndex, (table) => ({ + ...table, + column_mapping: applyColumnMappingChange(table.column_mapping, columnIndex, mapsTo), + })) + } + + const onToggleIncluded = (tableIndex: number, included: boolean) => + updateTable(tableIndex, (table) => ({ ...table, included })) + + const onBboxCommit = (tableIndex: number, bbox: [number, number, number, number]) => { + commitTables(tablesRef.current.map((t, i) => (i === tableIndex ? { ...t, bbox } : t))) + scheduleReextract(tableIndex) + } + + // Set/clear the header row of a table; the backend re-derives the column mapping. + const onSetHeader = (tableIndex: number, headerIndex: number | null) => { + commitTables(tablesRef.current.map((t, i) => (i === tableIndex ? { ...t, header_index: headerIndex } : t))) + const target = tablesRef.current[tableIndex] + setHeaderCall({ + statement_import_id: data.doc.name, + page: target.page, + table_index: target.table_index, + header_index: headerIndex ?? -1, + }) + .then((res) => { + commitTables(res?.message?.pdf_tables ?? []) + mutate() + }) + .catch(() => toast.error(_('Could not update the header row.'))) + } + + if (tables.length === 0) { + return ( +
+ + {_('No tables were extracted from this PDF.')} + +
+ ) + } + + return ( +
+
+

{_('Detected Tables')}

+ + {_('Review each page. In the Table view, map each column, click a row number to set/clear the header row, and exclude anything that is not transactions (ads, summaries).')} + +
+ + {error && } + +
+ setViewMode(v as 'pdf' | 'table')}> + + {_('PDF')} + {_('Table')} + + + +
+ {busy && ( + + + {reextracting ? _('Re-extracting') : _('Saving')} + + )} + + + {_('Page {0} of {1}', [currentPage.toString(), pages.length.toString()])} + + +
+
+ + {viewMode === 'pdf' ? ( + + ) : ( +
+ {pageTables.map(({ table, index }, position) => { + const isCollapsed = collapsed.has(index) + return ( +
+
+ + {_('Table {0}', [(position + 1).toString()])} + +
+ onToggleIncluded(index, c)} + /> + +
+
+ {!isCollapsed && ( +
+ onChangeMapping(index, columnIndex, mapsTo)} + onSetHeader={(rowIndex) => onSetHeader(index, rowIndex)} + /> +
+ )} +
+ ) + })} +
+ )} +
+ ) +} + +type PageViewProps = { + pageTables: { table: PDFTable; index: number }[] + disabled: boolean + onToggleIncluded: (tableIndex: number, included: boolean) => void + onBboxCommit: (tableIndex: number, bbox: [number, number, number, number]) => void +} + +const PageView = ({ pageTables, disabled, onToggleIncluded, onBboxCommit }: PageViewProps) => { + const containerRef = useRef(null) + const pageImage = pageTables[0]?.table.page_image + const pageWidth = pageTables[0]?.table.page_width ?? 1 + const pageHeight = pageTables[0]?.table.page_height ?? 1 + + if (!pageImage) { + return ( + + {_('No page image is available for this page.')} + + ) + } + + return ( +
+ {!disabled && ( + + {_('Drag a box to move it, or drag a corner to resize. The table is re-read from the new region automatically.')} + + )} +
+ {_('Page + {pageTables.map(({ table, index }, position) => { + const color = OVERLAY_COLORS[position % OVERLAY_COLORS.length] + return ( + onBboxCommit(index, bbox)} + /> + ) + })} +
+ +
+ {pageTables.map(({ table, index }, position) => { + const color = OVERLAY_COLORS[position % OVERLAY_COLORS.length] + return ( +
+
+ + {_('Table {0}', [(position + 1).toString()])} +
+ onToggleIncluded(index, c)} + /> +
+ ) + })} +
+
+ ) +} + +const IncludeToggle = ({ + id, + checked, + disabled, + onCheckedChange, +}: { + id: string + checked: boolean + disabled: boolean + onCheckedChange: (checked: boolean) => void +}) => ( +
+ + +
+) + +export default PDFTableEditor diff --git a/banking/src/components/features/BankStatementImporter/RawTableGrid.tsx b/banking/src/components/features/BankStatementImporter/RawTableGrid.tsx new file mode 100644 index 00000000000..efe1642f3b3 --- /dev/null +++ b/banking/src/components/features/BankStatementImporter/RawTableGrid.tsx @@ -0,0 +1,222 @@ +import { useMemo } from 'react' +import { + ArrowDownRightIcon, + ArrowUpDownIcon, + ArrowUpRightIcon, + BanknoteIcon, + CalendarIcon, + DollarSignIcon, + FileTextIcon, + ListIcon, + ReceiptIcon, +} from 'lucide-react' +import _ from '@/lib/translate' +import { cn } from '@/lib/utils' +import { Table, TableBody, TableCell, TableHead, TableRow } from '@/components/ui/table' +import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip' +import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select' +import { COLUMN_MAPS_TO_OPTIONS, ColumnMapsTo } from './import_utils' + +const AMOUNT_COLUMNS: ColumnMapsTo[] = ['Amount', 'Withdrawal', 'Deposit', 'Balance'] +const DATE_LIKE = /\d{1,4}[/\-.\s]\d{1,2}[/\-.\s]\d{1,4}|\d{1,2}[\s-][a-z]{3}/i + +type Props = { + rows: string[][] + /** Column index -> mapped field */ + columnMapping: Record + headerIndex: number | null + editable?: boolean + disabled?: boolean + onChangeMapping?: (columnIndex: number, mapsTo: ColumnMapsTo) => void + /** Set the header row (or null to mark the table as having no header). */ + onSetHeader?: (rowIndex: number | null) => void +} + +/** + * A preview of extracted rows with CSV-style colour coding: the header row is highlighted, + * detected transaction rows are green, and mapped columns are emphasised. When `editable`, a + * compact row of column -> field dropdowns sits at the top, and row numbers can be clicked to + * set/clear the header row. + */ +const RawTableGrid = ({ rows, columnMapping, headerIndex, editable, disabled, onChangeMapping, onSetHeader }: Props) => { + // Tabular (XLSX) cells can be numbers/dates, not strings - coerce so .trim()/render are safe. + const stringRows = useMemo( + () => rows.map((row) => row.map((cell) => (cell == null ? '' : String(cell)))), + [rows], + ) + const numColumns = useMemo(() => stringRows.reduce((max, row) => Math.max(max, row.length), 0), [stringRows]) + + const validColumns = useMemo( + () => Object.entries(columnMapping).filter(([, m]) => m && m !== 'Do not import').map(([i]) => Number(i)), + [columnMapping], + ) + const dateColumn = useMemo(() => Object.entries(columnMapping).find(([, m]) => m === 'Date')?.[0], [columnMapping]) + const amountColumns = useMemo( + () => Object.entries(columnMapping).filter(([, m]) => ['Amount', 'Withdrawal', 'Deposit'].includes(m)).map(([i]) => Number(i)), + [columnMapping], + ) + + // Approximate the backend's transaction-row detection so the highlighting tracks edits live. + const transactionRows = useMemo(() => { + const set = new Set() + if (dateColumn === undefined) return set + const dateIdx = Number(dateColumn) + stringRows.forEach((row, index) => { + if (index === headerIndex) return + const dateCell = (row[dateIdx] ?? '').trim() + if (!dateCell || !DATE_LIKE.test(dateCell)) return + if (amountColumns.some((c) => (row[c] ?? '').trim() !== '')) set.add(index) + }) + return set + }, [stringRows, headerIndex, dateColumn, amountColumns]) + + return ( + + + {editable && ( + + + {Array.from({ length: numColumns }).map((_unused, columnIndex) => ( + + + + ))} + + )} + + {stringRows.map((row, index) => { + const isHeaderRow = index === headerIndex + const isTransactionRow = transactionRows.has(index) + + return ( + + {editable && onSetHeader ? ( + + + + + + + {isHeaderRow + ? _('This is the header row. Click to mark the table as having no header.') + : _('Click to set this as the header row.')} + + + + ) : ( + {index + 1} + )} + + {Array.from({ length: numColumns }).map((_unused, cellIndex) => { + const columnType = columnMapping[cellIndex] + const isValidColumn = validColumns.includes(cellIndex) + const isAmountColumn = AMOUNT_COLUMNS.includes(columnType) + const cellText = row[cellIndex] ?? '' + + // Read-only header row: icon + label. + if (isHeaderRow) { + return ( + +
+ {columnType && ( + + + + + {_(columnType)} + + )} + {cellText} +
+
+ ) + } + + return ( + +
+ {cellText} +
+
+ ) + })} +
+ ) + })} +
+
+ ) +} + +const ColumnHeaderIcon = ({ columnType }: { columnType?: ColumnMapsTo }) => { + switch (columnType) { + case 'Amount': + return + case 'Withdrawal': + return + case 'Deposit': + return + case 'Balance': + return + case 'Date': + return + case 'Description': + return + case 'Reference': + return + case 'Transaction Type': + return + case 'Debit/Credit': + return + default: + return null + } +} + +export default RawTableGrid diff --git a/banking/src/components/features/BankStatementImporter/import_utils.ts b/banking/src/components/features/BankStatementImporter/import_utils.ts index 1f918977751..8358a36bf03 100644 --- a/banking/src/components/features/BankStatementImporter/import_utils.ts +++ b/banking/src/components/features/BankStatementImporter/import_utils.ts @@ -1,6 +1,97 @@ import { BankStatementImportLog } from "@/types/Accounts/BankStatementImportLog" -import { useFrappeGetCall } from "frappe-react-sdk" +import { useFrappeGetCall, useFrappePostCall } from "frappe-react-sdk" +export type ColumnMapsTo = + | "Do not import" + | "Date" + | "Withdrawal" + | "Deposit" + | "Amount" + | "Description" + | "Reference" + | "Transaction Type" + | "Debit/Credit" + | "Balance" + | "Included Fee" + | "Excluded Fee" + | "Party Name/Account Holder" + | "Party Account No." + | "Party IBAN" + +export type ColumnMappingEntry = { + index: number + maps_to: ColumnMapsTo | string + header_text?: string + variable?: string +} + +/** Apply a column mapping change, clearing the same mapping from any other column. */ +export function applyColumnMappingChange( + columns: T[], + columnIndex: number, + mapsTo: ColumnMapsTo, +): T[] { + const previous = columns.find((c) => c.index === columnIndex) + const cleared = + mapsTo === "Do not import" + ? columns + : columns.map((c) => + c.index !== columnIndex && c.maps_to === mapsTo + ? { ...c, maps_to: "Do not import" as ColumnMapsTo } + : c, + ) + + return [ + ...cleared.filter((c) => c.index !== columnIndex), + { + index: columnIndex, + maps_to: mapsTo, + header_text: previous?.header_text ?? "", + variable: previous?.variable ?? `column_${columnIndex}`, + } as T, + ].sort((a, b) => a.index - b.index) +} + +export const COLUMN_MAPS_TO_OPTIONS: ColumnMapsTo[] = [ + "Do not import", + "Date", + "Description", + "Reference", + "Withdrawal", + "Deposit", + "Amount", + "Balance", + "Debit/Credit", + "Transaction Type", + "Included Fee", + "Excluded Fee", + "Party Name/Account Holder", + "Party Account No.", + "Party IBAN", +] + +export interface PDFTableColumn { + index: number + header_text: string + variable?: string + maps_to: ColumnMapsTo +} + +export interface PDFTable { + page: number + table_index: number + bbox: [number, number, number, number] + page_width: number + page_height: number + page_image: string | null + render_scale: number | null + rows: string[][] + header_index: number | null + column_mapping: PDFTableColumn[] + date_format?: string + amount_format?: string + included: boolean +} export interface GetStatementDetailsResponse { doc: BankStatementImportLog, @@ -30,6 +121,7 @@ export interface GetStatementDetailsResponse { date_format: string, raw_data: Array>, currency: string, + pdf_tables?: PDFTable[], } export const useGetStatementDetails = (id: string) => { @@ -39,4 +131,24 @@ export const useGetStatementDetails = (id: string) => { revalidateOnFocus: false }) +} + +export const useUpdatePDFTables = () => { + return useFrappePostCall<{ message: GetStatementDetailsResponse }>("erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.update_pdf_tables") +} + +export const useReextractPDFTable = () => { + return useFrappePostCall<{ message: GetStatementDetailsResponse }>("erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.reextract_pdf_table") +} + +export const useSetPDFTableHeader = () => { + return useFrappePostCall<{ message: GetStatementDetailsResponse }>("erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.set_pdf_table_header") +} + +export const useUpdateColumnMapping = () => { + return useFrappePostCall<{ message: GetStatementDetailsResponse }>("erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.update_column_mapping") +} + +export const useSetHeaderIndex = () => { + return useFrappePostCall<{ message: GetStatementDetailsResponse }>("erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.set_header_index") } \ No newline at end of file diff --git a/banking/src/components/ui/file-dropzone.tsx b/banking/src/components/ui/file-dropzone.tsx index 5e9cc41e631..71426045919 100644 --- a/banking/src/components/ui/file-dropzone.tsx +++ b/banking/src/components/ui/file-dropzone.tsx @@ -231,7 +231,7 @@ export const FileTypeIcon = ({ const getTextColor = () => { switch (fileType.toLowerCase()) { case 'pdf': - return 'text-red-700' + return 'text-ink-red-3' case 'doc': case 'docx': return 'text-[#1A5CBD]' diff --git a/banking/src/pages/BankStatementImporter.tsx b/banking/src/pages/BankStatementImporter.tsx index e2ba9c6fdaa..8e6e5345bd7 100644 --- a/banking/src/pages/BankStatementImporter.tsx +++ b/banking/src/pages/BankStatementImporter.tsx @@ -7,6 +7,7 @@ import { Dialog, DialogClose, DialogContent, DialogDescription, DialogFooter, Di import { Empty, EmptyHeader, EmptyMedia, EmptyTitle } from "@/components/ui/empty" import ErrorBanner from "@/components/ui/error-banner" import { FileDropzone } from "@/components/ui/file-dropzone" +import { Input } from "@/components/ui/input" import { Label } from "@/components/ui/label" import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table" import { H3, Paragraph } from "@/components/ui/typography" @@ -16,7 +17,7 @@ import { flt, formatCurrency } from "@/lib/numbers" import _ from "@/lib/translate" import { cn } from "@/lib/utils" import { BankStatementImportLog } from "@/types/Accounts/BankStatementImportLog" -import { useFrappeCreateDoc, useFrappeFileUpload, useFrappeGetDocList } from "frappe-react-sdk" +import { useFrappeCreateDoc, useFrappeFileUpload, useFrappeGetDocList, useFrappeUpdateDoc } from "frappe-react-sdk" import { useAtom, useAtomValue } from "jotai" import { ListIcon, Loader2Icon } from "lucide-react" import { useState } from "react" @@ -30,11 +31,15 @@ const BankStatementImporter = () => { const [selectedBankAccount] = useAtom(selectedBankAccountAtom) const [files, setFiles] = useState([]) + const [password, setPassword] = useState("") const { upload, error, loading } = useFrappeFileUpload() const navigate = useNavigate() const { createDoc, loading: createLoading, error: createError } = useFrappeCreateDoc() + const { updateDoc, error: updateError } = useFrappeUpdateDoc() + + const isPdf = files[0]?.name?.toLowerCase().endsWith(".pdf") ?? false const onUpload = () => { @@ -44,12 +49,18 @@ const BankStatementImporter = () => { const id = `new-bank-statement-import-log-${Date.now()}` - upload(files[0], { + // For protected PDFs, persist the password on the Bank Account so it is reused for + // every statement of this account (and is available before the import doc is created). + const ensurePassword = isPdf && password + ? updateDoc("Bank Account", selectedBankAccount.name, { statement_password: password }) + : Promise.resolve() + + ensurePassword.then(() => upload(files[0], { isPrivate: true, doctype: "Bank Statement Import Log", docname: id, fieldname: 'file' - }).then((file) => { + })).then((file) => { return createDoc("Bank Statement Import Log", // @ts-expect-error - not filling everything else { @@ -67,6 +78,7 @@ const BankStatementImporter = () => {
{error && } {createError && } + {updateError && }
@@ -89,7 +101,7 @@ const BankStatementImporter = () => { data-slot="form-description" className={cn("text-ink-gray-5 text-xs")} > - {_("Upload your bank statement file to start the import process. We support CSV, and XLSX files.")} + {_("Upload your bank statement file to start the import process. We support CSV, XLSX and PDF files.")}

@@ -105,10 +117,27 @@ const BankStatementImporter = () => { 'text/csv': ['.csv'], 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'], 'application/vnd.ms-excel': ['.xls'], + 'application/pdf': ['.pdf'], // 'application/xml': ['.xml'], }} multiple={false} /> + + {isPdf &&
+ + setPassword(e.target.value)} + placeholder={_("Only if the PDF is password protected")} + className="max-w-sm" + /> +

+ {_("Leave blank to use the password already saved for this bank account (if any). It is stored encrypted and reused for future statements.")} +

+
}
}