mirror of
https://github.com/frappe/erpnext.git
synced 2026-06-05 05:09:11 +00:00
feat(banking): PDF statement importer and overriding column mapping (#55559)
* feat(banking): PDF statement importer * feat(banking): allow users to override column mapping * fix: store pending page images in flags
This commit is contained in:
@@ -2,9 +2,7 @@ import CSVRawDataPreview from './CSVRawDataPreview'
|
||||
import StatementDetails from './StatementDetails'
|
||||
import { GetStatementDetailsResponse } from '../import_utils'
|
||||
|
||||
const CSVImport = ({ data }: { data: { message: GetStatementDetailsResponse } }) => {
|
||||
|
||||
|
||||
const CSVImport = ({ data, mutate }: { data: { message: GetStatementDetailsResponse }, mutate: () => void }) => {
|
||||
|
||||
return (
|
||||
<div className="w-full flex">
|
||||
@@ -12,7 +10,7 @@ const CSVImport = ({ data }: { data: { message: GetStatementDetailsResponse } })
|
||||
<StatementDetails data={data.message} />
|
||||
</div>
|
||||
<div className="w-[50%] border-s border-t pe-1 ps-0 border-outline-gray-2 h-[calc(100vh-72px)] overflow-scroll">
|
||||
<CSVRawDataPreview data={data.message} />
|
||||
<CSVRawDataPreview data={data.message} mutate={mutate} />
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
|
||||
@@ -1,151 +1,104 @@
|
||||
import { Table, TableBody, TableCell, TableHead, TableRow } from "@/components/ui/table"
|
||||
import { cn } from "@/lib/utils"
|
||||
import { ArrowDownRightIcon, ArrowUpDownIcon, ArrowUpRightIcon, BanknoteIcon, CalendarIcon, DollarSignIcon, FileTextIcon, ListIcon, ReceiptIcon } from "lucide-react"
|
||||
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"
|
||||
import { useEffect, useRef, useState } from "react"
|
||||
import { toast } from "sonner"
|
||||
import _ from "@/lib/translate"
|
||||
import { GetStatementDetailsResponse } from "../import_utils"
|
||||
import { useMemo } from "react"
|
||||
import RawTableGrid from "../RawTableGrid"
|
||||
import {
|
||||
applyColumnMappingChange,
|
||||
ColumnMapsTo,
|
||||
GetStatementDetailsResponse,
|
||||
useSetHeaderIndex,
|
||||
useUpdateColumnMapping,
|
||||
} from "../import_utils"
|
||||
import { BankStatementImportLogColumnMap } from "@/types/Accounts/BankStatementImportLogColumnMap"
|
||||
|
||||
type Mapping = Pick<BankStatementImportLogColumnMap, "index" | "maps_to" | "header_text" | "variable">
|
||||
|
||||
const CSVRawDataPreview = ({ data }: { data: GetStatementDetailsResponse }) => {
|
||||
const toMapping = (columns?: BankStatementImportLogColumnMap[]): Mapping[] =>
|
||||
(columns ?? []).map((c) => ({
|
||||
index: c.index,
|
||||
maps_to: c.maps_to,
|
||||
header_text: c.header_text,
|
||||
variable: c.variable,
|
||||
}))
|
||||
|
||||
const column_mapping: Record<StandardColumnTypes, number> = useMemo(() => {
|
||||
const headerToState = (index?: number) => (index != null && index >= 0 ? index : null)
|
||||
|
||||
const col_map: Record<string, number> = {}
|
||||
const CSVRawDataPreview = ({
|
||||
data,
|
||||
mutate,
|
||||
}: {
|
||||
data: GetStatementDetailsResponse
|
||||
mutate: () => void
|
||||
}) => {
|
||||
const isCompleted = data.doc.status === "Completed"
|
||||
|
||||
data.doc.column_mapping?.forEach(col => {
|
||||
if (col.maps_to && col.maps_to !== "Do not import") {
|
||||
col_map[col.maps_to] = col.index;
|
||||
}
|
||||
})
|
||||
const [mapping, setMapping] = useState<Mapping[]>(() => toMapping(data.doc.column_mapping))
|
||||
const [headerIndex, setHeaderIndex] = useState<number | null>(() =>
|
||||
headerToState(data.doc.detected_header_index),
|
||||
)
|
||||
|
||||
return col_map
|
||||
const { call: updateMapping, loading: savingMapping } = useUpdateColumnMapping()
|
||||
const { call: setHeader, loading: savingHeader } = useSetHeaderIndex()
|
||||
|
||||
}, [data])
|
||||
const mappingRef = useRef(mapping)
|
||||
const saveTimer = useRef<ReturnType<typeof setTimeout>>(undefined)
|
||||
|
||||
const validColumns = Object.values(column_mapping)
|
||||
useEffect(() => () => clearTimeout(saveTimer.current), [])
|
||||
|
||||
// Reverse the column mapping to get a map of column index to variable name
|
||||
const columnIndexMap: Record<number, StandardColumnTypes> = Object.fromEntries(Object.entries(column_mapping).map(([variable, columnIndex]) => [columnIndex, variable as StandardColumnTypes]))
|
||||
const columnMappingRecord: Record<number, ColumnMapsTo> = {}
|
||||
mapping.forEach((c) => {
|
||||
if (c.maps_to) columnMappingRecord[c.index] = c.maps_to as ColumnMapsTo
|
||||
})
|
||||
|
||||
const commitMapping = (next: Mapping[]) => {
|
||||
mappingRef.current = next
|
||||
setMapping(next)
|
||||
}
|
||||
|
||||
// Persist mapping edits (debounced) so the transaction preview updates in realtime.
|
||||
const scheduleSaveMapping = () => {
|
||||
if (isCompleted) return
|
||||
clearTimeout(saveTimer.current)
|
||||
saveTimer.current = setTimeout(() => {
|
||||
updateMapping({ statement_import_id: data.doc.name, column_mapping: mappingRef.current })
|
||||
.then(() => mutate())
|
||||
.catch(() => toast.error(_("Could not save the column mapping.")))
|
||||
}, 500)
|
||||
}
|
||||
|
||||
const onChangeMapping = (columnIndex: number, mapsTo: ColumnMapsTo) => {
|
||||
if (isCompleted) return
|
||||
commitMapping(applyColumnMappingChange(mappingRef.current, columnIndex, mapsTo))
|
||||
scheduleSaveMapping()
|
||||
}
|
||||
|
||||
const onSetHeader = (rowIndex: number | null) => {
|
||||
if (isCompleted) return
|
||||
setHeaderIndex(rowIndex)
|
||||
setHeader({ statement_import_id: data.doc.name, header_index: rowIndex ?? -1 })
|
||||
.then((res) => {
|
||||
// The backend re-derives the mapping for the new header; sync local state.
|
||||
const doc = res?.message?.doc
|
||||
if (doc) {
|
||||
commitMapping(toMapping(doc.column_mapping))
|
||||
setHeaderIndex(headerToState(doc.detected_header_index))
|
||||
}
|
||||
mutate()
|
||||
})
|
||||
.catch(() => toast.error(_("Could not update the header row.")))
|
||||
}
|
||||
|
||||
// Loop over the contents of the CSV file and show a preview - highlight the header row and the transaction rows
|
||||
return (
|
||||
<Table containerClassName="rounded-none">
|
||||
<TableBody>
|
||||
{data.raw_data.map((row, index) => {
|
||||
|
||||
const isHeaderRow = index === data.doc.detected_header_index;
|
||||
const isTransactionRow = index >= (data.doc.detected_transaction_starting_index ?? 0) && index <= (data.doc.detected_transaction_ending_index ?? 0);
|
||||
|
||||
return <TableRow key={index}
|
||||
title={isHeaderRow ? "Header Row" : ""}
|
||||
className={cn({
|
||||
// "bg-yellow-100": isHeaderRow,
|
||||
// "hover:bg-yellow-100": isHeaderRow,
|
||||
"bg-green-50 hover:bg-green-50 dark:bg-green-700 dark:hover:bg-green-700": isTransactionRow,
|
||||
"text-ink-gray-5/70": !isTransactionRow && !isHeaderRow,
|
||||
})}>
|
||||
{isHeaderRow ? <TableHead className="bg-yellow-100 hover:bg-yellow-100 dark:bg-yellow-400 text-center font-semibold text-ink-gray-8">
|
||||
{index + 1}
|
||||
</TableHead> :
|
||||
<TableCell className="text-center px-1 py-0.5">
|
||||
{index + 1}
|
||||
</TableCell>
|
||||
}
|
||||
{row.map((cell, cellIndex) => {
|
||||
|
||||
const isValidColumn = validColumns.includes(cellIndex);
|
||||
const columnType = columnIndexMap[cellIndex];
|
||||
const isAmountColumn = ["Amount", "Withdrawal", "Deposit", "Balance"].includes(columnType);
|
||||
|
||||
if (isHeaderRow) {
|
||||
return <TableHead key={cellIndex} className={cn("max-w-[250px] w-fit overflow-hidden text-ellipsis py-0.5",
|
||||
isValidColumn ? "bg-yellow-100 hover:bg-yellow-100 dark:bg-yellow-400" : "bg-surface-gray-2",
|
||||
)}>
|
||||
<div className={cn("flex items-center text-xs gap-1 px-1 text-ink-gray-8 font-medium", {
|
||||
"justify-end": isAmountColumn && isValidColumn
|
||||
})}>
|
||||
{columnType && <Tooltip>
|
||||
<TooltipTrigger>
|
||||
<ColumnHeaderIcon columnType={columnType} />
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
{_(columnType)}
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
}
|
||||
{cell}
|
||||
</div>
|
||||
</TableHead>
|
||||
} else {
|
||||
return <TableCell key={cellIndex} className={cn("max-w-[200px] w-fit overflow-hidden text-ellipsis py-0.5",
|
||||
{
|
||||
"bg-green-100 dark:bg-green-400 hover:bg-green-100 dark:hover:bg-green-400": isValidColumn && isTransactionRow,
|
||||
"text-ink-gray-5": !isValidColumn && isTransactionRow,
|
||||
}
|
||||
)} >
|
||||
<div className={cn("min-h-5 flex items-center text-xs px-1", {
|
||||
"justify-end": isAmountColumn && isValidColumn && isTransactionRow
|
||||
})} title={cell}>
|
||||
{cell}
|
||||
</div>
|
||||
</TableCell>
|
||||
}
|
||||
}
|
||||
|
||||
)}
|
||||
</TableRow>
|
||||
})}
|
||||
</TableBody>
|
||||
</Table >
|
||||
<RawTableGrid
|
||||
rows={data.raw_data}
|
||||
columnMapping={columnMappingRecord}
|
||||
headerIndex={headerIndex}
|
||||
editable={!isCompleted}
|
||||
disabled={isCompleted || savingMapping || savingHeader}
|
||||
onChangeMapping={onChangeMapping}
|
||||
onSetHeader={onSetHeader}
|
||||
/>
|
||||
)
|
||||
}
|
||||
|
||||
type StandardColumnTypes = BankStatementImportLogColumnMap['maps_to'];
|
||||
|
||||
const ColumnHeaderIcon = ({ columnType }: { columnType?: StandardColumnTypes }) => {
|
||||
if (!columnType) {
|
||||
return null
|
||||
}
|
||||
|
||||
if (columnType === 'Amount') {
|
||||
return <DollarSignIcon className="w-4 h-4" />
|
||||
}
|
||||
|
||||
if (columnType === 'Withdrawal') {
|
||||
return <ArrowUpRightIcon className="w-4 h-4 text-ink-red-3" />
|
||||
}
|
||||
|
||||
if (columnType === 'Deposit') {
|
||||
return <ArrowDownRightIcon className="w-4 h-4 text-ink-green-3" />
|
||||
}
|
||||
|
||||
if (columnType === 'Balance') {
|
||||
return <BanknoteIcon className="w-4 h-4" />
|
||||
}
|
||||
|
||||
if (columnType === 'Date') {
|
||||
return <CalendarIcon className="w-4 h-4" />
|
||||
}
|
||||
|
||||
if (columnType === 'Description') {
|
||||
return <FileTextIcon className="w-4 h-4" />
|
||||
}
|
||||
|
||||
if (columnType === 'Reference') {
|
||||
return <ReceiptIcon className="w-4 h-4" />
|
||||
}
|
||||
|
||||
if (columnType === 'Transaction Type') {
|
||||
return <ListIcon className="w-4 h-4" />
|
||||
}
|
||||
|
||||
if (columnType === 'Debit/Credit') {
|
||||
return <ArrowUpDownIcon className="w-4 h-4" />
|
||||
}
|
||||
|
||||
return null
|
||||
}
|
||||
|
||||
export default CSVRawDataPreview
|
||||
export default CSVRawDataPreview
|
||||
|
||||
@@ -142,11 +142,16 @@ const StatementDetails = ({ data }: Props) => {
|
||||
<TableCell>
|
||||
<div className='flex items-center gap-2'>
|
||||
<BankLogo bank={bank} />
|
||||
<span className="tracking-tight text-sm font-medium">{bank?.account_name}</span>
|
||||
<span title="GL Account" className="text-sm">{bank?.account}</span>
|
||||
<span className="text-sm">{bank?.account_name}</span>
|
||||
</div>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
<TableRow>
|
||||
<TableHead>{_("Account")}</TableHead>
|
||||
<TableCell>
|
||||
<span title="GL Account" className="text-sm">{bank?.account}</span>
|
||||
</TableCell>
|
||||
</TableRow>
|
||||
<TableRow>
|
||||
<TableHead>{_("Statement File")}</TableHead>
|
||||
<TableCell>
|
||||
@@ -158,7 +163,11 @@ const StatementDetails = ({ data }: Props) => {
|
||||
</TableRow>
|
||||
<TableRow>
|
||||
<TableHead>{_("Transaction Dates")}</TableHead>
|
||||
<TableCell>{_("{0} to {1}", [formatDate(data.doc.start_date, "Do MMMM YYYY"), formatDate(data.doc.end_date, "Do MMMM YYYY")])}</TableCell>
|
||||
{data.doc.start_date && data.doc.end_date ? (
|
||||
<TableCell>{_("{0} to {1}", [formatDate(data.doc.start_date, "Do MMMM YYYY"), formatDate(data.doc.end_date, "Do MMMM YYYY")])}</TableCell>
|
||||
) : (
|
||||
<TableCell>-</TableCell>
|
||||
)}
|
||||
</TableRow>
|
||||
<TableRow>
|
||||
<TableHead>{_("Number of Transactions")}</TableHead>
|
||||
|
||||
@@ -0,0 +1,129 @@
|
||||
import { RefObject, useEffect, useRef, useState } from 'react'
|
||||
import { cn } from '@/lib/utils'
|
||||
|
||||
/** A rectangle as [x0, top, x1, bottom]. */
type Bbox = [number, number, number, number]

/** Smallest allowed edge length of a box, in PDF points. */
const MIN_SIZE = 8

/**
 * Fits one axis of a box into [0, limit]: puts the two edges in ascending order, keeps the
 * leading edge at least MIN_SIZE away from the far side, and keeps the trailing edge at least
 * MIN_SIZE past the leading edge without exceeding `limit`.
 */
const clampAxis = (first: number, second: number, limit: number): [number, number] => {
	const flipped = second < first
	const low = Math.max(0, Math.min(flipped ? second : first, limit - MIN_SIZE))
	const high = Math.min(limit, Math.max(flipped ? first : second, low + MIN_SIZE))
	return [low, high]
}

/**
 * Returns a valid version of `bbox` for a page of the given size: flipped edges are
 * normalised, a minimum size is enforced and the box is clamped to the page.
 *
 * @param bbox - Candidate box as [x0, top, x1, bottom].
 * @param pageWidth - Page width, in the same units as the box.
 * @param pageHeight - Page height, in the same units as the box.
 * @returns A new [x0, top, x1, bottom] tuple; the input is not modified.
 */
const clampBbox = (bbox: Bbox, pageWidth: number, pageHeight: number): Bbox => {
	const [left, right] = clampAxis(bbox[0], bbox[2], pageWidth)
	const [upper, lower] = clampAxis(bbox[1], bbox[3], pageHeight)
	return [left, upper, right, lower]
}
|
||||
|
||||
// Corner resize handles. `id` is exposed on the element as `data-handle` and names the edges
// the handle moves (n/s for top/bottom, w/e for left/right); `className` pins it to its corner.
const HANDLES = [
	{ id: 'nw', className: 'left-0 top-0 -translate-x-1/2 -translate-y-1/2 cursor-nwse-resize' },
	{ id: 'ne', className: 'right-0 top-0 translate-x-1/2 -translate-y-1/2 cursor-nesw-resize' },
	{ id: 'sw', className: 'left-0 bottom-0 -translate-x-1/2 translate-y-1/2 cursor-nesw-resize' },
	{ id: 'se', className: 'right-0 bottom-0 translate-x-1/2 translate-y-1/2 cursor-nwse-resize' },
]

type Props = {
	/** Authoritative box as [x0, top, x1, bottom]; the local draft resets whenever it changes. */
	bbox: Bbox
	/** Page width in the same units as `bbox`; used to turn coordinates into percentages. */
	pageWidth: number
	/** Page height in the same units as `bbox`. */
	pageHeight: number
	/** Class names for the border, the fill used when included, and the label/handle swatch. */
	color: { border: string; bg: string; swatch: string }
	/** Text shown in the tag above the box. */
	label: string
	/** When false the box is drawn faded instead of filled. */
	included: boolean
	/** When true the box ignores pointer input and shows no resize handles. */
	disabled?: boolean
	/** Element whose rendered size is used to convert pixel deltas into page units. */
	containerRef: RefObject<HTMLDivElement | null>
	/** Called with the final box when a drag that actually changed the box ends. */
	onCommit: (bbox: Bbox) => void
}

/** Limits `value` to [min, max]; if the range is inverted, `min` wins. */
const limitTo = (value: number, min: number, max: number) => Math.max(min, Math.min(value, Math.max(min, max)))

/**
 * A draggable + corner-resizable rectangle over a rendered PDF page. Coordinates are in PDF
 * points (top-left origin); pixel deltas are converted using the container's rendered size.
 *
 * The box is edited as a local draft while the pointer is down and reported through `onCommit`
 * on pointer up. A press that does not change the box is not reported, and a cancelled
 * pointer restores the authoritative `bbox`.
 */
const BBoxOverlay = ({ bbox, pageWidth, pageHeight, color, label, included, disabled, containerRef, onCommit }: Props) => {
	const [draft, setDraft] = useState<Bbox>(bbox)
	// Mirror of `draft` that the pointer handlers can read without waiting for a re-render.
	const draftRef = useRef<Bbox>(bbox)
	const drag = useRef<{ mode: string; startX: number; startY: number; start: Bbox } | null>(null)

	// Reset to the authoritative bbox whenever it changes (e.g. after a server re-extract).
	useEffect(() => {
		setDraft(bbox)
		draftRef.current = bbox
	}, [bbox])

	const apply = (next: Bbox) => {
		draftRef.current = next
		setDraft(next)
	}

	const onPointerDown = (e: React.PointerEvent) => {
		if (disabled) return
		e.preventDefault()
		e.stopPropagation()
		// A press on a corner handle carries its id in `data-handle`; anything else moves the box.
		const mode = (e.target as HTMLElement).dataset.handle ?? 'move'
		;(e.currentTarget as HTMLElement).setPointerCapture(e.pointerId)
		drag.current = { mode, startX: e.clientX, startY: e.clientY, start: draftRef.current }
	}

	const onPointerMove = (e: React.PointerEvent) => {
		if (!drag.current || !containerRef.current) return
		const rect = containerRef.current.getBoundingClientRect()
		// A collapsed container has no scale to convert pixels with; dividing would yield NaN.
		if (rect.width <= 0 || rect.height <= 0) return
		const dx = ((e.clientX - drag.current.startX) / rect.width) * pageWidth
		const dy = ((e.clientY - drag.current.startY) / rect.height) * pageHeight
		let [x0, top, x1, bottom] = drag.current.start
		const m = drag.current.mode
		if (m === 'move') {
			// Bound the offset itself so the box stops at the page edge with its size intact;
			// clamping each edge separately would squash the box against the edge instead.
			const moveX = limitTo(dx, -x0, pageWidth - x1)
			const moveY = limitTo(dy, -top, pageHeight - bottom)
			x0 += moveX
			x1 += moveX
			top += moveY
			bottom += moveY
		} else {
			if (m.includes('w')) x0 += dx
			if (m.includes('e')) x1 += dx
			if (m.includes('n')) top += dy
			if (m.includes('s')) bottom += dy
		}
		apply(clampBbox([x0, top, x1, bottom], pageWidth, pageHeight))
	}

	// Ends the active drag. `commit` is false when the pointer was cancelled.
	const finishDrag = (e: React.PointerEvent, commit: boolean) => {
		const active = drag.current
		if (!active) return
		const element = e.currentTarget as HTMLElement
		// Capture may already be gone (e.g. after a cancel); releasing it then would throw.
		if (element.hasPointerCapture(e.pointerId)) element.releasePointerCapture(e.pointerId)
		drag.current = null
		if (!commit) {
			// Throw away the half-finished edit and show the authoritative box again.
			apply(bbox)
			return
		}
		// A plain click leaves the box as it was - do not report a change that did not happen.
		const changed = draftRef.current.some((value, i) => value !== active.start[i])
		if (changed) onCommit(draftRef.current)
	}

	const onPointerUp = (e: React.PointerEvent) => finishDrag(e, true)
	const onPointerCancel = (e: React.PointerEvent) => finishDrag(e, false)

	const [x0, top, x1, bottom] = draft

	return (
		<div
			className={cn(
				'absolute touch-none border-2',
				color.border,
				included ? color.bg : 'opacity-40',
				disabled ? 'pointer-events-none' : 'cursor-move',
			)}
			style={{
				left: `${(x0 / pageWidth) * 100}%`,
				top: `${(top / pageHeight) * 100}%`,
				width: `${((x1 - x0) / pageWidth) * 100}%`,
				height: `${((bottom - top) / pageHeight) * 100}%`,
			}}
			onPointerDown={onPointerDown}
			onPointerMove={onPointerMove}
			onPointerUp={onPointerUp}
			onPointerCancel={onPointerCancel}
		>
			<span className={cn('pointer-events-none absolute -top-5 left-0 rounded px-1 text-[10px] font-medium text-white', color.swatch)}>
				{label}
			</span>
			{!disabled &&
				HANDLES.map((handle) => (
					<span
						key={handle.id}
						data-handle={handle.id}
						className={cn('absolute size-2.5 rounded-sm border border-white', color.swatch, handle.className)}
					/>
				))}
		</div>
	)
}
|
||||
|
||||
export default BBoxOverlay
|
||||
@@ -0,0 +1,23 @@
|
||||
import StatementDetails from '../CSV/StatementDetails'
|
||||
import PDFTableEditor from './PDFTableEditor'
|
||||
import { GetStatementDetailsResponse } from '../import_utils'
|
||||
|
||||
type Props = {
	data: { message: GetStatementDetailsResponse }
	mutate: () => void
}

/**
 * Side-by-side layout for a PDF statement import: the statement details on the left and the
 * table editor on the right. Both panes receive `data.message`; `mutate` is handed to the
 * editor only.
 */
const PDFImport = ({ data, mutate }: Props) => (
	<div className="w-full flex">
		{/* Left pane: statement summary */}
		<div className="w-[45%] p-4 h-[calc(100vh-72px)] overflow-scroll">
			<StatementDetails data={data.message} />
		</div>
		{/* Right pane: extracted tables */}
		<div className="w-[55%] border-s pe-1 ps-0 border-outline-gray-2 h-[calc(100vh-72px)] overflow-scroll">
			<PDFTableEditor data={data.message} mutate={mutate} />
		</div>
	</div>
)
|
||||
|
||||
export default PDFImport
|
||||
@@ -0,0 +1,362 @@
|
||||
import { useEffect, useMemo, useRef, useState } from 'react'
|
||||
import { toast } from 'sonner'
|
||||
import { ChevronDownIcon, ChevronLeftIcon, ChevronRightIcon, FileTextIcon, Loader2Icon, TableIcon } from 'lucide-react'
|
||||
import _ from '@/lib/translate'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { Button } from '@/components/ui/button'
|
||||
import { Switch } from '@/components/ui/switch'
|
||||
import { Label } from '@/components/ui/label'
|
||||
import { H3, Paragraph } from '@/components/ui/typography'
|
||||
import { Tabs, TabsList, TabsTrigger } from '@/components/ui/tabs'
|
||||
import ErrorBanner from '@/components/ui/error-banner'
|
||||
import RawTableGrid from '../RawTableGrid'
|
||||
import BBoxOverlay from './BBoxOverlay'
|
||||
import {
|
||||
applyColumnMappingChange,
|
||||
ColumnMapsTo,
|
||||
GetStatementDetailsResponse,
|
||||
PDFTable,
|
||||
useReextractPDFTable,
|
||||
useSetPDFTableHeader,
|
||||
useUpdatePDFTables,
|
||||
} from '../import_utils'
|
||||
|
||||
type Props = {
	/** Statement details; the editor reads `doc.status`, `doc.name` and `pdf_tables` from it. */
	data: GetStatementDetailsResponse
	/**
	 * Called after each successful save, re-extract or header change.
	 * NOTE(review): presumably refetches the statement details - confirm against the caller.
	 */
	mutate: () => void
}
|
||||
|
||||
// Colour sets for the table overlays on a page; PageView picks one per table by position and
// wraps around when a page has more tables than entries. Each entry holds the border class,
// the translucent fill class and the solid swatch class.
// The class names are kept as complete literals rather than being assembled from parts.
const OVERLAY_COLORS = [
	{
		border: 'border-blue-500',
		bg: 'bg-blue-500/10',
		swatch: 'bg-blue-500',
	},
	{
		border: 'border-purple-500',
		bg: 'bg-purple-500/10',
		swatch: 'bg-purple-500',
	},
	{
		border: 'border-amber-500',
		bg: 'bg-amber-500/10',
		swatch: 'bg-amber-500',
	},
	{
		border: 'border-teal-500',
		bg: 'bg-teal-500/10',
		swatch: 'bg-teal-500',
	},
]
|
||||
|
||||
/**
 * Turns a table's `column_mapping` list into a lookup from column index to mapped field.
 * A table without a mapping gives an empty lookup; if an index occurs more than once the
 * last entry wins.
 */
const columnMappingRecord = (table: PDFTable): Record<number, ColumnMapsTo> =>
	(table.column_mapping ?? []).reduce<Record<number, ColumnMapsTo>>((lookup, column) => {
		lookup[column.index] = column.maps_to
		return lookup
	}, {})
|
||||
|
||||
/**
 * Editor for the tables extracted from a PDF statement.
 *
 * Holds a local copy of `data.pdf_tables`, shows one page at a time either as the page image
 * with box overlays ("pdf" view) or as mappable grids ("table" view), and persists edits:
 * mapping/include changes are saved after a 500 ms debounce, a moved box triggers a debounced
 * re-extract, and a header change is sent immediately. `mutate` runs after each successful
 * call. Once the import is "Completed", nothing is sent to the server.
 */
const PDFTableEditor = ({ data, mutate }: Props) => {
	const isCompleted = data.doc.status === 'Completed'

	const [tables, setTables] = useState<PDFTable[]>(() => data.pdf_tables ?? [])
	const [viewMode, setViewMode] = useState<'pdf' | 'table'>('pdf')
	const [pageIndex, setPageIndex] = useState(0)
	// Flat-array indices of the tables whose grid is folded away in the table view.
	const [collapsed, setCollapsed] = useState<Set<number>>(new Set())

	const toggleCollapsed = (tableIndex: number) =>
		setCollapsed((prev) => {
			const next = new Set(prev)
			if (next.has(tableIndex)) {
				next.delete(tableIndex)
			} else {
				next.add(tableIndex)
			}
			return next
		})

	const { call, loading, error } = useUpdatePDFTables()
	const { call: reextract, loading: reextracting } = useReextractPDFTable()
	const { call: setHeaderCall, loading: settingHeader } = useSetPDFTableHeader()
	const busy = loading || reextracting || settingHeader

	// Persist edits automatically (debounced) so the transaction preview updates in realtime.
	const tablesRef = useRef(tables)
	const saveTimer = useRef<ReturnType<typeof setTimeout>>(undefined)
	const reextractTimer = useRef<ReturnType<typeof setTimeout>>(undefined)

	// Keep tablesRef in sync synchronously so the debounced save/re-extract never read stale state.
	const commitTables = (next: PDFTable[]) => {
		tablesRef.current = next
		setTables(next)
	}

	// Take over the table list from a server response. A response that carries no list is
	// ignored, so a malformed or partial reply cannot wipe the tables being edited.
	const adoptServerTables = (next?: PDFTable[] | null) => {
		if (Array.isArray(next)) commitTables(next)
	}

	const scheduleSave = () => {
		if (isCompleted) return
		clearTimeout(saveTimer.current)
		saveTimer.current = setTimeout(() => {
			call({ statement_import_id: data.doc.name, tables: tablesRef.current })
				.then(() => mutate())
				.catch(() => toast.error(_('Could not save the table settings.')))
		}, 500)
	}

	// After a bbox change, re-extract that table's rows from the new region (debounced).
	// The target is read inside the timeout so it always reflects the committed bbox.
	// NOTE(review): one timer is shared by all tables, so moving a second box within the
	// debounce window replaces the first box's pending re-extract - confirm this is intended.
	const scheduleReextract = (tableIndex: number) => {
		if (isCompleted) return
		clearTimeout(reextractTimer.current)
		reextractTimer.current = setTimeout(() => {
			const target = tablesRef.current[tableIndex]
			// The list may have been replaced by a server response while the timer was pending.
			if (!target) return
			reextract({
				statement_import_id: data.doc.name,
				page: target.page,
				table_index: target.table_index,
				bbox: target.bbox,
			})
				.then((res) => {
					adoptServerTables(res?.message?.pdf_tables)
					mutate()
				})
				.catch(() => toast.error(_('Could not re-extract the table.')))
		}, 500)
	}

	// Drop any pending debounced work when the editor unmounts.
	useEffect(() => () => {
		clearTimeout(saveTimer.current)
		clearTimeout(reextractTimer.current)
	}, [])

	const pages = useMemo(() => Array.from(new Set(tables.map((t) => t.page))).sort((a, b) => a - b), [tables])
	// A server response can shrink the page list; keep the selection inside it so
	// `currentPage` is always defined while there are tables.
	const activePageIndex = Math.min(pageIndex, Math.max(pages.length - 1, 0))
	const currentPage = pages[activePageIndex]
	// Keep the table's position in the flat array so edits target the right one.
	const pageTables = useMemo(
		() => tables.map((table, index) => ({ table, index })).filter((t) => t.table.page === currentPage),
		[tables, currentPage],
	)

	const updateTable = (tableIndex: number, updater: (table: PDFTable) => PDFTable) => {
		commitTables(tablesRef.current.map((t, i) => (i === tableIndex ? updater(t) : t)))
		scheduleSave()
	}

	const onChangeMapping = (tableIndex: number, columnIndex: number, mapsTo: ColumnMapsTo) => {
		updateTable(tableIndex, (table) => ({
			...table,
			column_mapping: applyColumnMappingChange(table.column_mapping, columnIndex, mapsTo),
		}))
	}

	const onToggleIncluded = (tableIndex: number, included: boolean) =>
		updateTable(tableIndex, (table) => ({ ...table, included }))

	const onBboxCommit = (tableIndex: number, bbox: [number, number, number, number]) => {
		commitTables(tablesRef.current.map((t, i) => (i === tableIndex ? { ...t, bbox } : t)))
		scheduleReextract(tableIndex)
	}

	// Set/clear the header row of a table; the backend re-derives the column mapping.
	const onSetHeader = (tableIndex: number, headerIndex: number | null) => {
		// A completed import is read-only, like every other edit path in this editor.
		if (isCompleted) return
		if (!tablesRef.current[tableIndex]) return
		// Show the new header right away; the server response then replaces the whole list.
		commitTables(tablesRef.current.map((t, i) => (i === tableIndex ? { ...t, header_index: headerIndex } : t)))
		const target = tablesRef.current[tableIndex]
		setHeaderCall({
			statement_import_id: data.doc.name,
			page: target.page,
			table_index: target.table_index,
			// -1 is what gets sent for "no header row".
			header_index: headerIndex ?? -1,
		})
			.then((res) => {
				adoptServerTables(res?.message?.pdf_tables)
				mutate()
			})
			.catch(() => toast.error(_('Could not update the header row.')))
	}

	if (tables.length === 0) {
		return (
			<div className="p-4">
				<Paragraph className="text-p-sm text-ink-gray-5">
					{_('No tables were extracted from this PDF.')}
				</Paragraph>
			</div>
		)
	}

	// NOTE(review): the pager label below pairs the page number stored on the tables with the
	// count of pages that have tables, so it can read e.g. "Page 7 of 2" - confirm the wording.
	return (
		<div className="flex flex-col gap-3 p-4">
			<div className="flex flex-col gap-1">
				<H3 className="text-base border-0 p-0">{_('Detected Tables')}</H3>
				<Paragraph className="text-p-sm">
					{_('Review each page. In the Table view, map each column, click a row number to set/clear the header row, and exclude anything that is not transactions (ads, summaries).')}
				</Paragraph>
			</div>

			{error && <ErrorBanner error={error} />}

			<div className="flex items-center justify-between gap-2">
				<Tabs value={viewMode} onValueChange={(v) => setViewMode(v as 'pdf' | 'table')}>
					<TabsList variant="subtle">
						<TabsTrigger value="pdf"><FileTextIcon />{_('PDF')}</TabsTrigger>
						<TabsTrigger value="table"><TableIcon />{_('Table')}</TabsTrigger>
					</TabsList>
				</Tabs>

				<div className="flex items-center gap-1">
					{busy && (
						<span className="flex items-center gap-1 pe-1 text-xs text-ink-gray-5">
							<Loader2Icon className="size-3 animate-spin" />
							{reextracting ? _('Re-extracting') : _('Saving')}
						</span>
					)}
					<Button
						variant="ghost"
						isIconButton
						disabled={activePageIndex === 0}
						onClick={() => setPageIndex(Math.max(0, activePageIndex - 1))}
					>
						<ChevronLeftIcon />
					</Button>
					<span className="min-w-24 text-center text-sm text-ink-gray-7">
						{_('Page {0} of {1}', [currentPage.toString(), pages.length.toString()])}
					</span>
					<Button
						variant="ghost"
						isIconButton
						disabled={activePageIndex >= pages.length - 1}
						onClick={() => setPageIndex(Math.min(pages.length - 1, activePageIndex + 1))}
					>
						<ChevronRightIcon />
					</Button>
				</div>
			</div>

			{viewMode === 'pdf' ? (
				<PageView
					pageTables={pageTables}
					disabled={isCompleted}
					onToggleIncluded={onToggleIncluded}
					onBboxCommit={onBboxCommit}
				/>
			) : (
				<div className="flex flex-col gap-4">
					{pageTables.map(({ table, index }, position) => {
						const isCollapsed = collapsed.has(index)
						return (
							<div
								key={index}
								className={cn('flex flex-col rounded border border-outline-gray-2', !table.included && 'opacity-60')}
							>
								<div className="flex items-center justify-between p-2">
									<span className="ps-1 text-sm font-medium text-ink-gray-8">
										{_('Table {0}', [(position + 1).toString()])}
									</span>
									<div className="flex items-center gap-2">
										<IncludeToggle
											id={`tbl-${index}`}
											checked={table.included}
											disabled={isCompleted}
											onCheckedChange={(c) => onToggleIncluded(index, c)}
										/>
										<Button variant="ghost" size="sm" isIconButton onClick={() => toggleCollapsed(index)}>
											<ChevronDownIcon className={cn('transition-transform', isCollapsed && '-rotate-90')} />
										</Button>
									</div>
								</div>
								{!isCollapsed && (
									<div className="overflow-auto border-t border-outline-gray-2">
										<RawTableGrid
											rows={table.rows}
											columnMapping={columnMappingRecord(table)}
											headerIndex={table.header_index}
											editable
											disabled={isCompleted}
											onChangeMapping={(columnIndex, mapsTo) => onChangeMapping(index, columnIndex, mapsTo)}
											onSetHeader={(rowIndex) => onSetHeader(index, rowIndex)}
										/>
									</div>
								)}
							</div>
						)
					})}
				</div>
			)}
		</div>
	)
}
|
||||
|
||||
type PageViewProps = {
	pageTables: { table: PDFTable; index: number }[]
	disabled: boolean
	onToggleIncluded: (tableIndex: number, included: boolean) => void
	onBboxCommit: (tableIndex: number, bbox: [number, number, number, number]) => void
}

/**
 * The "pdf" view of one page: the page image with one overlay box per table, followed by a
 * list of include switches. The image and page size are read from the first table of the
 * page; without an image only a notice is rendered. Callbacks receive the table's `index`
 * (its position in the flat table list), while labels and colours follow the position on
 * the page.
 */
const PageView = ({ pageTables, disabled, onToggleIncluded, onBboxCommit }: PageViewProps) => {
	const containerRef = useRef<HTMLDivElement>(null)
	const first = pageTables[0]
	const pageImage = first?.table.page_image
	// Fall back to 1 so the percentage maths in the overlays never divides by undefined.
	const pageWidth = first?.table.page_width ?? 1
	const pageHeight = first?.table.page_height ?? 1

	if (!pageImage) {
		return (
			<Paragraph className="text-p-sm text-ink-gray-5">
				{_('No page image is available for this page.')}
			</Paragraph>
		)
	}

	// Colours repeat once a page has more tables than there are colour sets.
	const colorAt = (position: number) => OVERLAY_COLORS[position % OVERLAY_COLORS.length]
	const labelAt = (position: number) => _('Table {0}', [(position + 1).toString()])

	const hint = disabled ? null : (
		<Paragraph className="text-xs text-ink-gray-5">
			{_('Drag a box to move it, or drag a corner to resize. The table is re-read from the new region automatically.')}
		</Paragraph>
	)

	const overlays = pageTables.map(({ table, index }, position) => (
		<BBoxOverlay
			key={index}
			bbox={table.bbox}
			pageWidth={pageWidth}
			pageHeight={pageHeight}
			color={colorAt(position)}
			label={labelAt(position)}
			included={table.included}
			disabled={disabled}
			containerRef={containerRef}
			onCommit={(bbox) => onBboxCommit(index, bbox)}
		/>
	))

	const toggles = pageTables.map(({ table, index }, position) => (
		<div key={index} className="flex items-center justify-between rounded border border-outline-gray-2 px-2 py-1.5">
			<div className="flex items-center gap-2">
				<span className={cn('size-3 rounded-sm', colorAt(position).swatch)} />
				<span className="text-xs">{labelAt(position)}</span>
			</div>
			<IncludeToggle
				id={`pdf-tbl-${index}`}
				checked={table.included}
				disabled={disabled}
				onCheckedChange={(c) => onToggleIncluded(index, c)}
			/>
		</div>
	))

	return (
		<div className="flex flex-col gap-3">
			{hint}
			<div ref={containerRef} className="relative w-full overflow-auto rounded border border-outline-gray-2 bg-surface-gray-1">
				<img src={pageImage} alt={_('Page preview')} className="w-full" />
				{overlays}
			</div>

			<div className="flex flex-col gap-1.5">{toggles}</div>
		</div>
	)
}
|
||||
|
||||
/**
 * A labelled on/off switch reading "Include". `id` ties the label to the switch; the other
 * props are passed straight through to the switch.
 */
const IncludeToggle = (props: {
	id: string
	checked: boolean
	disabled: boolean
	onCheckedChange: (checked: boolean) => void
}) => {
	const { id, checked, disabled, onCheckedChange } = props

	return (
		<div className="flex items-center gap-2">
			<Label htmlFor={id} className="text-xs text-ink-gray-6">
				{_('Include')}
			</Label>
			<Switch id={id} checked={checked} disabled={disabled} onCheckedChange={onCheckedChange} />
		</div>
	)
}
|
||||
|
||||
export default PDFTableEditor
|
||||
@@ -0,0 +1,222 @@
|
||||
import { useMemo } from 'react'
|
||||
import {
|
||||
ArrowDownRightIcon,
|
||||
ArrowUpDownIcon,
|
||||
ArrowUpRightIcon,
|
||||
BanknoteIcon,
|
||||
CalendarIcon,
|
||||
DollarSignIcon,
|
||||
FileTextIcon,
|
||||
ListIcon,
|
||||
ReceiptIcon,
|
||||
} from 'lucide-react'
|
||||
import _ from '@/lib/translate'
|
||||
import { cn } from '@/lib/utils'
|
||||
import { Table, TableBody, TableCell, TableHead, TableRow } from '@/components/ui/table'
|
||||
import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'
|
||||
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
|
||||
import { COLUMN_MAPS_TO_OPTIONS, ColumnMapsTo } from './import_utils'
|
||||
|
||||
const AMOUNT_COLUMNS: ColumnMapsTo[] = ['Amount', 'Withdrawal', 'Deposit', 'Balance']
|
||||
const DATE_LIKE = /\d{1,4}[/\-.\s]\d{1,2}[/\-.\s]\d{1,4}|\d{1,2}[\s-][a-z]{3}/i
|
||||
|
||||
type Props = {
|
||||
rows: string[][]
|
||||
/** Column index -> mapped field */
|
||||
columnMapping: Record<number, ColumnMapsTo>
|
||||
headerIndex: number | null
|
||||
editable?: boolean
|
||||
disabled?: boolean
|
||||
onChangeMapping?: (columnIndex: number, mapsTo: ColumnMapsTo) => void
|
||||
/** Set the header row (or null to mark the table as having no header). */
|
||||
onSetHeader?: (rowIndex: number | null) => void
|
||||
}
|
||||
|
||||
/**
 * A preview of extracted rows with CSV-style colour coding: the header row is highlighted,
 * detected transaction rows are green, and mapped columns are emphasised.
 *
 * When `editable`, a compact row of column -> field dropdowns sits at the top; when
 * `onSetHeader` is also provided, row numbers become buttons that set/clear the header row.
 *
 * Transaction rows are detected on the client: a non-header row counts when the cell in the
 * column mapped to `Date` matches `DATE_LIKE` and at least one column mapped to
 * Amount/Withdrawal/Deposit is non-empty.
 */
const RawTableGrid = ({ rows, columnMapping, headerIndex, editable, disabled, onChangeMapping, onSetHeader }: Props) => {
    // Tabular (XLSX) cells can be numbers/dates, not strings - coerce so .trim()/render are safe.
    const stringRows = useMemo(
        () => rows.map((row) => row.map((cell) => (cell == null ? '' : String(cell)))),
        [rows],
    )
    // The widest row decides how many columns are rendered; shorter rows are padded with ''.
    const numColumns = useMemo(() => stringRows.reduce((max, row) => Math.max(max, row.length), 0), [stringRows])

    // Indexes of columns mapped to a real field. Kept as a Set because membership is
    // checked once per rendered cell.
    const validColumns = useMemo(
        () =>
            new Set(
                Object.entries(columnMapping)
                    .filter(([, m]) => m && m !== 'Do not import')
                    .map(([i]) => Number(i)),
            ),
        [columnMapping],
    )
    const dateColumn = useMemo(() => Object.entries(columnMapping).find(([, m]) => m === 'Date')?.[0], [columnMapping])
    const amountColumns = useMemo(
        () => Object.entries(columnMapping).filter(([, m]) => ['Amount', 'Withdrawal', 'Deposit'].includes(m)).map(([i]) => Number(i)),
        [columnMapping],
    )

    // Approximate the backend's transaction-row detection so the highlighting tracks edits live.
    const transactionRows = useMemo(() => {
        const set = new Set<number>()
        if (dateColumn === undefined) return set
        const dateIdx = Number(dateColumn)
        stringRows.forEach((row, index) => {
            if (index === headerIndex) return
            const dateCell = (row[dateIdx] ?? '').trim()
            if (!dateCell || !DATE_LIKE.test(dateCell)) return
            if (amountColumns.some((c) => (row[c] ?? '').trim() !== '')) set.add(index)
        })
        return set
    }, [stringRows, headerIndex, dateColumn, amountColumns])

    return (
        <Table containerClassName="rounded-none">
            <TableBody>
                {editable && (
                    <TableRow className="border-b border-outline-gray-2 bg-surface-white hover:bg-surface-white">
                        <TableHead className="w-8 p-1" />
                        {Array.from({ length: numColumns }).map((_unused, columnIndex) => (
                            <TableHead key={columnIndex} className="p-1 align-top">
                                <Select
                                    disabled={disabled}
                                    value={columnMapping[columnIndex] ?? 'Do not import'}
                                    onValueChange={(value) => onChangeMapping?.(columnIndex, value as ColumnMapsTo)}
                                >
                                    <SelectTrigger variant="outline" inputSize="sm" className="h-7 w-full">
                                        <SelectValue />
                                    </SelectTrigger>
                                    <SelectContent>
                                        {COLUMN_MAPS_TO_OPTIONS.map((option) => (
                                            <SelectItem key={option} value={option}>
                                                <span className="flex items-center gap-1.5">
                                                    <ColumnHeaderIcon columnType={option} />
                                                    {_(option)}
                                                </span>
                                            </SelectItem>
                                        ))}
                                    </SelectContent>
                                </Select>
                            </TableHead>
                        ))}
                    </TableRow>
                )}

                {stringRows.map((row, index) => {
                    const isHeaderRow = index === headerIndex
                    const isTransactionRow = transactionRows.has(index)

                    return (
                        <TableRow
                            key={index}
                            className={cn({
                                'bg-green-50 hover:bg-green-50 dark:bg-green-700 dark:hover:bg-green-700': isTransactionRow,
                                'bg-yellow-100 hover:bg-yellow-100 dark:bg-yellow-400': isHeaderRow,
                                'text-ink-gray-5/70': !isTransactionRow && !isHeaderRow,
                            })}
                        >
                            {editable && onSetHeader ? (
                                <TableCell className="h-px w-8 p-0 text-center">
                                    <Tooltip>
                                        <TooltipTrigger asChild>
                                            <button
                                                type="button"
                                                disabled={disabled}
                                                // Clicking the current header row clears it (table has no header).
                                                onClick={() => onSetHeader(isHeaderRow ? null : index)}
                                                className={cn(
                                                    'flex h-full w-full items-center justify-center px-1 text-ink-gray-6 hover:bg-surface-gray-3',
                                                    isHeaderRow && 'font-semibold text-ink-gray-8',
                                                )}
                                            >
                                                {index + 1}
                                            </button>
                                        </TooltipTrigger>
                                        <TooltipContent>
                                            {isHeaderRow
                                                ? _('This is the header row. Click to mark the table as having no header.')
                                                : _('Click to set this as the header row.')}
                                        </TooltipContent>
                                    </Tooltip>
                                </TableCell>
                            ) : (
                                <TableCell className="w-8 px-1 py-0.5 text-center text-ink-gray-6">{index + 1}</TableCell>
                            )}

                            {Array.from({ length: numColumns }).map((_unused, cellIndex) => {
                                const columnType = columnMapping[cellIndex]
                                const isValidColumn = validColumns.has(cellIndex)
                                const isAmountColumn = AMOUNT_COLUMNS.includes(columnType)
                                const cellText = row[cellIndex] ?? ''

                                // Read-only header row: icon + label.
                                if (isHeaderRow) {
                                    return (
                                        <TableCell key={cellIndex} className="max-w-[200px] overflow-hidden text-ellipsis py-1">
                                            <div className="flex items-center gap-1 px-1 text-xs font-medium text-ink-gray-8">
                                                {/* Only for columns mapped to a real field: "Do not import" has no
                                                    icon, so its trigger would be an empty, focusable button. */}
                                                {isValidColumn && (
                                                    <Tooltip>
                                                        <TooltipTrigger>
                                                            <ColumnHeaderIcon columnType={columnType} />
                                                        </TooltipTrigger>
                                                        <TooltipContent>{_(columnType)}</TooltipContent>
                                                    </Tooltip>
                                                )}
                                                {cellText}
                                            </div>
                                        </TableCell>
                                    )
                                }

                                return (
                                    <TableCell
                                        key={cellIndex}
                                        className={cn('max-w-[200px] overflow-hidden text-ellipsis py-0.5', {
                                            'bg-green-100 dark:bg-green-400 hover:bg-green-100 dark:hover:bg-green-400': isValidColumn && isTransactionRow,
                                            'text-ink-gray-5': !isValidColumn && isTransactionRow,
                                        })}
                                    >
                                        <div
                                            className={cn('min-h-5 flex items-center px-1 text-xs', {
                                                'justify-end': isAmountColumn && isValidColumn && isTransactionRow,
                                            })}
                                            title={cellText}
                                        >
                                            {cellText}
                                        </div>
                                    </TableCell>
                                )
                            })}
                        </TableRow>
                    )
                })}
            </TableBody>
        </Table>
    )
}
|
||||
|
||||
/** Icon component and class name for each mapped field that has an icon. */
const COLUMN_TYPE_ICONS = new Map<ColumnMapsTo, [typeof DollarSignIcon, string]>([
    ['Amount', [DollarSignIcon, 'size-4']],
    ['Withdrawal', [ArrowUpRightIcon, 'size-4 text-ink-red-3']],
    ['Deposit', [ArrowDownRightIcon, 'size-4 text-ink-green-3']],
    ['Balance', [BanknoteIcon, 'size-4']],
    ['Date', [CalendarIcon, 'size-4']],
    ['Description', [FileTextIcon, 'size-4']],
    ['Reference', [ReceiptIcon, 'size-4']],
    ['Transaction Type', [ListIcon, 'size-4']],
    ['Debit/Credit', [ArrowUpDownIcon, 'size-4']],
])

/**
 * Small icon representing the field a column is mapped to.
 *
 * Renders nothing when `columnType` is missing or has no dedicated icon
 * (for example "Do not import").
 */
const ColumnHeaderIcon = ({ columnType }: { columnType?: ColumnMapsTo }) => {
    const entry = columnType ? COLUMN_TYPE_ICONS.get(columnType) : undefined
    if (!entry) {
        return null
    }

    const [Icon, iconClassName] = entry
    return <Icon className={iconClassName} />
}
|
||||
|
||||
export default RawTableGrid
|
||||
@@ -1,6 +1,97 @@
|
||||
import { BankStatementImportLog } from "@/types/Accounts/BankStatementImportLog"
|
||||
import { useFrappeGetCall } from "frappe-react-sdk"
|
||||
import { useFrappeGetCall, useFrappePostCall } from "frappe-react-sdk"
|
||||
|
||||
export type ColumnMapsTo =
|
||||
| "Do not import"
|
||||
| "Date"
|
||||
| "Withdrawal"
|
||||
| "Deposit"
|
||||
| "Amount"
|
||||
| "Description"
|
||||
| "Reference"
|
||||
| "Transaction Type"
|
||||
| "Debit/Credit"
|
||||
| "Balance"
|
||||
| "Included Fee"
|
||||
| "Excluded Fee"
|
||||
| "Party Name/Account Holder"
|
||||
| "Party Account No."
|
||||
| "Party IBAN"
|
||||
|
||||
export type ColumnMappingEntry = {
|
||||
index: number
|
||||
maps_to: ColumnMapsTo | string
|
||||
header_text?: string
|
||||
variable?: string
|
||||
}
|
||||
|
||||
/**
 * Apply a column mapping change, clearing the same mapping from any other column.
 *
 * A field other than "Do not import" can be mapped to only one column, so any other
 * column currently mapped to `mapsTo` is reset to "Do not import". The entry for
 * `columnIndex` is created if missing; if it exists, all of its other fields are kept.
 *
 * @param columns - Current mapping entries. Neither the array nor its entries are mutated.
 * @param columnIndex - Index of the column being (re)mapped.
 * @param mapsTo - Field the column should now map to.
 * @returns A new array of entries sorted by `index`.
 */
export function applyColumnMappingChange<T extends ColumnMappingEntry>(
    columns: T[],
    columnIndex: number,
    mapsTo: ColumnMapsTo,
): T[] {
    const previous = columns.find((c) => c.index === columnIndex)

    // "Do not import" may be shared by many columns, so it never clears other entries.
    const releasesOthers = mapsTo !== "Do not import"
    const others = columns
        .filter((c) => c.index !== columnIndex)
        .map((c) =>
            releasesOthers && c.maps_to === mapsTo
                ? { ...c, maps_to: "Do not import" as ColumnMapsTo }
                : c,
        )

    // Spread the previous entry first so extra fields carried by T survive the edit,
    // matching how untouched columns are preserved.
    const updated = {
        ...previous,
        index: columnIndex,
        maps_to: mapsTo,
        header_text: previous?.header_text ?? "",
        variable: previous?.variable ?? `column_${columnIndex}`,
    } as T

    return [...others, updated].sort((a, b) => a.index - b.index)
}
|
||||
|
||||
export const COLUMN_MAPS_TO_OPTIONS: ColumnMapsTo[] = [
|
||||
"Do not import",
|
||||
"Date",
|
||||
"Description",
|
||||
"Reference",
|
||||
"Withdrawal",
|
||||
"Deposit",
|
||||
"Amount",
|
||||
"Balance",
|
||||
"Debit/Credit",
|
||||
"Transaction Type",
|
||||
"Included Fee",
|
||||
"Excluded Fee",
|
||||
"Party Name/Account Holder",
|
||||
"Party Account No.",
|
||||
"Party IBAN",
|
||||
]
|
||||
|
||||
export interface PDFTableColumn {
|
||||
index: number
|
||||
header_text: string
|
||||
variable?: string
|
||||
maps_to: ColumnMapsTo
|
||||
}
|
||||
|
||||
export interface PDFTable {
|
||||
page: number
|
||||
table_index: number
|
||||
bbox: [number, number, number, number]
|
||||
page_width: number
|
||||
page_height: number
|
||||
page_image: string | null
|
||||
render_scale: number | null
|
||||
rows: string[][]
|
||||
header_index: number | null
|
||||
column_mapping: PDFTableColumn[]
|
||||
date_format?: string
|
||||
amount_format?: string
|
||||
included: boolean
|
||||
}
|
||||
|
||||
export interface GetStatementDetailsResponse {
|
||||
doc: BankStatementImportLog,
|
||||
@@ -30,6 +121,7 @@ export interface GetStatementDetailsResponse {
|
||||
date_format: string,
|
||||
raw_data: Array<Array<string>>,
|
||||
currency: string,
|
||||
pdf_tables?: PDFTable[],
|
||||
}
|
||||
|
||||
export const useGetStatementDetails = (id: string) => {
|
||||
@@ -39,4 +131,24 @@ export const useGetStatementDetails = (id: string) => {
|
||||
revalidateOnFocus: false
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
/**
 * POST hook for the `update_pdf_tables` server method of Bank Statement Import Log.
 * The response is typed as `{ message: GetStatementDetailsResponse }`.
 */
export const useUpdatePDFTables = () =>
    useFrappePostCall<{ message: GetStatementDetailsResponse }>(
        "erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.update_pdf_tables",
    )
|
||||
|
||||
/**
 * POST hook for the `reextract_pdf_table` server method of Bank Statement Import Log.
 * The response is typed as `{ message: GetStatementDetailsResponse }`.
 */
export const useReextractPDFTable = () =>
    useFrappePostCall<{ message: GetStatementDetailsResponse }>(
        "erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.reextract_pdf_table",
    )
|
||||
|
||||
/**
 * POST hook for the `set_pdf_table_header` server method of Bank Statement Import Log.
 * The response is typed as `{ message: GetStatementDetailsResponse }`.
 */
export const useSetPDFTableHeader = () =>
    useFrappePostCall<{ message: GetStatementDetailsResponse }>(
        "erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.set_pdf_table_header",
    )
|
||||
|
||||
/**
 * POST hook for the `update_column_mapping` server method of Bank Statement Import Log.
 * The response is typed as `{ message: GetStatementDetailsResponse }`.
 */
export const useUpdateColumnMapping = () =>
    useFrappePostCall<{ message: GetStatementDetailsResponse }>(
        "erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.update_column_mapping",
    )
|
||||
|
||||
/**
 * POST hook for the `set_header_index` server method of Bank Statement Import Log.
 * The response is typed as `{ message: GetStatementDetailsResponse }`.
 */
export const useSetHeaderIndex = () =>
    useFrappePostCall<{ message: GetStatementDetailsResponse }>(
        "erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.set_header_index",
    )
|
||||
@@ -231,7 +231,7 @@ export const FileTypeIcon = ({
|
||||
const getTextColor = () => {
|
||||
switch (fileType.toLowerCase()) {
|
||||
case 'pdf':
|
||||
return 'text-red-700'
|
||||
return 'text-ink-red-3'
|
||||
case 'doc':
|
||||
case 'docx':
|
||||
return 'text-[#1A5CBD]'
|
||||
|
||||
@@ -7,6 +7,7 @@ import { Dialog, DialogClose, DialogContent, DialogDescription, DialogFooter, Di
|
||||
import { Empty, EmptyHeader, EmptyMedia, EmptyTitle } from "@/components/ui/empty"
|
||||
import ErrorBanner from "@/components/ui/error-banner"
|
||||
import { FileDropzone } from "@/components/ui/file-dropzone"
|
||||
import { Input } from "@/components/ui/input"
|
||||
import { Label } from "@/components/ui/label"
|
||||
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"
|
||||
import { H3, Paragraph } from "@/components/ui/typography"
|
||||
@@ -16,7 +17,7 @@ import { flt, formatCurrency } from "@/lib/numbers"
|
||||
import _ from "@/lib/translate"
|
||||
import { cn } from "@/lib/utils"
|
||||
import { BankStatementImportLog } from "@/types/Accounts/BankStatementImportLog"
|
||||
import { useFrappeCreateDoc, useFrappeFileUpload, useFrappeGetDocList } from "frappe-react-sdk"
|
||||
import { useFrappeCreateDoc, useFrappeFileUpload, useFrappeGetDocList, useFrappeUpdateDoc } from "frappe-react-sdk"
|
||||
import { useAtom, useAtomValue } from "jotai"
|
||||
import { ListIcon, Loader2Icon } from "lucide-react"
|
||||
import { useState } from "react"
|
||||
@@ -30,11 +31,15 @@ const BankStatementImporter = () => {
|
||||
const [selectedBankAccount] = useAtom(selectedBankAccountAtom)
|
||||
|
||||
const [files, setFiles] = useState<File[]>([])
|
||||
const [password, setPassword] = useState("")
|
||||
|
||||
const { upload, error, loading } = useFrappeFileUpload()
|
||||
|
||||
const navigate = useNavigate()
|
||||
const { createDoc, loading: createLoading, error: createError } = useFrappeCreateDoc<BankStatementImportLog>()
|
||||
const { updateDoc, error: updateError } = useFrappeUpdateDoc()
|
||||
|
||||
const isPdf = files[0]?.name?.toLowerCase().endsWith(".pdf") ?? false
|
||||
|
||||
const onUpload = () => {
|
||||
|
||||
@@ -44,12 +49,18 @@ const BankStatementImporter = () => {
|
||||
|
||||
const id = `new-bank-statement-import-log-${Date.now()}`
|
||||
|
||||
upload(files[0], {
|
||||
// For protected PDFs, persist the password on the Bank Account so it is reused for
|
||||
// every statement of this account (and is available before the import doc is created).
|
||||
const ensurePassword = isPdf && password
|
||||
? updateDoc("Bank Account", selectedBankAccount.name, { statement_password: password })
|
||||
: Promise.resolve()
|
||||
|
||||
ensurePassword.then(() => upload(files[0], {
|
||||
isPrivate: true,
|
||||
doctype: "Bank Statement Import Log",
|
||||
docname: id,
|
||||
fieldname: 'file'
|
||||
}).then((file) => {
|
||||
})).then((file) => {
|
||||
return createDoc("Bank Statement Import Log",
|
||||
// @ts-expect-error - not filling everything else
|
||||
{
|
||||
@@ -67,6 +78,7 @@ const BankStatementImporter = () => {
|
||||
<div className="w-[52%]">
|
||||
{error && <ErrorBanner error={error} />}
|
||||
{createError && <ErrorBanner error={createError} />}
|
||||
{updateError && <ErrorBanner error={updateError} />}
|
||||
<div className="py-2 flex flex-col gap-6">
|
||||
<div className="flex flex-col gap-2">
|
||||
<Label>{_("Company")}<span className="text-ink-red-3">*</span></Label>
|
||||
@@ -89,7 +101,7 @@ const BankStatementImporter = () => {
|
||||
data-slot="form-description"
|
||||
className={cn("text-ink-gray-5 text-xs")}
|
||||
>
|
||||
{_("Upload your bank statement file to start the import process. We support CSV, and XLSX files.")}
|
||||
{_("Upload your bank statement file to start the import process. We support CSV, XLSX and PDF files.")}
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
@@ -105,10 +117,27 @@ const BankStatementImporter = () => {
|
||||
'text/csv': ['.csv'],
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
|
||||
'application/vnd.ms-excel': ['.xls'],
|
||||
'application/pdf': ['.pdf'],
|
||||
// 'application/xml': ['.xml'],
|
||||
}}
|
||||
multiple={false}
|
||||
/>
|
||||
|
||||
{isPdf && <div className="flex flex-col gap-2">
|
||||
<Label htmlFor="pdf-password">{_("PDF Password")}</Label>
|
||||
<Input
|
||||
id="pdf-password"
|
||||
type="password"
|
||||
autoComplete="off"
|
||||
value={password}
|
||||
onChange={(e) => setPassword(e.target.value)}
|
||||
placeholder={_("Only if the PDF is password protected")}
|
||||
className="max-w-sm"
|
||||
/>
|
||||
<p data-slot="form-description" className={cn("text-ink-gray-5 text-p-sm")}>
|
||||
{_("Leave blank to use the password already saved for this bank account (if any). It is stored encrypted and reused for future statements.")}
|
||||
</p>
|
||||
</div>}
|
||||
</div>}
|
||||
<div className="flex justify-end px-4">
|
||||
<Button
|
||||
@@ -137,9 +166,10 @@ const StatementInstructions = () => {
|
||||
<DialogContent className="min-w-7xl">
|
||||
<DialogHeader>
|
||||
<DialogTitle>{_("Statement Import Instructions")}</DialogTitle>
|
||||
<DialogDescription>{_("We support uploading CSV, XLSX and XLS files. Please make sure the file contains the correct columns.")}</DialogDescription>
|
||||
<DialogDescription>{_("We support uploading CSV, XLSX, XLS and PDF files. Please make sure the file contains the correct columns.")}</DialogDescription>
|
||||
</DialogHeader>
|
||||
<Paragraph className="text-sm">{_("The file should contain the following columns with a distinct header row. You can upload most bank statements as is without changing the columns.")}</Paragraph>
|
||||
<Paragraph className="text-sm text-ink-gray-6">{_("For PDF statements, we auto-detect the tables on each page. You can then confirm each detected table, map its columns, and exclude anything that is not transactions (e.g. ads or summaries). Password-protected PDFs are supported - the password is saved on the bank account and reused.")}</Paragraph>
|
||||
<Table>
|
||||
<TableHeader>
|
||||
<TableRow>
|
||||
@@ -231,7 +261,13 @@ const StatementImportLog = () => {
|
||||
<TableRow key={item.name} onClick={() => onViewDetails(item.name)} className="cursor-pointer hover:bg-surface-gray-2">
|
||||
<TableCell>{formatDate(item.creation, 'Do MMM YYYY')}</TableCell>
|
||||
<TableCell><Badge theme={item.status === "Completed" ? "green" : "gray"}>{item.status}</Badge></TableCell>
|
||||
<TableCell>{formatDate(item.start_date, 'Do MMM YYYY')} to {formatDate(item.end_date, 'Do MMM YYYY')}</TableCell>
|
||||
<TableCell>
|
||||
{item.start_date && item.end_date ? (
|
||||
<span>{formatDate(item.start_date, 'Do MMM YYYY')} to {formatDate(item.end_date, 'Do MMM YYYY')}</span>
|
||||
) : (
|
||||
<span>-</span>
|
||||
)}
|
||||
</TableCell>
|
||||
<TableCell className="text-end">{item.number_of_transactions}</TableCell>
|
||||
<TableCell className="text-end font-numeric">{formatCurrency(flt(item.closing_balance, 2))}</TableCell>
|
||||
<TableCell><a
|
||||
|
||||
@@ -9,12 +9,13 @@ import { ChevronLeftIcon, ChevronRightIcon } from 'lucide-react'
|
||||
import { Link, useParams } from 'react-router'
|
||||
|
||||
const CSVImport = lazy(() => import('@/components/features/BankStatementImporter/CSV/CSVImport'))
|
||||
const PDFImport = lazy(() => import('@/components/features/BankStatementImporter/PDF/PDFImport'))
|
||||
|
||||
const ViewBankStatementImportLog = () => {
|
||||
|
||||
const { id } = useParams<{ id: string }>()
|
||||
|
||||
const { data, isLoading, error } = useGetStatementDetails(id ?? "")
|
||||
const { data, isLoading, error, mutate } = useGetStatementDetails(id ?? "")
|
||||
|
||||
useFrappeDocumentEventListener("Bank Statement Import Log", id ?? "", () => {
|
||||
})
|
||||
@@ -42,7 +43,13 @@ const ViewBankStatementImportLog = () => {
|
||||
<ErrorBanner error={error} />
|
||||
</div>
|
||||
}
|
||||
return <CSVImport data={data} />
|
||||
const isPdf = data.message.doc.file?.toLowerCase().endsWith('.pdf')
|
||||
|
||||
if (isPdf) {
|
||||
return <PDFImport data={data} mutate={mutate} />
|
||||
}
|
||||
|
||||
return <CSVImport data={data} mutate={mutate} />
|
||||
}
|
||||
|
||||
export default ViewBankStatementImportLog
|
||||
@@ -38,6 +38,8 @@ export interface BankAccount{
|
||||
branch_code?: string
|
||||
/** Bank Account No : Data */
|
||||
bank_account_no?: string
|
||||
/** Statement PDF Password : Password - Password used to open password-protected PDF statements for this account. Stored encrypted. */
|
||||
statement_password?: string
|
||||
/** Is Credit Card : Check */
|
||||
is_credit_card?: 0 | 1
|
||||
/** Integration ID : Data */
|
||||
|
||||
@@ -47,4 +47,6 @@ export interface BankStatementImportLog {
|
||||
detected_transaction_ending_index?: number
|
||||
/** Column Mapping : Table - Bank Statement Import Log Column Map */
|
||||
column_mapping?: BankStatementImportLogColumnMap[]
|
||||
/** PDF Tables : JSON - Per-table extraction data for PDF statements */
|
||||
pdf_tables?: string
|
||||
}
|
||||
@@ -27,6 +27,7 @@
|
||||
"column_break_12",
|
||||
"branch_code",
|
||||
"bank_account_no",
|
||||
"statement_password",
|
||||
"address_and_contact",
|
||||
"address_html",
|
||||
"column_break_13",
|
||||
@@ -149,6 +150,12 @@
|
||||
"label": "Bank Account No",
|
||||
"length": 30
|
||||
},
|
||||
{
|
||||
"description": "Password used to open password-protected PDF statements for this account. Stored encrypted.",
|
||||
"fieldname": "statement_password",
|
||||
"fieldtype": "Password",
|
||||
"label": "Statement PDF Password"
|
||||
},
|
||||
{
|
||||
"fieldname": "address_and_contact",
|
||||
"fieldtype": "Section Break",
|
||||
|
||||
@@ -41,6 +41,7 @@ class BankAccount(Document):
|
||||
mask: DF.Data | None
|
||||
party: DF.DynamicLink | None
|
||||
party_type: DF.Link | None
|
||||
statement_password: DF.Password | None
|
||||
# end: auto-generated types
|
||||
|
||||
def onload(self):
|
||||
|
||||
@@ -28,7 +28,8 @@
|
||||
"detected_transaction_starting_index",
|
||||
"detected_transaction_ending_index",
|
||||
"section_break_yulq",
|
||||
"column_mapping"
|
||||
"column_mapping",
|
||||
"pdf_tables"
|
||||
],
|
||||
"fields": [
|
||||
{
|
||||
@@ -128,6 +129,13 @@
|
||||
"label": "Column Mapping",
|
||||
"options": "Bank Statement Import Log Column Map"
|
||||
},
|
||||
{
|
||||
"description": "Per-table extraction data for PDF statements (rows, bbox, page image, column mapping). Edited via the banking app.",
|
||||
"fieldname": "pdf_tables",
|
||||
"fieldtype": "JSON",
|
||||
"label": "PDF Tables",
|
||||
"read_only": 1
|
||||
},
|
||||
{
|
||||
"default": "Not Started",
|
||||
"fieldname": "status",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,7 +7,18 @@ from frappe.utils import getdate
|
||||
|
||||
from erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log import (
|
||||
BankStatementImportLog,
|
||||
build_table_transactions,
|
||||
detect_column_mapping,
|
||||
detect_header_row,
|
||||
extract_pdf_tables,
|
||||
get_float_amount,
|
||||
get_statement_details,
|
||||
guess_column_mapping_by_content,
|
||||
reextract_pdf_table,
|
||||
set_header_index,
|
||||
set_pdf_table_header,
|
||||
update_column_mapping,
|
||||
update_pdf_tables,
|
||||
)
|
||||
from erpnext.accounts.test.accounts_mixin import AccountsTestMixin
|
||||
from erpnext.tests.utils import ERPNextTestSuite
|
||||
@@ -113,6 +124,346 @@ class TestBankStatementImportLog(ERPNextTestSuite, AccountsTestMixin):
|
||||
self.assertIsNone(get_float_amount("ABCD"))
|
||||
self.assertIsNone(get_float_amount("****"))
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# PDF statement import
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
@staticmethod
def _make_pdf(html: str) -> bytes:
	"""Render an HTML string to PDF bytes with pdfkit (no output file is written)."""
	# Imported lazily so the module can load without pdfkit until a PDF test runs.
	from pdfkit import from_string

	pdf_bytes = from_string(html, False)
	return pdf_bytes
|
||||
|
||||
@staticmethod
def _encrypt(pdf_bytes: bytes, password: str) -> bytes:
	"""Return a copy of the given PDF with every page kept and `password` applied."""
	from io import BytesIO

	from pypdf import PdfReader, PdfWriter

	source = PdfReader(BytesIO(pdf_bytes))
	encrypted = PdfWriter()
	for source_page in source.pages:
		encrypted.add_page(source_page)
	encrypted.encrypt(password)

	with BytesIO() as out:
		encrypted.write(out)
		return out.getvalue()
|
||||
|
||||
@staticmethod
def _auto_map(table: dict) -> dict:
	"""Mimic prepare_pdf_tables' best-effort mapping for a single extracted table.

	Mutates `table` in place (header_index, column_mapping, included) and returns it.
	"""
	rows = table["rows"]
	detected_index, score = detect_header_row(rows)

	# A header score of 2 or more is trusted; otherwise columns are guessed from content.
	has_header = score >= 2
	table["header_index"] = detected_index if has_header else None
	table["column_mapping"] = (
		detect_column_mapping(rows[detected_index]) if has_header else guess_column_mapping_by_content(rows)
	)
	table["included"] = True
	return table
|
||||
|
||||
def test_pdf_multi_page_kept_separate_and_unioned(self):
	"""Tables on separate pages must NOT be merged; transactions are the union."""
	html = """
<html><body>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Withdrawal</th><th>Deposit</th><th>Balance</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td><td></td><td>9500.00</td></tr>
<tr><td>03/04/2024</td><td>SALARY</td><td></td><td>20000.00</td><td>29500.00</td></tr></table>
<div style="page-break-before: always"></div>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Withdrawal</th><th>Deposit</th><th>Balance</th></tr>
<tr><td>05/04/2024</td><td>ATM WDL</td><td>2000.00</td><td></td><td>27500.00</td></tr></table>
</body></html>
"""
	pdf_bytes = self._make_pdf(html)
	extracted = extract_pdf_tables(pdf_bytes)

	# One table per page, each carrying a 4-value bounding box.
	self.assertEqual(len(extracted), 2)
	pages = sorted(entry["page"] for entry in extracted)
	self.assertEqual(pages, [1, 2])
	for entry in extracted:
		self.assertIn("bbox", entry)
		self.assertEqual(len(entry["bbox"]), 4)

	# Build each table's transactions separately and collect them.
	combined = []
	for entry in extracted:
		final, _df, _af = build_table_transactions(self._auto_map(entry))
		combined.extend(final)

	self.assertEqual(len(combined), 3)
	dates = sorted(txn["date"] for txn in combined)
	self.assertEqual(dates, ["2024-04-01", "2024-04-03", "2024-04-05"])
|
||||
|
||||
def test_pdf_junk_table_excluded(self):
	"""A non-transactions table (ad/summary) should yield zero transactions."""
	promo_rows = [["Open a new account!", "Call 1800-XYZ"]]
	mapped = self._auto_map({"rows": promo_rows})

	final, _df, _af = build_table_transactions(mapped)

	self.assertEqual(final, [])
|
||||
|
||||
def test_headerless_content_mapping(self):
	"""Without a header row, columns are guessed from their contents."""
	rows = [
		["01/04/2024", "UPI PAYMENT", "500.00"],
		["03/04/2024", "SALARY CREDIT", "20000.00"],
	]

	# field -> column index, ignoring columns the guesser chose not to import
	field_to_index = {}
	for column in guess_column_mapping_by_content(rows):
		if column["maps_to"] == "Do not import":
			continue
		field_to_index[column["maps_to"]] = column["index"]

	for field, expected_index in (("Date", 0), ("Description", 1), ("Amount", 2)):
		self.assertEqual(field_to_index.get(field), expected_index)
|
||||
|
||||
def test_pdf_password_protected(self):
	"""Encrypted PDFs error without a password and succeed with the right one."""
	html = """
<html><body><table border="1">
<tr><th>Date</th><th>Narration</th><th>Amount</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td></tr></table></body></html>
"""
	plain_pdf = self._make_pdf(html)
	locked_pdf = self._encrypt(plain_pdf, "secret123")

	# Missing password, then a wrong one: both must raise a validation error.
	with self.assertRaises(frappe.ValidationError):
		extract_pdf_tables(locked_pdf)
	with self.assertRaises(frappe.ValidationError):
		extract_pdf_tables(locked_pdf, "wrong")

	# The correct password yields at least one table.
	extracted = extract_pdf_tables(locked_pdf, "secret123")
	self.assertTrue(extracted)
|
||||
|
||||
def test_pdf_no_tables_detected(self):
	"""A PDF with no detectable tables raises a clear error (e.g. scanned PDFs)."""
	html = "<html><body><p>Just some prose with no tabular data at all.</p></body></html>"
	prose_only_pdf = self._make_pdf(html)

	with self.assertRaises(frappe.ValidationError):
		extract_pdf_tables(prose_only_pdf)
|
||||
|
||||
def _create_pdf_import_log(self, html: str) -> BankStatementImportLog:
	"""Render `html` to a PDF, store it as a private File and insert an import log pointing at it."""
	pdf_bytes = self._make_pdf(html)

	# Random suffixes keep file and document names unique across tests.
	file_fields = {
		"doctype": "File",
		"file_name": f"test-statement-{frappe.generate_hash(length=8)}.pdf",
		"is_private": 1,
		"content": pdf_bytes,
	}
	file_doc = frappe.get_doc(file_fields).insert(ignore_permissions=True)

	log_fields = {
		"doctype": "Bank Statement Import Log",
		"name": f"test-pdf-{frappe.generate_hash(length=8)}",
		"bank_account": self.bank_account,
		"file": file_doc.file_url,
	}
	import_log = frappe.get_doc(log_fields)
	return import_log.insert()
|
||||
|
||||
def test_pdf_full_lifecycle(self):
	"""End-to-end doc lifecycle: insert -> rasterize -> preview -> edit -> import."""
	html = """
<html><body>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Withdrawal</th><th>Deposit</th><th>Balance</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td><td></td><td>9500.00</td></tr>
<tr><td>03/04/2024</td><td>SALARY</td><td></td><td>20000.00</td><td>29500.00</td></tr></table>
<div style="page-break-before: always"></div>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Withdrawal</th><th>Deposit</th><th>Balance</th></tr>
<tr><td>05/04/2024</td><td>ATM WDL</td><td>2000.00</td><td></td><td>27500.00</td></tr></table>
</body></html>
"""
	doc = self._create_pdf_import_log(html)

	# Inserting the doc populated the per-table JSON, page images and the union summary.
	tables = doc.get_pdf_tables()
	self.assertEqual(len(tables), 2)
	for entry in tables:
		self.assertTrue(entry.get("page_image"))
		self.assertIn("bbox", entry)
		# Page-image File must be attached to the final docname, not the client's temp id
		image_owner = frappe.db.get_value("File", {"file_url": entry["page_image"]}, "attached_to_name")
		self.assertEqual(image_owner, doc.name)
	self.assertEqual(doc.number_of_transactions, 3)
	self.assertEqual(doc.total_debit_transactions, 2)
	self.assertEqual(doc.total_credit_transactions, 1)

	# The details payload carries the union of transactions plus per-table data for the editor.
	details = get_statement_details(doc.name)
	self.assertEqual(len(details["final_transactions"]), 3)
	self.assertEqual(details["raw_data"], [])
	self.assertEqual(len(details["pdf_tables"]), 2)

	# Excluding the page-2 table removes its single transaction.
	second_table = tables[1]
	second_table["included"] = False
	update_pdf_tables(doc.name, tables)
	doc.reload()
	self.assertEqual(doc.number_of_transactions, 2)

	# Include it again, then import: transactions are created for the union.
	second_table["included"] = True
	update_pdf_tables(doc.name, tables)
	doc.reload()
	doc.insert_transactions()
	doc.reload()
	self.assertEqual(doc.status, "Completed")

	submitted_filters = {"bank_account": self.bank_account, "docstatus": 1}
	created = frappe.get_all("Bank Transaction", filters=submitted_filters)
	self.assertEqual(len(created), 3)
|
||||
|
||||
def test_pdf_reextract_table_from_bbox(self):
    """Re-running extraction over a table's own bbox keeps its rows and returns the bbox."""
    statement_html = """
<html><body>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Amount</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td></tr>
<tr><td>03/04/2024</td><td>SALARY</td><td>20000.00</td></tr></table>
</body></html>
"""
    import_log = self._create_pdf_import_log(statement_html)
    first_table = import_log.get_pdf_tables()[0]
    region = first_table["bbox"]

    response = reextract_pdf_table(
        import_log.name,
        first_table["page"],
        first_table["table_index"],
        region,
    )
    reextracted = response["pdf_tables"][0]

    # The region is unchanged, so rows must be non-empty and identical to the
    # first extraction; the returned bbox must equal the input rounded to 2 decimals.
    self.assertTrue(reextracted["rows"])
    expected_bbox = [round(float(coord), 2) for coord in region]
    self.assertEqual(reextracted["bbox"], expected_bbox)
    self.assertEqual(reextracted["rows"], first_table["rows"])
|
||||
|
||||
def test_pdf_reextract_changed_bbox_updates_rows_and_transactions(self):
    """A smaller bbox must yield fewer rows and a lower transaction count on the log."""
    statement_html = """
<html><body>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Amount</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td></tr>
<tr><td>03/04/2024</td><td>SALARY</td><td>20000.00</td></tr>
<tr><td>05/04/2024</td><td>ATM WDL</td><td>2000.00</td></tr>
<tr><td>07/04/2024</td><td>INTEREST</td><td>12.50</td></tr></table>
</body></html>
"""
    import_log = self._create_pdf_import_log(statement_html)
    baseline_table = import_log.get_pdf_tables()[0]
    baseline_row_count = len(baseline_table["rows"])
    baseline_txn_count = import_log.number_of_transactions

    # Keep the left/top/right edges and pull the bottom edge up to the midpoint,
    # which is what a user dragging the box to half height would produce.
    x0, top, x1, bottom = baseline_table["bbox"]
    half_height = (bottom - top) * 0.5
    shrunk = [x0, top, x1, top + half_height]

    response = reextract_pdf_table(
        import_log.name,
        baseline_table["page"],
        baseline_table["table_index"],
        shrunk,
    )
    reextracted = response["pdf_tables"][0]
    import_log.reload()

    self.assertLess(len(reextracted["rows"]), baseline_row_count)
    self.assertLess(import_log.number_of_transactions, baseline_txn_count)
    # The returned payload and the reloaded document must agree on the count.
    self.assertEqual(len(response["final_transactions"]), import_log.number_of_transactions)
|
||||
|
||||
def test_pdf_set_table_header(self):
    """The header row of a PDF table can be cleared (-1) or pointed at a specific row."""

    def imported_columns(pdf_table):
        # maps_to -> column index, leaving out columns marked "Do not import".
        return {
            col["maps_to"]: col["index"]
            for col in pdf_table["column_mapping"]
            if col["maps_to"] != "Do not import"
        }

    statement_html = """
<html><body>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Amount</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td></tr>
<tr><td>03/04/2024</td><td>SALARY</td><td>20000.00</td></tr></table>
</body></html>
"""
    import_log = self._create_pdf_import_log(statement_html)
    first_table = import_log.get_pdf_tables()[0]
    self.assertEqual(first_table["header_index"], 0)
    mapping_before = imported_columns(first_table)

    # Passing -1 clears the header; the existing mapping must survive untouched
    # rather than being guessed again.
    response = set_pdf_table_header(
        import_log.name, first_table["page"], first_table["table_index"], -1
    )
    headerless = response["pdf_tables"][0]
    self.assertIsNone(headerless["header_index"])
    self.assertEqual(imported_columns(headerless), mapping_before)

    # Pointing the header back at row 0 gives usable labels, so the mapping is
    # derived from them again.
    response = set_pdf_table_header(
        import_log.name, first_table["page"], first_table["table_index"], 0
    )
    with_header = response["pdf_tables"][0]
    self.assertEqual(with_header["header_index"], 0)
    mapping_after = imported_columns(with_header)
    self.assertEqual(mapping_after.get("Date"), 0)
    self.assertEqual(mapping_after.get("Description"), 1)
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
# CSV/XLSX column mapping + header overrides
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def _create_csv_import_log(self, csv_text: str) -> BankStatementImportLog:
    """Store ``csv_text`` as a private File and insert an import log that points at it.

    The file name carries a random 8-character hash. The returned document is
    the inserted "Bank Statement Import Log" for ``self.bank_account``.
    """
    file_fields = {
        "doctype": "File",
        "file_name": f"test-statement-{frappe.generate_hash(length=8)}.csv",
        "is_private": 1,
        "content": csv_text,
    }
    uploaded = frappe.get_doc(file_fields).insert(ignore_permissions=True)

    log_fields = {
        "doctype": "Bank Statement Import Log",
        "bank_account": self.bank_account,
        "file": uploaded.file_url,
    }
    import_log = frappe.get_doc(log_fields)
    return import_log.insert()
|
||||
|
||||
def test_csv_update_column_mapping(self):
    """Replacing the column mapping must recompute how many transactions are detected."""
    csv_text = "Date,Narration,Amount\n01/04/2024,UPI PAYMENT,500.00\n03/04/2024,SALARY,20000.00\n"
    import_log = self._create_csv_import_log(csv_text)
    self.assertEqual(import_log.number_of_transactions, 2)

    # Mark the Amount column as not imported: with no amount there is nothing
    # left to detect as a transaction.
    override = []
    for column in import_log.column_mapping:
        entry = {"index": column.index}
        if column.maps_to == "Amount":
            entry["maps_to"] = "Do not import"
        else:
            entry["maps_to"] = column.maps_to
        override.append(entry)

    response = update_column_mapping(import_log.name, override)
    import_log.reload()

    self.assertEqual(import_log.number_of_transactions, 0)
    self.assertEqual(len(response["final_transactions"]), 0)
|
||||
|
||||
def test_csv_set_header_index_preserves_mapping(self):
    """Removing the header row must keep a user-chosen mapping instead of guessing again."""
    csv_text = "Date,Narration,Amount\n01/04/2024,UPI PAYMENT,500.00\n03/04/2024,SALARY,20000.00\n"
    import_log = self._create_csv_import_log(csv_text)
    self.assertEqual(import_log.detected_header_index, 0)

    # User override: treat column 1 (Narration) as Reference.
    override = []
    for column in import_log.column_mapping:
        entry = {"index": column.index}
        entry["maps_to"] = "Reference" if column.index == 1 else column.maps_to
        entry["header_text"] = column.header_text
        override.append(entry)
    update_column_mapping(import_log.name, override)
    import_log.reload()

    # With the header cleared, column 1 must still be Reference (not guessed
    # back to Description). The former label row cannot be parsed as a date,
    # so the count stays at 2.
    set_header_index(import_log.name, -1)
    import_log.reload()
    self.assertEqual(import_log.detected_header_index, -1)
    self.assertEqual(import_log.number_of_transactions, 2)
    by_index = {column.index: column.maps_to for column in import_log.column_mapping}
    self.assertEqual(by_index.get(1), "Reference")

    # Making row 0 the header again yields usable labels, so the mapping is
    # derived from them once more.
    set_header_index(import_log.name, 0)
    import_log.reload()
    self.assertEqual(import_log.detected_header_index, 0)
    by_target = {
        column.maps_to: column.index
        for column in import_log.column_mapping
        if column.maps_to != "Do not import"
    }
    self.assertEqual(by_target.get("Description"), 1)
|
||||
|
||||
|
||||
test_hdfc_sample_statement_data = [
|
||||
["HDFC BANK Ltd. Page No .: 1 Statement of accounts", "", "", "", "", "", ""],
|
||||
|
||||
@@ -24,6 +24,11 @@ dependencies = [
|
||||
|
||||
# MT940 parser for bank statements
|
||||
"mt-940>=4.26.0",
|
||||
|
||||
# PDF bank statement table extraction + page rasterization.
|
||||
# Pulls pdfminer.six (parsing), Pillow, and pypdfium2 (render backend for
|
||||
# Page.to_image) - all ship prebuilt wheels, no system binaries required.
|
||||
"pdfplumber>=0.11.0",
|
||||
]
|
||||
|
||||
[build-system]
|
||||
|
||||
Reference in New Issue
Block a user