feat(banking): PDF statement importer and overriding column mapping (#55559)

* feat(banking): PDF statement importer

* feat(banking): allow users to override column mapping

* fix: store pending page images in flags
This commit is contained in:
Nikhil Kothari
2026-06-03 19:48:14 +05:30
committed by GitHub
parent 855eeb1078
commit 3294490040
19 changed files with 2238 additions and 420 deletions

View File

@@ -2,9 +2,7 @@ import CSVRawDataPreview from './CSVRawDataPreview'
import StatementDetails from './StatementDetails' import StatementDetails from './StatementDetails'
import { GetStatementDetailsResponse } from '../import_utils' import { GetStatementDetailsResponse } from '../import_utils'
const CSVImport = ({ data }: { data: { message: GetStatementDetailsResponse } }) => { const CSVImport = ({ data, mutate }: { data: { message: GetStatementDetailsResponse }, mutate: () => void }) => {
return ( return (
<div className="w-full flex"> <div className="w-full flex">
@@ -12,7 +10,7 @@ const CSVImport = ({ data }: { data: { message: GetStatementDetailsResponse } })
<StatementDetails data={data.message} /> <StatementDetails data={data.message} />
</div> </div>
<div className="w-[50%] border-s border-t pe-1 ps-0 border-outline-gray-2 h-[calc(100vh-72px)] overflow-scroll"> <div className="w-[50%] border-s border-t pe-1 ps-0 border-outline-gray-2 h-[calc(100vh-72px)] overflow-scroll">
<CSVRawDataPreview data={data.message} /> <CSVRawDataPreview data={data.message} mutate={mutate} />
</div> </div>
</div> </div>
) )

View File

@@ -1,151 +1,104 @@
import { Table, TableBody, TableCell, TableHead, TableRow } from "@/components/ui/table" import { useEffect, useRef, useState } from "react"
import { cn } from "@/lib/utils" import { toast } from "sonner"
import { ArrowDownRightIcon, ArrowUpDownIcon, ArrowUpRightIcon, BanknoteIcon, CalendarIcon, DollarSignIcon, FileTextIcon, ListIcon, ReceiptIcon } from "lucide-react"
import { Tooltip, TooltipContent, TooltipTrigger } from "@/components/ui/tooltip"
import _ from "@/lib/translate" import _ from "@/lib/translate"
import { GetStatementDetailsResponse } from "../import_utils" import RawTableGrid from "../RawTableGrid"
import { useMemo } from "react" import {
applyColumnMappingChange,
ColumnMapsTo,
GetStatementDetailsResponse,
useSetHeaderIndex,
useUpdateColumnMapping,
} from "../import_utils"
import { BankStatementImportLogColumnMap } from "@/types/Accounts/BankStatementImportLogColumnMap" import { BankStatementImportLogColumnMap } from "@/types/Accounts/BankStatementImportLogColumnMap"
type Mapping = Pick<BankStatementImportLogColumnMap, "index" | "maps_to" | "header_text" | "variable">
const CSVRawDataPreview = ({ data }: { data: GetStatementDetailsResponse }) => { const toMapping = (columns?: BankStatementImportLogColumnMap[]): Mapping[] =>
(columns ?? []).map((c) => ({
index: c.index,
maps_to: c.maps_to,
header_text: c.header_text,
variable: c.variable,
}))
const column_mapping: Record<StandardColumnTypes, number> = useMemo(() => { const headerToState = (index?: number) => (index != null && index >= 0 ? index : null)
const col_map: Record<string, number> = {} const CSVRawDataPreview = ({
data,
mutate,
}: {
data: GetStatementDetailsResponse
mutate: () => void
}) => {
const isCompleted = data.doc.status === "Completed"
data.doc.column_mapping?.forEach(col => { const [mapping, setMapping] = useState<Mapping[]>(() => toMapping(data.doc.column_mapping))
if (col.maps_to && col.maps_to !== "Do not import") { const [headerIndex, setHeaderIndex] = useState<number | null>(() =>
col_map[col.maps_to] = col.index; headerToState(data.doc.detected_header_index),
} )
})
return col_map const { call: updateMapping, loading: savingMapping } = useUpdateColumnMapping()
const { call: setHeader, loading: savingHeader } = useSetHeaderIndex()
}, [data]) const mappingRef = useRef(mapping)
const saveTimer = useRef<ReturnType<typeof setTimeout>>(undefined)
const validColumns = Object.values(column_mapping) useEffect(() => () => clearTimeout(saveTimer.current), [])
// Reverse the column mapping to get a map of column index to variable name const columnMappingRecord: Record<number, ColumnMapsTo> = {}
const columnIndexMap: Record<number, StandardColumnTypes> = Object.fromEntries(Object.entries(column_mapping).map(([variable, columnIndex]) => [columnIndex, variable as StandardColumnTypes])) mapping.forEach((c) => {
if (c.maps_to) columnMappingRecord[c.index] = c.maps_to as ColumnMapsTo
})
const commitMapping = (next: Mapping[]) => {
mappingRef.current = next
setMapping(next)
}
// Persist mapping edits (debounced) so the transaction preview updates in realtime.
const scheduleSaveMapping = () => {
if (isCompleted) return
clearTimeout(saveTimer.current)
saveTimer.current = setTimeout(() => {
updateMapping({ statement_import_id: data.doc.name, column_mapping: mappingRef.current })
.then(() => mutate())
.catch(() => toast.error(_("Could not save the column mapping.")))
}, 500)
}
const onChangeMapping = (columnIndex: number, mapsTo: ColumnMapsTo) => {
if (isCompleted) return
commitMapping(applyColumnMappingChange(mappingRef.current, columnIndex, mapsTo))
scheduleSaveMapping()
}
const onSetHeader = (rowIndex: number | null) => {
if (isCompleted) return
setHeaderIndex(rowIndex)
setHeader({ statement_import_id: data.doc.name, header_index: rowIndex ?? -1 })
.then((res) => {
// The backend re-derives the mapping for the new header; sync local state.
const doc = res?.message?.doc
if (doc) {
commitMapping(toMapping(doc.column_mapping))
setHeaderIndex(headerToState(doc.detected_header_index))
}
mutate()
})
.catch(() => toast.error(_("Could not update the header row.")))
}
// Loop over the contents of the CSV file and show a preview - highlight the header row and the transaction rows
return ( return (
<Table containerClassName="rounded-none"> <RawTableGrid
<TableBody> rows={data.raw_data}
{data.raw_data.map((row, index) => { columnMapping={columnMappingRecord}
headerIndex={headerIndex}
const isHeaderRow = index === data.doc.detected_header_index; editable={!isCompleted}
const isTransactionRow = index >= (data.doc.detected_transaction_starting_index ?? 0) && index <= (data.doc.detected_transaction_ending_index ?? 0); disabled={isCompleted || savingMapping || savingHeader}
onChangeMapping={onChangeMapping}
return <TableRow key={index} onSetHeader={onSetHeader}
title={isHeaderRow ? "Header Row" : ""} />
className={cn({
// "bg-yellow-100": isHeaderRow,
// "hover:bg-yellow-100": isHeaderRow,
"bg-green-50 hover:bg-green-50 dark:bg-green-700 dark:hover:bg-green-700": isTransactionRow,
"text-ink-gray-5/70": !isTransactionRow && !isHeaderRow,
})}>
{isHeaderRow ? <TableHead className="bg-yellow-100 hover:bg-yellow-100 dark:bg-yellow-400 text-center font-semibold text-ink-gray-8">
{index + 1}
</TableHead> :
<TableCell className="text-center px-1 py-0.5">
{index + 1}
</TableCell>
}
{row.map((cell, cellIndex) => {
const isValidColumn = validColumns.includes(cellIndex);
const columnType = columnIndexMap[cellIndex];
const isAmountColumn = ["Amount", "Withdrawal", "Deposit", "Balance"].includes(columnType);
if (isHeaderRow) {
return <TableHead key={cellIndex} className={cn("max-w-[250px] w-fit overflow-hidden text-ellipsis py-0.5",
isValidColumn ? "bg-yellow-100 hover:bg-yellow-100 dark:bg-yellow-400" : "bg-surface-gray-2",
)}>
<div className={cn("flex items-center text-xs gap-1 px-1 text-ink-gray-8 font-medium", {
"justify-end": isAmountColumn && isValidColumn
})}>
{columnType && <Tooltip>
<TooltipTrigger>
<ColumnHeaderIcon columnType={columnType} />
</TooltipTrigger>
<TooltipContent>
{_(columnType)}
</TooltipContent>
</Tooltip>
}
{cell}
</div>
</TableHead>
} else {
return <TableCell key={cellIndex} className={cn("max-w-[200px] w-fit overflow-hidden text-ellipsis py-0.5",
{
"bg-green-100 dark:bg-green-400 hover:bg-green-100 dark:hover:bg-green-400": isValidColumn && isTransactionRow,
"text-ink-gray-5": !isValidColumn && isTransactionRow,
}
)} >
<div className={cn("min-h-5 flex items-center text-xs px-1", {
"justify-end": isAmountColumn && isValidColumn && isTransactionRow
})} title={cell}>
{cell}
</div>
</TableCell>
}
}
)}
</TableRow>
})}
</TableBody>
</Table >
) )
} }
type StandardColumnTypes = BankStatementImportLogColumnMap['maps_to'];
const ColumnHeaderIcon = ({ columnType }: { columnType?: StandardColumnTypes }) => {
if (!columnType) {
return null
}
if (columnType === 'Amount') {
return <DollarSignIcon className="w-4 h-4" />
}
if (columnType === 'Withdrawal') {
return <ArrowUpRightIcon className="w-4 h-4 text-ink-red-3" />
}
if (columnType === 'Deposit') {
return <ArrowDownRightIcon className="w-4 h-4 text-ink-green-3" />
}
if (columnType === 'Balance') {
return <BanknoteIcon className="w-4 h-4" />
}
if (columnType === 'Date') {
return <CalendarIcon className="w-4 h-4" />
}
if (columnType === 'Description') {
return <FileTextIcon className="w-4 h-4" />
}
if (columnType === 'Reference') {
return <ReceiptIcon className="w-4 h-4" />
}
if (columnType === 'Transaction Type') {
return <ListIcon className="w-4 h-4" />
}
if (columnType === 'Debit/Credit') {
return <ArrowUpDownIcon className="w-4 h-4" />
}
return null
}
export default CSVRawDataPreview export default CSVRawDataPreview

View File

@@ -142,11 +142,16 @@ const StatementDetails = ({ data }: Props) => {
<TableCell> <TableCell>
<div className='flex items-center gap-2'> <div className='flex items-center gap-2'>
<BankLogo bank={bank} /> <BankLogo bank={bank} />
<span className="tracking-tight text-sm font-medium">{bank?.account_name}</span> <span className="text-sm">{bank?.account_name}</span>
<span title="GL Account" className="text-sm">{bank?.account}</span>
</div> </div>
</TableCell> </TableCell>
</TableRow> </TableRow>
<TableRow>
<TableHead>{_("Account")}</TableHead>
<TableCell>
<span title="GL Account" className="text-sm">{bank?.account}</span>
</TableCell>
</TableRow>
<TableRow> <TableRow>
<TableHead>{_("Statement File")}</TableHead> <TableHead>{_("Statement File")}</TableHead>
<TableCell> <TableCell>
@@ -158,7 +163,11 @@ const StatementDetails = ({ data }: Props) => {
</TableRow> </TableRow>
<TableRow> <TableRow>
<TableHead>{_("Transaction Dates")}</TableHead> <TableHead>{_("Transaction Dates")}</TableHead>
<TableCell>{_("{0} to {1}", [formatDate(data.doc.start_date, "Do MMMM YYYY"), formatDate(data.doc.end_date, "Do MMMM YYYY")])}</TableCell> {data.doc.start_date && data.doc.end_date ? (
<TableCell>{_("{0} to {1}", [formatDate(data.doc.start_date, "Do MMMM YYYY"), formatDate(data.doc.end_date, "Do MMMM YYYY")])}</TableCell>
) : (
<TableCell>-</TableCell>
)}
</TableRow> </TableRow>
<TableRow> <TableRow>
<TableHead>{_("Number of Transactions")}</TableHead> <TableHead>{_("Number of Transactions")}</TableHead>

View File

@@ -0,0 +1,129 @@
import { RefObject, useEffect, useRef, useState } from 'react'
import { cn } from '@/lib/utils'
type Bbox = [number, number, number, number]

/** Smallest edge length a box is allowed to shrink to, in PDF points. */
const MIN_SIZE = 8

/**
 * Normalises one axis of a box: puts the two edges in ascending order, then pins them inside
 * `[0, limit]`, pushing the far edge out so the pair is at least `MIN_SIZE` apart whenever the
 * page itself is at least that large.
 */
const clampAxis = (start: number, end: number, limit: number): [number, number] => {
  const [low, high] = end < start ? [end, start] : [start, end]
  const near = Math.max(0, Math.min(low, limit - MIN_SIZE))
  const far = Math.min(limit, Math.max(high, near + MIN_SIZE))
  return [near, far]
}

/**
 * Returns a valid copy of `bbox` (`[x0, top, x1, bottom]`): flipped edges are swapped, a minimum
 * size is enforced, and the result is clamped to the page.
 */
const clampBbox = (bbox: Bbox, pageWidth: number, pageHeight: number): Bbox => {
  const [left, right] = clampAxis(bbox[0], bbox[2], pageWidth)
  const [upper, lower] = clampAxis(bbox[1], bbox[3], pageHeight)
  return [left, upper, right, lower]
}
// Corner resize handles. `id` is written to the handle's `data-handle` attribute; its letters
// (n/s/e/w) decide which edges of the box a drag on that handle moves. `className` positions
// the handle on its corner and sets the resize cursor.
const HANDLES = [
{ id: 'nw', className: 'left-0 top-0 -translate-x-1/2 -translate-y-1/2 cursor-nwse-resize' },
{ id: 'ne', className: 'right-0 top-0 translate-x-1/2 -translate-y-1/2 cursor-nesw-resize' },
{ id: 'sw', className: 'left-0 bottom-0 -translate-x-1/2 translate-y-1/2 cursor-nesw-resize' },
{ id: 'se', className: 'right-0 bottom-0 translate-x-1/2 translate-y-1/2 cursor-nwse-resize' },
]
type Props = {
/** Authoritative box as `[x0, top, x1, bottom]`, in PDF points. */
bbox: Bbox
/** Page width, in the same units as `bbox`; used to express the box as a percentage. */
pageWidth: number
/** Page height, in the same units as `bbox`; used to express the box as a percentage. */
pageHeight: number
/** Class names for the outline (`border`), the fill when included (`bg`), and the label/handles (`swatch`). */
color: { border: string; bg: string; swatch: string }
/** Text shown in the small tag above the box. */
label: string
/** When false the box is drawn at reduced opacity instead of with its fill colour. */
included: boolean
/** When true the box ignores pointer input and no resize handles are rendered. */
disabled?: boolean
/** Element whose rendered size is used to convert pixel deltas into PDF points. */
containerRef: RefObject<HTMLDivElement | null>
/** Receives the edited box once a drag gesture finishes. */
onCommit: (bbox: Bbox) => void
}
/**
 * A draggable + corner-resizable rectangle over a rendered PDF page. Coordinates are in PDF
 * points (top-left origin); pixel deltas are converted using the container's rendered size.
 *
 * The box is edited as a local draft while the pointer is down. When the gesture ends with a
 * box that differs from the one it started with, the draft is handed to `onCommit`. A cancelled
 * gesture restores the `bbox` prop and commits nothing.
 */
const BBoxOverlay = ({ bbox, pageWidth, pageHeight, color, label, included, disabled, containerRef, onCommit }: Props) => {
  const [draft, setDraft] = useState<Bbox>(bbox)
  // Mirror of `draft` that event handlers can read without waiting for a re-render.
  const draftRef = useRef<Bbox>(bbox)
  const drag = useRef<{ mode: string; startX: number; startY: number; start: Bbox } | null>(null)

  // Reset to the authoritative bbox whenever it changes (e.g. after a server re-extract).
  useEffect(() => {
    setDraft(bbox)
    draftRef.current = bbox
  }, [bbox])

  const apply = (next: Bbox) => {
    draftRef.current = next
    setDraft(next)
  }

  const onPointerDown = (e: React.PointerEvent) => {
    if (disabled) return
    e.preventDefault()
    e.stopPropagation()
    // A handle carries `data-handle`; anything else on the box means "move the whole box".
    const mode = (e.target as HTMLElement).dataset.handle ?? 'move'
    ;(e.currentTarget as HTMLElement).setPointerCapture(e.pointerId)
    drag.current = { mode, startX: e.clientX, startY: e.clientY, start: draftRef.current }
  }

  const onPointerMove = (e: React.PointerEvent) => {
    if (!drag.current || !containerRef.current) return
    const rect = containerRef.current.getBoundingClientRect()
    // A collapsed container gives no usable pixel -> point scale (division by zero).
    if (rect.width <= 0 || rect.height <= 0) return

    const dx = ((e.clientX - drag.current.startX) / rect.width) * pageWidth
    const dy = ((e.clientY - drag.current.startY) / rect.height) * pageHeight
    let [x0, top, x1, bottom] = drag.current.start
    const m = drag.current.mode

    if (m === 'move') {
      // Clamp the translation itself so the box keeps its size when pushed against a page
      // edge; clamping each edge independently would squash it.
      const shiftX = Math.max(-x0, Math.min(dx, pageWidth - x1))
      const shiftY = Math.max(-top, Math.min(dy, pageHeight - bottom))
      x0 += shiftX
      x1 += shiftX
      top += shiftY
      bottom += shiftY
    } else {
      if (m.includes('w')) x0 += dx
      if (m.includes('e')) x1 += dx
      if (m.includes('n')) top += dy
      if (m.includes('s')) bottom += dy
    }
    apply(clampBbox([x0, top, x1, bottom], pageWidth, pageHeight))
  }

  // Ends the active gesture, releasing pointer capture only if this element still holds it.
  const finishDrag = (e: React.PointerEvent) => {
    const active = drag.current
    if (!active) return null
    drag.current = null
    const element = e.currentTarget as HTMLElement
    if (element.hasPointerCapture(e.pointerId)) element.releasePointerCapture(e.pointerId)
    return active
  }

  const onPointerUp = (e: React.PointerEvent) => {
    const active = finishDrag(e)
    if (!active) return
    const next = draftRef.current
    // A plain click leaves the box untouched; committing it would only trigger a needless
    // re-extract upstream.
    const changed = next.some((value, i) => value !== active.start[i])
    if (changed) onCommit(next)
  }

  // The browser aborted the gesture (e.g. a touch was taken over for scrolling): drop the
  // drag state so later hover moves do not keep dragging, and discard the uncommitted draft.
  const onPointerCancel = (e: React.PointerEvent) => {
    if (!finishDrag(e)) return
    apply(bbox)
  }

  const [x0, top, x1, bottom] = draft
  return (
    <div
      className={cn(
        'absolute touch-none border-2',
        color.border,
        included ? color.bg : 'opacity-40',
        disabled ? 'pointer-events-none' : 'cursor-move',
      )}
      style={{
        left: `${(x0 / pageWidth) * 100}%`,
        top: `${(top / pageHeight) * 100}%`,
        width: `${((x1 - x0) / pageWidth) * 100}%`,
        height: `${((bottom - top) / pageHeight) * 100}%`,
      }}
      onPointerDown={onPointerDown}
      onPointerMove={onPointerMove}
      onPointerUp={onPointerUp}
      onPointerCancel={onPointerCancel}
    >
      <span className={cn('pointer-events-none absolute -top-5 left-0 rounded px-1 text-[10px] font-medium text-white', color.swatch)}>
        {label}
      </span>
      {!disabled &&
        HANDLES.map((handle) => (
          <span
            key={handle.id}
            data-handle={handle.id}
            className={cn('absolute size-2.5 rounded-sm border border-white', color.swatch, handle.className)}
          />
        ))}
    </div>
  )
}
export default BBoxOverlay

View File

@@ -0,0 +1,23 @@
import StatementDetails from '../CSV/StatementDetails'
import PDFTableEditor from './PDFTableEditor'
import { GetStatementDetailsResponse } from '../import_utils'
type Props = {
data: { message: GetStatementDetailsResponse }
mutate: () => void
}
/**
 * Two-pane layout for a PDF statement import: the statement summary on the left and the
 * extracted-table editor on the right. `mutate` is passed through to the editor.
 */
const PDFImport = ({ data, mutate }: Props) => {
  const statement = data.message

  const detailsPane = (
    <div className="w-[45%] p-4 h-[calc(100vh-72px)] overflow-scroll">
      <StatementDetails data={statement} />
    </div>
  )

  const editorPane = (
    <div className="w-[55%] border-s pe-1 ps-0 border-outline-gray-2 h-[calc(100vh-72px)] overflow-scroll">
      <PDFTableEditor data={statement} mutate={mutate} />
    </div>
  )

  return (
    <div className="w-full flex">
      {detailsPane}
      {editorPane}
    </div>
  )
}
export default PDFImport

View File

@@ -0,0 +1,362 @@
import { useEffect, useMemo, useRef, useState } from 'react'
import { toast } from 'sonner'
import { ChevronDownIcon, ChevronLeftIcon, ChevronRightIcon, FileTextIcon, Loader2Icon, TableIcon } from 'lucide-react'
import _ from '@/lib/translate'
import { cn } from '@/lib/utils'
import { Button } from '@/components/ui/button'
import { Switch } from '@/components/ui/switch'
import { Label } from '@/components/ui/label'
import { H3, Paragraph } from '@/components/ui/typography'
import { Tabs, TabsList, TabsTrigger } from '@/components/ui/tabs'
import ErrorBanner from '@/components/ui/error-banner'
import RawTableGrid from '../RawTableGrid'
import BBoxOverlay from './BBoxOverlay'
import {
applyColumnMappingChange,
ColumnMapsTo,
GetStatementDetailsResponse,
PDFTable,
useReextractPDFTable,
useSetPDFTableHeader,
useUpdatePDFTables,
} from '../import_utils'
type Props = {
data: GetStatementDetailsResponse
mutate: () => void
}
// Distinct overlay colours per table on a page.
const OVERLAY_COLORS = [
{ border: 'border-blue-500', bg: 'bg-blue-500/10', swatch: 'bg-blue-500' },
{ border: 'border-purple-500', bg: 'bg-purple-500/10', swatch: 'bg-purple-500' },
{ border: 'border-amber-500', bg: 'bg-amber-500/10', swatch: 'bg-amber-500' },
{ border: 'border-teal-500', bg: 'bg-teal-500/10', swatch: 'bg-teal-500' },
]
/** Builds a column-index -> mapped-field lookup from a table's `column_mapping` entries. */
const columnMappingRecord = (table: PDFTable): Record<number, ColumnMapsTo> => {
  const byIndex: Record<number, ColumnMapsTo> = {}
  for (const column of table.column_mapping ?? []) {
    byIndex[column.index] = column.maps_to
  }
  return byIndex
}
/**
 * Editor for the tables extracted from a PDF statement.
 *
 * Shows one page at a time, either as the page image with adjustable table boxes ("PDF" view)
 * or as the extracted rows with column-mapping controls ("Table" view). Edits are applied to
 * local state immediately and sent to the backend after a short debounce; `mutate` is called
 * after each successful request. Nothing is sent once the import is "Completed".
 */
const PDFTableEditor = ({ data, mutate }: Props) => {
  const isCompleted = data.doc.status === 'Completed'
  const [tables, setTables] = useState<PDFTable[]>(() => data.pdf_tables ?? [])
  const [viewMode, setViewMode] = useState<'pdf' | 'table'>('pdf')
  const [pageIndex, setPageIndex] = useState(0)
  const [collapsed, setCollapsed] = useState<Set<number>>(new Set())

  const toggleCollapsed = (tableIndex: number) =>
    setCollapsed((prev) => {
      const next = new Set(prev)
      if (next.has(tableIndex)) {
        next.delete(tableIndex)
      } else {
        next.add(tableIndex)
      }
      return next
    })

  const { call, loading, error } = useUpdatePDFTables()
  const { call: reextract, loading: reextracting } = useReextractPDFTable()
  const { call: setHeaderCall, loading: settingHeader } = useSetPDFTableHeader()
  const busy = loading || reextracting || settingHeader

  // Persist edits automatically (debounced) so the transaction preview updates in realtime.
  const tablesRef = useRef(tables)
  const saveTimer = useRef<ReturnType<typeof setTimeout>>(undefined)
  const reextractTimer = useRef<ReturnType<typeof setTimeout>>(undefined)
  // Flat indexes of tables whose bbox changed since the last re-extract was sent.
  const pendingReextract = useRef<Set<number>>(new Set())

  // Keep tablesRef in sync synchronously so the debounced save/re-extract never read stale state.
  const commitTables = (next: PDFTable[]) => {
    tablesRef.current = next
    setTables(next)
  }

  // Adopt the server's table list only when the response actually carries one; a response
  // without `pdf_tables` must not wipe what the user is looking at.
  const adoptServerTables = (next: unknown) => {
    if (Array.isArray(next)) commitTables(next as PDFTable[])
  }

  const scheduleSave = () => {
    if (isCompleted) return
    clearTimeout(saveTimer.current)
    saveTimer.current = setTimeout(() => {
      call({ statement_import_id: data.doc.name, tables: tablesRef.current })
        .then(() => mutate())
        .catch(() => toast.error(_('Could not save the table settings.')))
    }, 500)
  }

  // Sends one re-extract request per table with a pending bbox change, one after another.
  const flushReextract = async () => {
    // Snapshot the targets first: a response replaces tablesRef, and later requests must still
    // send the boxes the user committed.
    const targets = Array.from(pendingReextract.current)
      .map((tableIndex) => tablesRef.current[tableIndex])
      .filter((table) => table != null)
    pendingReextract.current.clear()
    if (targets.length === 0) return

    try {
      let latest: PDFTable[] | undefined
      for (const target of targets) {
        const res = await reextract({
          statement_import_id: data.doc.name,
          page: target.page,
          table_index: target.table_index,
          bbox: target.bbox,
        })
        const returned = res?.message?.pdf_tables
        if (Array.isArray(returned)) latest = returned
      }
      if (latest) commitTables(latest)
      mutate()
    } catch {
      toast.error(_('Could not re-extract the table.'))
    }
  }

  // After a bbox change, re-extract that table's rows from the new region (debounced).
  // Changes to several tables inside one debounce window are all remembered, so restarting the
  // timer for a second table does not drop the first table's re-extract.
  const scheduleReextract = (tableIndex: number) => {
    if (isCompleted) return
    pendingReextract.current.add(tableIndex)
    clearTimeout(reextractTimer.current)
    reextractTimer.current = setTimeout(() => {
      void flushReextract()
    }, 500)
  }

  useEffect(() => () => {
    clearTimeout(saveTimer.current)
    clearTimeout(reextractTimer.current)
  }, [])

  const pages = useMemo(() => Array.from(new Set(tables.map((t) => t.page))).sort((a, b) => a - b), [tables])
  // The table list can shrink after a server response; never index past the last page.
  const safePageIndex = Math.min(pageIndex, Math.max(0, pages.length - 1))
  const currentPage = pages[safePageIndex]

  // Keep the table's position in the flat array so edits target the right one.
  const pageTables = useMemo(
    () => tables.map((table, index) => ({ table, index })).filter((t) => t.table.page === currentPage),
    [tables, currentPage],
  )

  const updateTable = (tableIndex: number, updater: (table: PDFTable) => PDFTable) => {
    if (isCompleted) return
    commitTables(tablesRef.current.map((t, i) => (i === tableIndex ? updater(t) : t)))
    scheduleSave()
  }

  const onChangeMapping = (tableIndex: number, columnIndex: number, mapsTo: ColumnMapsTo) => {
    updateTable(tableIndex, (table) => ({
      ...table,
      column_mapping: applyColumnMappingChange(table.column_mapping, columnIndex, mapsTo),
    }))
  }

  const onToggleIncluded = (tableIndex: number, included: boolean) =>
    updateTable(tableIndex, (table) => ({ ...table, included }))

  const onBboxCommit = (tableIndex: number, bbox: [number, number, number, number]) => {
    if (isCompleted) return
    commitTables(tablesRef.current.map((t, i) => (i === tableIndex ? { ...t, bbox } : t)))
    scheduleReextract(tableIndex)
  }

  // Set/clear the header row of a table; the backend re-derives the column mapping.
  const onSetHeader = (tableIndex: number, headerIndex: number | null) => {
    if (isCompleted) return
    const target = tablesRef.current[tableIndex]
    // The table may have disappeared after a server response replaced the list.
    if (!target) return
    commitTables(tablesRef.current.map((t, i) => (i === tableIndex ? { ...t, header_index: headerIndex } : t)))
    setHeaderCall({
      statement_import_id: data.doc.name,
      page: target.page,
      table_index: target.table_index,
      // -1 is what this component sends for "no header row".
      header_index: headerIndex ?? -1,
    })
      .then((res) => {
        adoptServerTables(res?.message?.pdf_tables)
        mutate()
      })
      .catch(() => toast.error(_('Could not update the header row.')))
  }

  if (tables.length === 0) {
    return (
      <div className="p-4">
        <Paragraph className="text-p-sm text-ink-gray-5">
          {_('No tables were extracted from this PDF.')}
        </Paragraph>
      </div>
    )
  }

  return (
    <div className="flex flex-col gap-3 p-4">
      <div className="flex flex-col gap-1">
        <H3 className="text-base border-0 p-0">{_('Detected Tables')}</H3>
        <Paragraph className="text-p-sm">
          {_('Review each page. In the Table view, map each column, click a row number to set/clear the header row, and exclude anything that is not transactions (ads, summaries).')}
        </Paragraph>
      </div>

      {error && <ErrorBanner error={error} />}

      <div className="flex items-center justify-between gap-2">
        <Tabs value={viewMode} onValueChange={(v) => setViewMode(v as 'pdf' | 'table')}>
          <TabsList variant="subtle">
            <TabsTrigger value="pdf"><FileTextIcon />{_('PDF')}</TabsTrigger>
            <TabsTrigger value="table"><TableIcon />{_('Table')}</TabsTrigger>
          </TabsList>
        </Tabs>
        <div className="flex items-center gap-1">
          {busy && (
            <span className="flex items-center gap-1 pe-1 text-xs text-ink-gray-5">
              <Loader2Icon className="size-3 animate-spin" />
              {reextracting ? _('Re-extracting') : _('Saving')}
            </span>
          )}
          <Button
            variant="ghost"
            isIconButton
            disabled={safePageIndex === 0}
            onClick={() => setPageIndex(Math.max(0, safePageIndex - 1))}
          >
            <ChevronLeftIcon />
          </Button>
          <span className="min-w-24 text-center text-sm text-ink-gray-7">
            {/* NOTE(review): {0} is the table's own page value while {1} counts pages that have tables — confirm the two are meant to be shown together. */}
            {_('Page {0} of {1}', [currentPage.toString(), pages.length.toString()])}
          </span>
          <Button
            variant="ghost"
            isIconButton
            disabled={safePageIndex >= pages.length - 1}
            onClick={() => setPageIndex(Math.min(pages.length - 1, safePageIndex + 1))}
          >
            <ChevronRightIcon />
          </Button>
        </div>
      </div>

      {viewMode === 'pdf' ? (
        <PageView
          pageTables={pageTables}
          disabled={isCompleted}
          onToggleIncluded={onToggleIncluded}
          onBboxCommit={onBboxCommit}
        />
      ) : (
        <div className="flex flex-col gap-4">
          {pageTables.map(({ table, index }, position) => {
            const isCollapsed = collapsed.has(index)
            return (
              <div
                key={index}
                className={cn('flex flex-col rounded border border-outline-gray-2', !table.included && 'opacity-60')}
              >
                <div className="flex items-center justify-between p-2">
                  <span className="ps-1 text-sm font-medium text-ink-gray-8">
                    {_('Table {0}', [(position + 1).toString()])}
                  </span>
                  <div className="flex items-center gap-2">
                    <IncludeToggle
                      id={`tbl-${index}`}
                      checked={table.included}
                      disabled={isCompleted}
                      onCheckedChange={(c) => onToggleIncluded(index, c)}
                    />
                    <Button variant="ghost" size="sm" isIconButton onClick={() => toggleCollapsed(index)}>
                      <ChevronDownIcon className={cn('transition-transform', isCollapsed && '-rotate-90')} />
                    </Button>
                  </div>
                </div>
                {!isCollapsed && (
                  <div className="overflow-auto border-t border-outline-gray-2">
                    <RawTableGrid
                      rows={table.rows}
                      columnMapping={columnMappingRecord(table)}
                      headerIndex={table.header_index}
                      editable
                      disabled={isCompleted}
                      onChangeMapping={(columnIndex, mapsTo) => onChangeMapping(index, columnIndex, mapsTo)}
                      onSetHeader={(rowIndex) => onSetHeader(index, rowIndex)}
                    />
                  </div>
                )}
              </div>
            )
          })}
        </div>
      )}
    </div>
  )
}
type PageViewProps = {
pageTables: { table: PDFTable; index: number }[]
disabled: boolean
onToggleIncluded: (tableIndex: number, included: boolean) => void
onBboxCommit: (tableIndex: number, bbox: [number, number, number, number]) => void
}
/**
 * "PDF" view of a single page: the page image with one adjustable box per table, followed by
 * a colour-keyed list of include toggles. Page image and dimensions are read from the first
 * table on the page.
 */
const PageView = ({ pageTables, disabled, onToggleIncluded, onBboxCommit }: PageViewProps) => {
  const containerRef = useRef<HTMLDivElement>(null)

  const firstTable = pageTables[0]?.table
  const pageImage = firstTable?.page_image
  const pageWidth = firstTable?.page_width ?? 1
  const pageHeight = firstTable?.page_height ?? 1

  if (!pageImage) {
    return (
      <Paragraph className="text-p-sm text-ink-gray-5">
        {_('No page image is available for this page.')}
      </Paragraph>
    )
  }

  // Colour and label depend only on a table's position on the page, so derive them once and
  // share them between the overlay and the legend below it.
  const entries = pageTables.map(({ table, index }, position) => ({
    table,
    index,
    color: OVERLAY_COLORS[position % OVERLAY_COLORS.length],
    label: _('Table {0}', [(position + 1).toString()]),
  }))

  return (
    <div className="flex flex-col gap-3">
      {!disabled && (
        <Paragraph className="text-xs text-ink-gray-5">
          {_('Drag a box to move it, or drag a corner to resize. The table is re-read from the new region automatically.')}
        </Paragraph>
      )}

      <div ref={containerRef} className="relative w-full overflow-auto rounded border border-outline-gray-2 bg-surface-gray-1">
        <img src={pageImage} alt={_('Page preview')} className="w-full" />
        {entries.map((entry) => (
          <BBoxOverlay
            key={entry.index}
            bbox={entry.table.bbox}
            pageWidth={pageWidth}
            pageHeight={pageHeight}
            color={entry.color}
            label={entry.label}
            included={entry.table.included}
            disabled={disabled}
            containerRef={containerRef}
            onCommit={(bbox) => onBboxCommit(entry.index, bbox)}
          />
        ))}
      </div>

      <div className="flex flex-col gap-1.5">
        {entries.map((entry) => (
          <div key={entry.index} className="flex items-center justify-between rounded border border-outline-gray-2 px-2 py-1.5">
            <div className="flex items-center gap-2">
              <span className={cn('size-3 rounded-sm', entry.color.swatch)} />
              <span className="text-xs">{entry.label}</span>
            </div>
            <IncludeToggle
              id={`pdf-tbl-${entry.index}`}
              checked={entry.table.included}
              disabled={disabled}
              onCheckedChange={(c) => onToggleIncluded(entry.index, c)}
            />
          </div>
        ))}
      </div>
    </div>
  )
}
/** A labelled "Include" switch; `id` ties the label to the switch. */
const IncludeToggle = (props: {
  id: string
  checked: boolean
  disabled: boolean
  onCheckedChange: (checked: boolean) => void
}) => {
  const { id, checked, disabled, onCheckedChange } = props
  return (
    <div className="flex items-center gap-2">
      <Label htmlFor={id} className="text-xs text-ink-gray-6">{_('Include')}</Label>
      <Switch id={id} checked={checked} disabled={disabled} onCheckedChange={onCheckedChange} />
    </div>
  )
}
export default PDFTableEditor

View File

@@ -0,0 +1,222 @@
import { useMemo } from 'react'
import {
ArrowDownRightIcon,
ArrowUpDownIcon,
ArrowUpRightIcon,
BanknoteIcon,
CalendarIcon,
DollarSignIcon,
FileTextIcon,
ListIcon,
ReceiptIcon,
} from 'lucide-react'
import _ from '@/lib/translate'
import { cn } from '@/lib/utils'
import { Table, TableBody, TableCell, TableHead, TableRow } from '@/components/ui/table'
import { Tooltip, TooltipContent, TooltipTrigger } from '@/components/ui/tooltip'
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@/components/ui/select'
import { COLUMN_MAPS_TO_OPTIONS, ColumnMapsTo } from './import_utils'
const AMOUNT_COLUMNS: ColumnMapsTo[] = ['Amount', 'Withdrawal', 'Deposit', 'Balance']
const DATE_LIKE = /\d{1,4}[/\-.\s]\d{1,2}[/\-.\s]\d{1,4}|\d{1,2}[\s-][a-z]{3}/i
type Props = {
rows: string[][]
/** Column index -> mapped field */
columnMapping: Record<number, ColumnMapsTo>
headerIndex: number | null
editable?: boolean
disabled?: boolean
onChangeMapping?: (columnIndex: number, mapsTo: ColumnMapsTo) => void
/** Set the header row (or null to mark the table as having no header). */
onSetHeader?: (rowIndex: number | null) => void
}
/**
 * A preview of extracted rows with CSV-style colour coding: the header row is highlighted,
 * detected transaction rows are green, and mapped columns are emphasised. When `editable`, a
 * compact row of column -> field dropdowns sits at the top, and row numbers can be clicked to
 * set/clear the header row.
 *
 * Transaction rows are detected locally: a row counts when its "Date" column looks like a date
 * and at least one Amount/Withdrawal/Deposit column is non-empty. This approximates the
 * backend and exists only so the highlighting follows mapping edits immediately.
 */
const RawTableGrid = ({ rows, columnMapping, headerIndex, editable, disabled, onChangeMapping, onSetHeader }: Props) => {
  // Tabular (XLSX) cells can be numbers/dates, not strings - coerce so .trim()/render are safe.
  const stringRows = useMemo(
    () => rows.map((row) => row.map((cell) => (cell == null ? '' : String(cell)))),
    [rows],
  )
  // Rows may be ragged; the grid is as wide as the longest row.
  const numColumns = useMemo(() => stringRows.reduce((max, row) => Math.max(max, row.length), 0), [stringRows])
  // Column positions are the same for every row, so build the list once instead of per row.
  const columnIndexes = useMemo(() => Array.from({ length: numColumns }, (_unused, i) => i), [numColumns])

  // Columns mapped to a real field. A Set keeps the per-cell membership check constant-time.
  const validColumns = useMemo(
    () =>
      new Set(
        Object.entries(columnMapping)
          .filter(([, m]) => m && m !== 'Do not import')
          .map(([i]) => Number(i)),
      ),
    [columnMapping],
  )
  const dateColumn = useMemo(() => Object.entries(columnMapping).find(([, m]) => m === 'Date')?.[0], [columnMapping])
  // 'Balance' is deliberately not listed: a balance value alone does not mark a transaction row.
  const amountColumns = useMemo(
    () => Object.entries(columnMapping).filter(([, m]) => ['Amount', 'Withdrawal', 'Deposit'].includes(m)).map(([i]) => Number(i)),
    [columnMapping],
  )

  // Approximate the backend's transaction-row detection so the highlighting tracks edits live.
  const transactionRows = useMemo(() => {
    const set = new Set<number>()
    if (dateColumn === undefined) return set
    const dateIdx = Number(dateColumn)
    stringRows.forEach((row, index) => {
      if (index === headerIndex) return
      const dateCell = (row[dateIdx] ?? '').trim()
      if (!dateCell || !DATE_LIKE.test(dateCell)) return
      if (amountColumns.some((c) => (row[c] ?? '').trim() !== '')) set.add(index)
    })
    return set
  }, [stringRows, headerIndex, dateColumn, amountColumns])

  return (
    <Table containerClassName="rounded-none">
      <TableBody>
        {editable && (
          <TableRow className="border-b border-outline-gray-2 bg-surface-white hover:bg-surface-white">
            <TableHead className="w-8 p-1" />
            {columnIndexes.map((columnIndex) => (
              <TableHead key={columnIndex} className="p-1 align-top">
                <Select
                  disabled={disabled}
                  // `||` rather than `??`: an empty-string mapping is unmapped too (matching
                  // `validColumns`) and must not leave the dropdown blank.
                  value={columnMapping[columnIndex] || 'Do not import'}
                  onValueChange={(value) => onChangeMapping?.(columnIndex, value as ColumnMapsTo)}
                >
                  <SelectTrigger variant="outline" inputSize="sm" className="h-7 w-full">
                    <SelectValue />
                  </SelectTrigger>
                  <SelectContent>
                    {COLUMN_MAPS_TO_OPTIONS.map((option) => (
                      <SelectItem key={option} value={option}>
                        <span className="flex items-center gap-1.5">
                          <ColumnHeaderIcon columnType={option} />
                          {_(option)}
                        </span>
                      </SelectItem>
                    ))}
                  </SelectContent>
                </Select>
              </TableHead>
            ))}
          </TableRow>
        )}
        {stringRows.map((row, index) => {
          const isHeaderRow = index === headerIndex
          const isTransactionRow = transactionRows.has(index)
          return (
            <TableRow
              key={index}
              className={cn({
                'bg-green-50 hover:bg-green-50 dark:bg-green-700 dark:hover:bg-green-700': isTransactionRow,
                'bg-yellow-100 hover:bg-yellow-100 dark:bg-yellow-400': isHeaderRow,
                'text-ink-gray-5/70': !isTransactionRow && !isHeaderRow,
              })}
            >
              {editable && onSetHeader ? (
                <TableCell className="h-px w-8 p-0 text-center">
                  <Tooltip>
                    <TooltipTrigger asChild>
                      <button
                        type="button"
                        disabled={disabled}
                        // Clicking the current header row clears it; any other row becomes the header.
                        onClick={() => onSetHeader(isHeaderRow ? null : index)}
                        className={cn(
                          'flex h-full w-full items-center justify-center px-1 text-ink-gray-6 hover:bg-surface-gray-3',
                          isHeaderRow && 'font-semibold text-ink-gray-8',
                        )}
                      >
                        {index + 1}
                      </button>
                    </TooltipTrigger>
                    <TooltipContent>
                      {isHeaderRow
                        ? _('This is the header row. Click to mark the table as having no header.')
                        : _('Click to set this as the header row.')}
                    </TooltipContent>
                  </Tooltip>
                </TableCell>
              ) : (
                <TableCell className="w-8 px-1 py-0.5 text-center text-ink-gray-6">{index + 1}</TableCell>
              )}
              {columnIndexes.map((cellIndex) => {
                const columnType = columnMapping[cellIndex]
                const isValidColumn = validColumns.has(cellIndex)
                const isAmountColumn = AMOUNT_COLUMNS.includes(columnType)
                // Short rows are padded with empty cells so every row spans the full grid.
                const cellText = row[cellIndex] ?? ''

                // Read-only header row: icon + label.
                if (isHeaderRow) {
                  return (
                    <TableCell key={cellIndex} className="max-w-[200px] overflow-hidden text-ellipsis py-1">
                      <div className="flex items-center gap-1 px-1 text-xs font-medium text-ink-gray-8">
                        {columnType && (
                          <Tooltip>
                            <TooltipTrigger>
                              <ColumnHeaderIcon columnType={columnType} />
                            </TooltipTrigger>
                            <TooltipContent>{_(columnType)}</TooltipContent>
                          </Tooltip>
                        )}
                        {cellText}
                      </div>
                    </TableCell>
                  )
                }

                return (
                  <TableCell
                    key={cellIndex}
                    className={cn('max-w-[200px] overflow-hidden text-ellipsis py-0.5', {
                      'bg-green-100 dark:bg-green-400 hover:bg-green-100 dark:hover:bg-green-400': isValidColumn && isTransactionRow,
                      'text-ink-gray-5': !isValidColumn && isTransactionRow,
                    })}
                  >
                    <div
                      className={cn('min-h-5 flex items-center px-1 text-xs', {
                        'justify-end': isAmountColumn && isValidColumn && isTransactionRow,
                      })}
                      title={cellText}
                    >
                      {cellText}
                    </div>
                  </TableCell>
                )
              })}
            </TableRow>
          )
        })}
      </TableBody>
    </Table>
  )
}
/**
 * Renders the small icon associated with a column mapping type.
 *
 * Returns `null` for an undefined type and for any mapping type that has no
 * dedicated icon below.
 */
const ColumnHeaderIcon = ({ columnType }: { columnType?: ColumnMapsTo }) => {
    if (columnType === 'Amount') {
        return <DollarSignIcon className="size-4" />
    }
    if (columnType === 'Withdrawal') {
        return <ArrowUpRightIcon className="size-4 text-ink-red-3" />
    }
    if (columnType === 'Deposit') {
        return <ArrowDownRightIcon className="size-4 text-ink-green-3" />
    }
    if (columnType === 'Balance') {
        return <BanknoteIcon className="size-4" />
    }
    if (columnType === 'Date') {
        return <CalendarIcon className="size-4" />
    }
    if (columnType === 'Description') {
        return <FileTextIcon className="size-4" />
    }
    if (columnType === 'Reference') {
        return <ReceiptIcon className="size-4" />
    }
    if (columnType === 'Transaction Type') {
        return <ListIcon className="size-4" />
    }
    if (columnType === 'Debit/Credit') {
        return <ArrowUpDownIcon className="size-4" />
    }
    // No icon for the remaining mapping types or when no type is given.
    return null
}
export default RawTableGrid

View File

@@ -1,6 +1,97 @@
import { BankStatementImportLog } from "@/types/Accounts/BankStatementImportLog" import { BankStatementImportLog } from "@/types/Accounts/BankStatementImportLog"
import { useFrappeGetCall } from "frappe-react-sdk" import { useFrappeGetCall, useFrappePostCall } from "frappe-react-sdk"
/**
 * Every role a statement column can be mapped to.
 * "Do not import" marks a column that is ignored.
 */
export type ColumnMapsTo =
| "Do not import"
| "Date"
| "Withdrawal"
| "Deposit"
| "Amount"
| "Description"
| "Reference"
| "Transaction Type"
| "Debit/Credit"
| "Balance"
| "Included Fee"
| "Excluded Fee"
| "Party Name/Account Holder"
| "Party Account No."
| "Party IBAN"
/** Minimal shape of one column-mapping row, as consumed by `applyColumnMappingChange`. */
export type ColumnMappingEntry = {
/** Column position; used as the identity of the entry when applying changes. */
index: number
/** Role assigned to the column. NOTE(review): `| string` widens this union to plain `string`. */
maps_to: ColumnMapsTo | string
/** Header label of the column, when one is known. */
header_text?: string
/** Variable name for the column; `applyColumnMappingChange` falls back to `column_<index>` when absent. */
variable?: string
}
/**
 * Apply a column mapping change, clearing the same mapping from any other column.
 *
 * A mapping other than "Do not import" may be held by one column only, so any
 * other column currently mapped to `mapsTo` is reset to "Do not import".
 * The input array and its entries are never mutated.
 *
 * @param columns - Current mapping entries.
 * @param columnIndex - `index` of the column being changed; an entry is created
 *   when none exists for it yet.
 * @param mapsTo - New role for the column.
 * @returns A new array sorted by `index`. The changed entry keeps every other
 *   property of the entry it replaces (not only `header_text`/`variable`), so
 *   richer `T` shapes survive the update.
 */
export function applyColumnMappingChange<T extends ColumnMappingEntry>(
    columns: T[],
    columnIndex: number,
    mapsTo: ColumnMapsTo,
): T[] {
    const previous = columns.find((c) => c.index === columnIndex)

    // Start from the existing entry so fields beyond the four known ones are
    // carried over instead of being silently dropped by the `as T` cast.
    const updated = {
        ...(previous ?? {}),
        index: columnIndex,
        maps_to: mapsTo,
        header_text: previous?.header_text ?? "",
        variable: previous?.variable ?? `column_${columnIndex}`,
    } as T

    const others = columns
        .filter((c) => c.index !== columnIndex)
        .map((c) =>
            // "Do not import" may be shared by any number of columns.
            mapsTo !== "Do not import" && c.maps_to === mapsTo
                ? { ...c, maps_to: "Do not import" as ColumnMapsTo }
                : c,
        )

    return [...others, updated].sort((a, b) => a.index - b.index)
}
/**
 * All mapping choices as an ordered list (the order differs from the
 * `ColumnMapsTo` union declaration). "Do not import" comes first.
 */
export const COLUMN_MAPS_TO_OPTIONS: ColumnMapsTo[] = [
"Do not import",
"Date",
"Description",
"Reference",
"Withdrawal",
"Deposit",
"Amount",
"Balance",
"Debit/Credit",
"Transaction Type",
"Included Fee",
"Excluded Fee",
"Party Name/Account Holder",
"Party Account No.",
"Party IBAN",
]
/** Mapping of a single column of a table extracted from a PDF statement. */
export interface PDFTableColumn {
/** Column position within the table's rows. */
index: number
/** Header label of the column. */
header_text: string
/** Optional variable name for the column. */
variable?: string
/** Role assigned to the column. */
maps_to: ColumnMapsTo
}
/** One table detected on a page of a PDF statement, with its mapping and inclusion state. */
export interface PDFTable {
/** Page the table was found on. NOTE(review): appears to be 1-based — confirm against the server. */
page: number
/** Identifies the table; passed together with `page` to the server. */
table_index: number
/** Table bounding box as four numbers. NOTE(review): presumably [x0, top, x1, bottom] in page coordinates — confirm. */
bbox: [number, number, number, number]
page_width: number
page_height: number
/** Reference to the rendered page image, or null when none is available. NOTE(review): presumably a file URL — confirm. */
page_image: string | null
/** NOTE(review): presumably the scale used when rendering `page_image` — confirm. */
render_scale: number | null
/** Extracted cell text, one inner array per row. */
rows: string[][]
/** Index into `rows` of the header row, or null when the table has no header. */
header_index: number | null
column_mapping: PDFTableColumn[]
date_format?: string
amount_format?: string
/** Whether this table is included in the import. */
included: boolean
}
export interface GetStatementDetailsResponse { export interface GetStatementDetailsResponse {
doc: BankStatementImportLog, doc: BankStatementImportLog,
@@ -30,6 +121,7 @@ export interface GetStatementDetailsResponse {
date_format: string, date_format: string,
raw_data: Array<Array<string>>, raw_data: Array<Array<string>>,
currency: string, currency: string,
pdf_tables?: PDFTable[],
} }
export const useGetStatementDetails = (id: string) => { export const useGetStatementDetails = (id: string) => {
@@ -40,3 +132,23 @@ export const useGetStatementDetails = (id: string) => {
}) })
} }
/** POST-call hook for the Bank Statement Import Log `update_pdf_tables` server method. */
export const useUpdatePDFTables = () =>
    useFrappePostCall<{ message: GetStatementDetailsResponse }>(
        "erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.update_pdf_tables",
    )
/** POST-call hook for the Bank Statement Import Log `reextract_pdf_table` server method. */
export const useReextractPDFTable = () =>
    useFrappePostCall<{ message: GetStatementDetailsResponse }>(
        "erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.reextract_pdf_table",
    )
/** POST-call hook for the Bank Statement Import Log `set_pdf_table_header` server method. */
export const useSetPDFTableHeader = () =>
    useFrappePostCall<{ message: GetStatementDetailsResponse }>(
        "erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.set_pdf_table_header",
    )
/** POST-call hook for the Bank Statement Import Log `update_column_mapping` server method. */
export const useUpdateColumnMapping = () =>
    useFrappePostCall<{ message: GetStatementDetailsResponse }>(
        "erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.update_column_mapping",
    )
/** POST-call hook for the Bank Statement Import Log `set_header_index` server method. */
export const useSetHeaderIndex = () =>
    useFrappePostCall<{ message: GetStatementDetailsResponse }>(
        "erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log.set_header_index",
    )

View File

@@ -231,7 +231,7 @@ export const FileTypeIcon = ({
const getTextColor = () => { const getTextColor = () => {
switch (fileType.toLowerCase()) { switch (fileType.toLowerCase()) {
case 'pdf': case 'pdf':
return 'text-red-700' return 'text-ink-red-3'
case 'doc': case 'doc':
case 'docx': case 'docx':
return 'text-[#1A5CBD]' return 'text-[#1A5CBD]'

View File

@@ -7,6 +7,7 @@ import { Dialog, DialogClose, DialogContent, DialogDescription, DialogFooter, Di
import { Empty, EmptyHeader, EmptyMedia, EmptyTitle } from "@/components/ui/empty" import { Empty, EmptyHeader, EmptyMedia, EmptyTitle } from "@/components/ui/empty"
import ErrorBanner from "@/components/ui/error-banner" import ErrorBanner from "@/components/ui/error-banner"
import { FileDropzone } from "@/components/ui/file-dropzone" import { FileDropzone } from "@/components/ui/file-dropzone"
import { Input } from "@/components/ui/input"
import { Label } from "@/components/ui/label" import { Label } from "@/components/ui/label"
import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table" import { Table, TableBody, TableCell, TableHead, TableHeader, TableRow } from "@/components/ui/table"
import { H3, Paragraph } from "@/components/ui/typography" import { H3, Paragraph } from "@/components/ui/typography"
@@ -16,7 +17,7 @@ import { flt, formatCurrency } from "@/lib/numbers"
import _ from "@/lib/translate" import _ from "@/lib/translate"
import { cn } from "@/lib/utils" import { cn } from "@/lib/utils"
import { BankStatementImportLog } from "@/types/Accounts/BankStatementImportLog" import { BankStatementImportLog } from "@/types/Accounts/BankStatementImportLog"
import { useFrappeCreateDoc, useFrappeFileUpload, useFrappeGetDocList } from "frappe-react-sdk" import { useFrappeCreateDoc, useFrappeFileUpload, useFrappeGetDocList, useFrappeUpdateDoc } from "frappe-react-sdk"
import { useAtom, useAtomValue } from "jotai" import { useAtom, useAtomValue } from "jotai"
import { ListIcon, Loader2Icon } from "lucide-react" import { ListIcon, Loader2Icon } from "lucide-react"
import { useState } from "react" import { useState } from "react"
@@ -30,11 +31,15 @@ const BankStatementImporter = () => {
const [selectedBankAccount] = useAtom(selectedBankAccountAtom) const [selectedBankAccount] = useAtom(selectedBankAccountAtom)
const [files, setFiles] = useState<File[]>([]) const [files, setFiles] = useState<File[]>([])
const [password, setPassword] = useState("")
const { upload, error, loading } = useFrappeFileUpload() const { upload, error, loading } = useFrappeFileUpload()
const navigate = useNavigate() const navigate = useNavigate()
const { createDoc, loading: createLoading, error: createError } = useFrappeCreateDoc<BankStatementImportLog>() const { createDoc, loading: createLoading, error: createError } = useFrappeCreateDoc<BankStatementImportLog>()
const { updateDoc, error: updateError } = useFrappeUpdateDoc()
const isPdf = files[0]?.name?.toLowerCase().endsWith(".pdf") ?? false
const onUpload = () => { const onUpload = () => {
@@ -44,12 +49,18 @@ const BankStatementImporter = () => {
const id = `new-bank-statement-import-log-${Date.now()}` const id = `new-bank-statement-import-log-${Date.now()}`
upload(files[0], { // For protected PDFs, persist the password on the Bank Account so it is reused for
// every statement of this account (and is available before the import doc is created).
const ensurePassword = isPdf && password
? updateDoc("Bank Account", selectedBankAccount.name, { statement_password: password })
: Promise.resolve()
ensurePassword.then(() => upload(files[0], {
isPrivate: true, isPrivate: true,
doctype: "Bank Statement Import Log", doctype: "Bank Statement Import Log",
docname: id, docname: id,
fieldname: 'file' fieldname: 'file'
}).then((file) => { })).then((file) => {
return createDoc("Bank Statement Import Log", return createDoc("Bank Statement Import Log",
// @ts-expect-error - not filling everything else // @ts-expect-error - not filling everything else
{ {
@@ -67,6 +78,7 @@ const BankStatementImporter = () => {
<div className="w-[52%]"> <div className="w-[52%]">
{error && <ErrorBanner error={error} />} {error && <ErrorBanner error={error} />}
{createError && <ErrorBanner error={createError} />} {createError && <ErrorBanner error={createError} />}
{updateError && <ErrorBanner error={updateError} />}
<div className="py-2 flex flex-col gap-6"> <div className="py-2 flex flex-col gap-6">
<div className="flex flex-col gap-2"> <div className="flex flex-col gap-2">
<Label>{_("Company")}<span className="text-ink-red-3">*</span></Label> <Label>{_("Company")}<span className="text-ink-red-3">*</span></Label>
@@ -89,7 +101,7 @@ const BankStatementImporter = () => {
data-slot="form-description" data-slot="form-description"
className={cn("text-ink-gray-5 text-xs")} className={cn("text-ink-gray-5 text-xs")}
> >
{_("Upload your bank statement file to start the import process. We support CSV, and XLSX files.")} {_("Upload your bank statement file to start the import process. We support CSV, XLSX and PDF files.")}
</p> </p>
</div> </div>
<div> <div>
@@ -105,10 +117,27 @@ const BankStatementImporter = () => {
'text/csv': ['.csv'], 'text/csv': ['.csv'],
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'], 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
'application/vnd.ms-excel': ['.xls'], 'application/vnd.ms-excel': ['.xls'],
'application/pdf': ['.pdf'],
// 'application/xml': ['.xml'], // 'application/xml': ['.xml'],
}} }}
multiple={false} multiple={false}
/> />
{isPdf && <div className="flex flex-col gap-2">
<Label htmlFor="pdf-password">{_("PDF Password")}</Label>
<Input
id="pdf-password"
type="password"
autoComplete="off"
value={password}
onChange={(e) => setPassword(e.target.value)}
placeholder={_("Only if the PDF is password protected")}
className="max-w-sm"
/>
<p data-slot="form-description" className={cn("text-ink-gray-5 text-p-sm")}>
{_("Leave blank to use the password already saved for this bank account (if any). It is stored encrypted and reused for future statements.")}
</p>
</div>}
</div>} </div>}
<div className="flex justify-end px-4"> <div className="flex justify-end px-4">
<Button <Button
@@ -137,9 +166,10 @@ const StatementInstructions = () => {
<DialogContent className="min-w-7xl"> <DialogContent className="min-w-7xl">
<DialogHeader> <DialogHeader>
<DialogTitle>{_("Statement Import Instructions")}</DialogTitle> <DialogTitle>{_("Statement Import Instructions")}</DialogTitle>
<DialogDescription>{_("We support uploading CSV, XLSX and XLS files. Please make sure the file contains the correct columns.")}</DialogDescription> <DialogDescription>{_("We support uploading CSV, XLSX, XLS and PDF files. Please make sure the file contains the correct columns.")}</DialogDescription>
</DialogHeader> </DialogHeader>
<Paragraph className="text-sm">{_("The file should contain the following columns with a distinct header row. You can upload most bank statements as is without changing the columns.")}</Paragraph> <Paragraph className="text-sm">{_("The file should contain the following columns with a distinct header row. You can upload most bank statements as is without changing the columns.")}</Paragraph>
<Paragraph className="text-sm text-ink-gray-6">{_("For PDF statements, we auto-detect the tables on each page. You can then confirm each detected table, map its columns, and exclude anything that is not transactions (e.g. ads or summaries). Password-protected PDFs are supported - the password is saved on the bank account and reused.")}</Paragraph>
<Table> <Table>
<TableHeader> <TableHeader>
<TableRow> <TableRow>
@@ -231,7 +261,13 @@ const StatementImportLog = () => {
<TableRow key={item.name} onClick={() => onViewDetails(item.name)} className="cursor-pointer hover:bg-surface-gray-2"> <TableRow key={item.name} onClick={() => onViewDetails(item.name)} className="cursor-pointer hover:bg-surface-gray-2">
<TableCell>{formatDate(item.creation, 'Do MMM YYYY')}</TableCell> <TableCell>{formatDate(item.creation, 'Do MMM YYYY')}</TableCell>
<TableCell><Badge theme={item.status === "Completed" ? "green" : "gray"}>{item.status}</Badge></TableCell> <TableCell><Badge theme={item.status === "Completed" ? "green" : "gray"}>{item.status}</Badge></TableCell>
<TableCell>{formatDate(item.start_date, 'Do MMM YYYY')} to {formatDate(item.end_date, 'Do MMM YYYY')}</TableCell> <TableCell>
{item.start_date && item.end_date ? (
<span>{formatDate(item.start_date, 'Do MMM YYYY')} to {formatDate(item.end_date, 'Do MMM YYYY')}</span>
) : (
<span>-</span>
)}
</TableCell>
<TableCell className="text-end">{item.number_of_transactions}</TableCell> <TableCell className="text-end">{item.number_of_transactions}</TableCell>
<TableCell className="text-end font-numeric">{formatCurrency(flt(item.closing_balance, 2))}</TableCell> <TableCell className="text-end font-numeric">{formatCurrency(flt(item.closing_balance, 2))}</TableCell>
<TableCell><a <TableCell><a

View File

@@ -9,12 +9,13 @@ import { ChevronLeftIcon, ChevronRightIcon } from 'lucide-react'
import { Link, useParams } from 'react-router' import { Link, useParams } from 'react-router'
const CSVImport = lazy(() => import('@/components/features/BankStatementImporter/CSV/CSVImport')) const CSVImport = lazy(() => import('@/components/features/BankStatementImporter/CSV/CSVImport'))
const PDFImport = lazy(() => import('@/components/features/BankStatementImporter/PDF/PDFImport'))
const ViewBankStatementImportLog = () => { const ViewBankStatementImportLog = () => {
const { id } = useParams<{ id: string }>() const { id } = useParams<{ id: string }>()
const { data, isLoading, error } = useGetStatementDetails(id ?? "") const { data, isLoading, error, mutate } = useGetStatementDetails(id ?? "")
useFrappeDocumentEventListener("Bank Statement Import Log", id ?? "", () => { useFrappeDocumentEventListener("Bank Statement Import Log", id ?? "", () => {
}) })
@@ -42,7 +43,13 @@ const ViewBankStatementImportLog = () => {
<ErrorBanner error={error} /> <ErrorBanner error={error} />
</div> </div>
} }
return <CSVImport data={data} /> const isPdf = data.message.doc.file?.toLowerCase().endsWith('.pdf')
if (isPdf) {
return <PDFImport data={data} mutate={mutate} />
}
return <CSVImport data={data} mutate={mutate} />
} }
export default ViewBankStatementImportLog export default ViewBankStatementImportLog

View File

@@ -38,6 +38,8 @@ export interface BankAccount{
branch_code?: string branch_code?: string
/** Bank Account No : Data */ /** Bank Account No : Data */
bank_account_no?: string bank_account_no?: string
/** Statement PDF Password : Password - Password used to open password-protected PDF statements for this account. Stored encrypted. */
statement_password?: string
/** Is Credit Card : Check */ /** Is Credit Card : Check */
is_credit_card?: 0 | 1 is_credit_card?: 0 | 1
/** Integration ID : Data */ /** Integration ID : Data */

View File

@@ -47,4 +47,6 @@ export interface BankStatementImportLog {
detected_transaction_ending_index?: number detected_transaction_ending_index?: number
/** Column Mapping : Table - Bank Statement Import Log Column Map */ /** Column Mapping : Table - Bank Statement Import Log Column Map */
column_mapping?: BankStatementImportLogColumnMap[] column_mapping?: BankStatementImportLogColumnMap[]
/** PDF Tables : JSON - Per-table extraction data for PDF statements */
pdf_tables?: string
} }

View File

@@ -27,6 +27,7 @@
"column_break_12", "column_break_12",
"branch_code", "branch_code",
"bank_account_no", "bank_account_no",
"statement_password",
"address_and_contact", "address_and_contact",
"address_html", "address_html",
"column_break_13", "column_break_13",
@@ -149,6 +150,12 @@
"label": "Bank Account No", "label": "Bank Account No",
"length": 30 "length": 30
}, },
{
"description": "Password used to open password-protected PDF statements for this account. Stored encrypted.",
"fieldname": "statement_password",
"fieldtype": "Password",
"label": "Statement PDF Password"
},
{ {
"fieldname": "address_and_contact", "fieldname": "address_and_contact",
"fieldtype": "Section Break", "fieldtype": "Section Break",

View File

@@ -41,6 +41,7 @@ class BankAccount(Document):
mask: DF.Data | None mask: DF.Data | None
party: DF.DynamicLink | None party: DF.DynamicLink | None
party_type: DF.Link | None party_type: DF.Link | None
statement_password: DF.Password | None
# end: auto-generated types # end: auto-generated types
def onload(self): def onload(self):

View File

@@ -28,7 +28,8 @@
"detected_transaction_starting_index", "detected_transaction_starting_index",
"detected_transaction_ending_index", "detected_transaction_ending_index",
"section_break_yulq", "section_break_yulq",
"column_mapping" "column_mapping",
"pdf_tables"
], ],
"fields": [ "fields": [
{ {
@@ -128,6 +129,13 @@
"label": "Column Mapping", "label": "Column Mapping",
"options": "Bank Statement Import Log Column Map" "options": "Bank Statement Import Log Column Map"
}, },
{
"description": "Per-table extraction data for PDF statements (rows, bbox, page image, column mapping). Edited via the banking app.",
"fieldname": "pdf_tables",
"fieldtype": "JSON",
"label": "PDF Tables",
"read_only": 1
},
{ {
"default": "Not Started", "default": "Not Started",
"fieldname": "status", "fieldname": "status",

View File

@@ -7,7 +7,18 @@ from frappe.utils import getdate
from erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log import ( from erpnext.accounts.doctype.bank_statement_import_log.bank_statement_import_log import (
BankStatementImportLog, BankStatementImportLog,
build_table_transactions,
detect_column_mapping,
detect_header_row,
extract_pdf_tables,
get_float_amount, get_float_amount,
get_statement_details,
guess_column_mapping_by_content,
reextract_pdf_table,
set_header_index,
set_pdf_table_header,
update_column_mapping,
update_pdf_tables,
) )
from erpnext.accounts.test.accounts_mixin import AccountsTestMixin from erpnext.accounts.test.accounts_mixin import AccountsTestMixin
from erpnext.tests.utils import ERPNextTestSuite from erpnext.tests.utils import ERPNextTestSuite
@@ -113,6 +124,346 @@ class TestBankStatementImportLog(ERPNextTestSuite, AccountsTestMixin):
self.assertIsNone(get_float_amount("ABCD")) self.assertIsNone(get_float_amount("ABCD"))
self.assertIsNone(get_float_amount("****")) self.assertIsNone(get_float_amount("****"))
# ------------------------------------------------------------------ #
# PDF statement import
# ------------------------------------------------------------------ #
@staticmethod
def _make_pdf(html: str) -> bytes:
    """Render the given HTML string to PDF and return the raw PDF bytes."""
    from pdfkit import from_string

    # Passing False as the output path makes pdfkit return the bytes.
    return from_string(html, False)
@staticmethod
def _encrypt(pdf_bytes: bytes, password: str) -> bytes:
    """Return a password-encrypted copy of the given PDF bytes."""
    from io import BytesIO

    from pypdf import PdfReader, PdfWriter

    writer = PdfWriter()
    # Copy every page across before applying the encryption.
    for page in PdfReader(BytesIO(pdf_bytes)).pages:
        writer.add_page(page)
    writer.encrypt(password)

    output = BytesIO()
    writer.write(output)
    return output.getvalue()
@staticmethod
def _auto_map(table: dict) -> dict:
    """Mimic prepare_pdf_tables' best-effort mapping for a single extracted table.

    Mutates and returns ``table``: sets ``header_index``, ``column_mapping``
    and marks the table as included.
    """
    rows = table["rows"]
    header_index, score = detect_header_row(rows)
    has_header = score >= 2

    if has_header:
        # A confident header row: derive the mapping from its labels.
        table["header_index"] = header_index
        table["column_mapping"] = detect_column_mapping(rows[header_index])
    else:
        # No usable header: fall back to guessing from the cell contents.
        table["header_index"] = None
        table["column_mapping"] = guess_column_mapping_by_content(rows)

    table["included"] = True
    return table
def test_pdf_multi_page_kept_separate_and_unioned(self):
    """Tables on separate pages must NOT be merged; transactions are the union."""
    html = """
<html><body>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Withdrawal</th><th>Deposit</th><th>Balance</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td><td></td><td>9500.00</td></tr>
<tr><td>03/04/2024</td><td>SALARY</td><td></td><td>20000.00</td><td>29500.00</td></tr></table>
<div style="page-break-before: always"></div>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Withdrawal</th><th>Deposit</th><th>Balance</th></tr>
<tr><td>05/04/2024</td><td>ATM WDL</td><td>2000.00</td><td></td><td>27500.00</td></tr></table>
</body></html>
"""
    extracted = extract_pdf_tables(self._make_pdf(html))

    # One table per page, each carrying a 4-value bounding box.
    self.assertEqual(len(extracted), 2)
    pages = sorted(entry["page"] for entry in extracted)
    self.assertEqual(pages, [1, 2])
    for entry in extracted:
        self.assertIn("bbox", entry)
        self.assertEqual(len(entry["bbox"]), 4)

    # Transactions from all tables combined.
    all_transactions = []
    for entry in extracted:
        transactions, _df, _af = build_table_transactions(self._auto_map(entry))
        all_transactions.extend(transactions)

    self.assertEqual(len(all_transactions), 3)
    dates = sorted(txn["date"] for txn in all_transactions)
    self.assertEqual(dates, ["2024-04-01", "2024-04-03", "2024-04-05"])
def test_pdf_junk_table_excluded(self):
    """A non-transactions table (ad/summary) should yield zero transactions."""
    junk_rows = [["Open a new account!", "Call 1800-XYZ"]]
    mapped = self._auto_map({"rows": junk_rows})
    transactions, _df, _af = build_table_transactions(mapped)
    self.assertEqual(transactions, [])
def test_headerless_content_mapping(self):
    """Without a header row, columns are guessed from their contents."""
    rows = [
        ["01/04/2024", "UPI PAYMENT", "500.00"],
        ["03/04/2024", "SALARY CREDIT", "20000.00"],
    ]

    # Collect role -> column index, ignoring columns that are not imported.
    role_to_index = {}
    for column in guess_column_mapping_by_content(rows):
        if column["maps_to"] == "Do not import":
            continue
        role_to_index[column["maps_to"]] = column["index"]

    self.assertEqual(role_to_index.get("Date"), 0)
    self.assertEqual(role_to_index.get("Description"), 1)
    self.assertEqual(role_to_index.get("Amount"), 2)
def test_pdf_password_protected(self):
    """Encrypted PDFs error without a password and succeed with the right one."""
    html = """
<html><body><table border="1">
<tr><th>Date</th><th>Narration</th><th>Amount</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td></tr></table></body></html>
"""
    protected = self._encrypt(self._make_pdf(html), "secret123")

    # Missing password, then a wrong one: both must raise a validation error.
    with self.assertRaises(frappe.ValidationError):
        extract_pdf_tables(protected)
    with self.assertRaises(frappe.ValidationError):
        extract_pdf_tables(protected, "wrong")

    # The right password yields at least one table.
    self.assertTrue(extract_pdf_tables(protected, "secret123"))
def test_pdf_no_tables_detected(self):
    """A PDF with no detectable tables raises a clear error (e.g. scanned PDFs)."""
    prose_only = "<html><body><p>Just some prose with no tabular data at all.</p></body></html>"
    pdf_bytes = self._make_pdf(prose_only)
    with self.assertRaises(frappe.ValidationError):
        extract_pdf_tables(pdf_bytes)
def _create_pdf_import_log(self, html: str) -> BankStatementImportLog:
    """Render ``html`` to a private PDF File and insert an import log pointing at it."""
    file_fields = {
        "doctype": "File",
        "file_name": f"test-statement-{frappe.generate_hash(length=8)}.pdf",
        "is_private": 1,
        "content": self._make_pdf(html),
    }
    pdf_file = frappe.get_doc(file_fields).insert(ignore_permissions=True)

    log_fields = {
        "doctype": "Bank Statement Import Log",
        "name": f"test-pdf-{frappe.generate_hash(length=8)}",
        "bank_account": self.bank_account,
        "file": pdf_file.file_url,
    }
    import_log = frappe.get_doc(log_fields)
    return import_log.insert()
def test_pdf_full_lifecycle(self):
"""End-to-end doc lifecycle: insert -> rasterize -> preview -> edit -> import."""
# Two-page statement: two transactions on page 1, one on page 2.
html = """
<html><body>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Withdrawal</th><th>Deposit</th><th>Balance</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td><td></td><td>9500.00</td></tr>
<tr><td>03/04/2024</td><td>SALARY</td><td></td><td>20000.00</td><td>29500.00</td></tr></table>
<div style="page-break-before: always"></div>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Withdrawal</th><th>Deposit</th><th>Balance</th></tr>
<tr><td>05/04/2024</td><td>ATM WDL</td><td>2000.00</td><td></td><td>27500.00</td></tr></table>
</body></html>
"""
doc = self._create_pdf_import_log(html)
# before_insert populated the per-table JSON, page images and the union summary
tables = doc.get_pdf_tables()
self.assertEqual(len(tables), 2)
for table in tables:
self.assertTrue(table.get("page_image"))
self.assertIn("bbox", table)
# Page-image File must be attached to the final docname, not the client's temp id
attached_to = frappe.db.get_value("File", {"file_url": table["page_image"]}, "attached_to_name")
self.assertEqual(attached_to, doc.name)
# Summary counts: two withdrawals (debits) and one deposit (credit) in the HTML above.
self.assertEqual(doc.number_of_transactions, 3)
self.assertEqual(doc.total_debit_transactions, 2)
self.assertEqual(doc.total_credit_transactions, 1)
# get_statement_details returns the union and the per-table data for the editor
details = get_statement_details(doc.name)
self.assertEqual(len(details["final_transactions"]), 3)
self.assertEqual(details["raw_data"], [])
self.assertEqual(len(details["pdf_tables"]), 2)
# Excluding the second table (page 2) drops its single transaction
tables[1]["included"] = False
update_pdf_tables(doc.name, tables)
doc.reload()
self.assertEqual(doc.number_of_transactions, 2)
# Re-include and import; transactions are created for the union
tables[1]["included"] = True
update_pdf_tables(doc.name, tables)
doc.reload()
doc.insert_transactions()
doc.reload()
self.assertEqual(doc.status, "Completed")
# Only submitted (docstatus 1) Bank Transactions for this test's bank account are counted.
created = frappe.get_all(
"Bank Transaction", filters={"bank_account": self.bank_account, "docstatus": 1}
)
self.assertEqual(len(created), 3)
def test_pdf_reextract_table_from_bbox(self):
    """Re-extracting a table from an adjusted bbox updates its rows and stores the bbox."""
    html = """
<html><body>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Amount</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td></tr>
<tr><td>03/04/2024</td><td>SALARY</td><td>20000.00</td></tr></table>
</body></html>
"""
    import_log = self._create_pdf_import_log(html)
    first_table = import_log.get_pdf_tables()[0]
    same_bbox = first_table["bbox"]

    result = reextract_pdf_table(
        import_log.name, first_table["page"], first_table["table_index"], same_bbox
    )
    reextracted = result["pdf_tables"][0]

    # Same region -> same rows; bbox is persisted
    expected_bbox = [round(float(value), 2) for value in same_bbox]
    self.assertTrue(reextracted["rows"])
    self.assertEqual(reextracted["bbox"], expected_bbox)
    self.assertEqual(reextracted["rows"], first_table["rows"])
def test_pdf_reextract_changed_bbox_updates_rows_and_transactions(self):
    """Shrinking a table's bbox must drop rows and update the transaction count end-to-end."""
    html = """
<html><body>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Amount</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td></tr>
<tr><td>03/04/2024</td><td>SALARY</td><td>20000.00</td></tr>
<tr><td>05/04/2024</td><td>ATM WDL</td><td>2000.00</td></tr>
<tr><td>07/04/2024</td><td>INTEREST</td><td>12.50</td></tr></table>
</body></html>
"""
    import_log = self._create_pdf_import_log(html)
    before = import_log.get_pdf_tables()[0]
    rows_before = len(before["rows"])
    transactions_before = import_log.number_of_transactions

    # Shrink the box to roughly the top half (simulating a user drag).
    x0, top, x1, bottom = before["bbox"]
    half_height_bbox = [x0, top, x1, top + (bottom - top) * 0.5]

    result = reextract_pdf_table(
        import_log.name, before["page"], before["table_index"], half_height_bbox
    )
    after = result["pdf_tables"][0]
    import_log.reload()

    self.assertLess(len(after["rows"]), rows_before)
    self.assertLess(import_log.number_of_transactions, transactions_before)
    self.assertEqual(len(result["final_transactions"]), import_log.number_of_transactions)
def test_pdf_set_table_header(self):
"""User can clear a table's header (no header row) or set a specific header row."""
html = """
<html><body>
<table border="1"><tr><th>Date</th><th>Narration</th><th>Amount</th></tr>
<tr><td>01/04/2024</td><td>UPI PAYMENT</td><td>500.00</td></tr>
<tr><td>03/04/2024</td><td>SALARY</td><td>20000.00</td></tr></table>
</body></html>
"""
doc = self._create_pdf_import_log(html)
table = doc.get_pdf_tables()[0]
# The first row is auto-detected as the header on insert.
self.assertEqual(table["header_index"], 0)
# Snapshot of role -> column index before any header change, ignoring unmapped columns.
original = {
c["maps_to"]: c["index"] for c in table["column_mapping"] if c["maps_to"] != "Do not import"
}
# Clear the header (-1): header is removed but the mapping is preserved (not re-guessed).
details = set_pdf_table_header(doc.name, table["page"], table["table_index"], -1)
updated = details["pdf_tables"][0]
self.assertIsNone(updated["header_index"])
preserved = {
c["maps_to"]: c["index"] for c in updated["column_mapping"] if c["maps_to"] != "Do not import"
}
self.assertEqual(preserved, original)
# Set row 0 back as the header: it resolves meaningfully, so mapping is re-derived.
details = set_pdf_table_header(doc.name, table["page"], table["table_index"], 0)
updated = details["pdf_tables"][0]
self.assertEqual(updated["header_index"], 0)
mapped = {
c["maps_to"]: c["index"] for c in updated["column_mapping"] if c["maps_to"] != "Do not import"
}
self.assertEqual(mapped.get("Date"), 0)
self.assertEqual(mapped.get("Description"), 1)
# ------------------------------------------------------------------ #
# CSV/XLSX column mapping + header overrides
# ------------------------------------------------------------------ #
def _create_csv_import_log(self, csv_text: str) -> BankStatementImportLog:
    """Store ``csv_text`` as a private CSV File and insert an import log pointing at it."""
    file_fields = {
        "doctype": "File",
        "file_name": f"test-statement-{frappe.generate_hash(length=8)}.csv",
        "is_private": 1,
        "content": csv_text,
    }
    csv_file = frappe.get_doc(file_fields).insert(ignore_permissions=True)

    log_fields = {
        "doctype": "Bank Statement Import Log",
        "bank_account": self.bank_account,
        "file": csv_file.file_url,
    }
    import_log = frappe.get_doc(log_fields)
    return import_log.insert()
def test_csv_update_column_mapping(self):
    """Overriding the column mapping recomputes the transaction count."""
    csv_text = "Date,Narration,Amount\n01/04/2024,UPI PAYMENT,500.00\n03/04/2024,SALARY,20000.00\n"
    import_log = self._create_csv_import_log(csv_text)
    self.assertEqual(import_log.number_of_transactions, 2)

    # Drop the amount column -> no amount -> no transactions detected.
    without_amount = []
    for column in import_log.column_mapping:
        role = "Do not import" if column.maps_to == "Amount" else column.maps_to
        without_amount.append({"index": column.index, "maps_to": role})

    result = update_column_mapping(import_log.name, without_amount)
    import_log.reload()

    self.assertEqual(import_log.number_of_transactions, 0)
    self.assertEqual(len(result["final_transactions"]), 0)
def test_csv_set_header_index_preserves_mapping(self):
"""Clearing the header keeps the user's mapping; it is not re-guessed."""
csv_text = "Date,Narration,Amount\n01/04/2024,UPI PAYMENT,500.00\n03/04/2024,SALARY,20000.00\n"
doc = self._create_csv_import_log(csv_text)
# The first CSV line is auto-detected as the header on insert.
self.assertEqual(doc.detected_header_index, 0)
# Manually map the Narration column (1) as Reference.
mapping = [
{
"index": c.index,
"maps_to": "Reference" if c.index == 1 else c.maps_to,
"header_text": c.header_text,
}
for c in doc.column_mapping
]
update_column_mapping(doc.name, mapping)
doc.reload()
# Clear the header row: the manual mapping must be preserved (column 1 stays Reference,
# not re-guessed to Description). The label row fails date parsing, so 2 transactions remain.
set_header_index(doc.name, -1)
doc.reload()
# Unlike the PDF per-table header (None when cleared), the doc field keeps -1 as "no header".
self.assertEqual(doc.detected_header_index, -1)
self.assertEqual(doc.number_of_transactions, 2)
current = {c.index: c.maps_to for c in doc.column_mapping}
self.assertEqual(current.get(1), "Reference")
# Restore row 0 as the header (resolves meaningfully -> re-derived from labels).
set_header_index(doc.name, 0)
doc.reload()
self.assertEqual(doc.detected_header_index, 0)
restored = {c.maps_to: c.index for c in doc.column_mapping if c.maps_to != "Do not import"}
self.assertEqual(restored.get("Description"), 1)
test_hdfc_sample_statement_data = [ test_hdfc_sample_statement_data = [
["HDFC BANK Ltd. Page No .: 1 Statement of accounts", "", "", "", "", "", ""], ["HDFC BANK Ltd. Page No .: 1 Statement of accounts", "", "", "", "", "", ""],

View File

@@ -24,6 +24,11 @@ dependencies = [
# MT940 parser for bank statements # MT940 parser for bank statements
"mt-940>=4.26.0", "mt-940>=4.26.0",
# PDF bank statement table extraction + page rasterization.
# Pulls pdfminer.six (parsing), Pillow, and pypdfium2 (render backend for
# Page.to_image) - all installable as prebuilt wheels, no system binaries required.
"pdfplumber>=0.11.0",
] ]
[build-system] [build-system]