#!/usr/bin/env python

# Author: Josh Holtrop
# Date: 2012-10-28
# Purpose: Convert a HTML "Account Report" output from gnucash to csv format
# Usage: gnucash-convert-account-report-to-csv account.html > account.csv
# Origin: commit 61695ff ("add gnucash-convert-account-report-to-csv")

import os
import sys
import re

# Only table rows with exactly this many cells are written to the CSV output.
REPORT_COLUMNS = 8

# Any single HTML tag (no nested angle brackets inside it).
_TAG_RE = re.compile(r'<[^<>]*>')

# The three pieces of table markup the row scanner reacts to.  The \b after
# t[hd] keeps tags such as <thead> from being mistaken for a cell.
_TOKEN_RE = re.compile(
    r'(?P<cell_open><t[hd]\b[^>]*>)'
    r'|(?P<cell_close><\/t[hd]\s*>)'
    r'|(?P<row_end><\/tr\s*>)'
)


def filter_contents(s):
    """Return the plain text of an HTML fragment, made safe for a CSV field.

    Tags are removed repeatedly until the text stops changing (removing one
    tag can expose another), commas are dropped so the value cannot split a
    CSV record, and every run of whitespace -- including the newlines of a
    cell that spanned several input lines -- is collapsed to a single space,
    with leading and trailing whitespace removed.

    s -- the raw text found between a cell's opening and closing tags.
    """
    while True:
        stripped = _TAG_RE.sub('', s)
        if stripped == s:
            break
        s = stripped
    return ' '.join(s.replace(',', '').split())


def slurp_row(fh, rows):
    """Read one table row from fh and append its cells to rows.

    Lines are consumed from fh up to and including the line holding the next
    </tr>.  Each <td>/<th> cell seen before that point contributes one
    cleaned string (see filter_contents) to a new list, which is appended to
    rows.  A cell may span several lines, and a line may hold several cells.

    fh   -- an open text file object positioned anywhere in the document.
    rows -- list that receives the new row (a list of strings).

    Returns True when a row was terminated by </tr> and appended, and False
    when the end of the file was reached first; cells collected for such an
    unterminated row are discarded.

    Limitations: a cell still open when </tr> is seen is dropped, and any
    text following </tr> on the same line is not examined.
    """
    row = []
    # Text fragments of the cell currently open, or None outside a cell.
    fragments = None
    # readline() is used rather than iterating the file object so that each
    # call resumes exactly where the previous one stopped.
    for line in iter(fh.readline, ''):
        pos = 0
        for m in _TOKEN_RE.finditer(line):
            if fragments is not None:
                fragments.append(line[pos:m.start()])
            pos = m.end()
            kind = m.lastgroup
            if kind == 'row_end':
                rows.append(row)
                return True
            if kind == 'cell_open':
                fragments = []
            elif fragments is not None:
                # cell_close: finish the open cell.  A closing tag with no
                # matching opening tag is ignored.
                row.append(filter_contents(''.join(fragments)))
                fragments = None
        if fragments is not None:
            # The cell continues on the next line; keep the rest of this one.
            fragments.append(line[pos:])
    return False


def main(argv):
    """Convert the HTML report named in argv[1] to CSV on standard output.

    Every table row with exactly REPORT_COLUMNS cells is written as one
    comma-separated line; all other rows are skipped.

    argv -- command-line argument list, with the program name at argv[0].

    Returns 0 on success, or -2 (after a message on standard error) when no
    input file was given.  Errors from opening or reading the file propagate
    to the caller.
    """
    if len(argv) < 2:
        sys.stderr.write("Error: specify input HTML file\n")
        return -2
    rows = []
    # The with-block guarantees the file is closed even if parsing fails.
    with open(argv[1], 'r') as fh:
        while slurp_row(fh, rows):
            pass
    for r in rows:
        if len(r) == REPORT_COLUMNS:
            sys.stdout.write(','.join(r))
            sys.stdout.write('\n')
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))