files/gnucash-convert-account-report-to-csv

63 lines
1.6 KiB
Python
Executable File

#!/usr/bin/env python
# Author: Josh Holtrop
# Date: 2012-10-28
# Purpose: Convert an HTML "Account Report" output from gnucash to csv format
# Usage: gnucash-convert-account-report-to-csv account.html > account.csv
import os
import sys
import re
def filter_contents(s):
    """Return the plain text of one HTML table cell, ready to use as a CSV field.

    Processing steps, in order:
      1. Remove everything that looks like a tag (``<...>``).  The removal is
         repeated until the text stops changing, so nested angle brackets
         such as ``<<b>i>`` are stripped completely.
      2. Fold every line break, together with the whitespace around it, into
         a single space.
      3. Delete all ``,`` and ``$`` characters, then trim surrounding
         whitespace.

    s -- raw cell contents (may contain markup and line breaks).
    Returns the cleaned string; an empty input yields an empty string.
    """
    while True:
        stripped = re.sub(r'<[^<>]*>', '', s)
        if stripped == s:
            break
        s = stripped
    # The caller writes one CSV record per output line, so a line break left
    # inside a field would split the record in two.  Cells that span several
    # source lines arrive here with their line breaks intact.
    s = re.sub(r'\s*[\r\n]\s*', ' ', s)
    # Commas are dropped rather than quoted so the field can be joined with
    # a plain ','; '$' is dropped so amounts are bare numbers.
    return s.replace(',', '').replace('$', '').strip()
# Patterns used by slurp_row, compiled once instead of on every input line.
_ROW_END_RE = re.compile(r'</tr\s*>')
_CELL_OPEN_RE = re.compile(r'<t[hd]\b[^<>]*>')
_CELL_CLOSE_RE = re.compile(r'</t[hd]>')


def slurp_row(fh, rows):
    """Read one table row from fh and append its cells to rows.

    Lines are read with fh.readline() until a closing ``</tr>`` tag is seen.
    Every ``<td>``/``<th>`` cell completed before that tag is passed through
    filter_contents() and collected, in order, into a list that is appended
    to rows.

    A cell may be opened and closed on the same line, several cells may share
    a line, and a cell may span several lines.  Cells that appear on the same
    line as ``</tr>`` (before it) are included.  Anything after ``</tr>`` on
    that line is ignored, as is a cell still open when ``</tr>`` is reached.

    fh   -- object with a readline() method returning '' at end of input.
    rows -- list that receives one list of cell strings per completed row.

    Returns True when a row was appended, False when the input ended before
    a ``</tr>`` was found (a partially read row is then discarded).
    """
    row = []
    pieces = []       # raw text fragments of the cell currently being read
    in_cell = False
    for line in iter(fh.readline, ''):
        row_end = _ROW_END_RE.search(line)
        # Only the part of the line in front of </tr> belongs to this row.
        text = line if row_end is None else line[:row_end.start()]
        pos = 0
        opened_here = False
        while True:
            if not in_cell:
                opening = _CELL_OPEN_RE.search(text, pos)
                if opening is None:
                    break
                in_cell = True
                opened_here = True
                pieces = []
                pos = opening.end()
            closing = _CELL_CLOSE_RE.search(text, pos)
            if closing is None:
                # The cell continues on the next line.
                tail = text[pos:]
                # The line break ending the line that opened the cell is
                # dropped, so text following the opening tag is joined
                # directly to the next line; breaks on continuation lines
                # are kept verbatim.
                if opened_here and tail.endswith('\n'):
                    tail = tail[:-1]
                pieces.append(tail)
                break
            pieces.append(text[pos:closing.start()])
            row.append(filter_contents(''.join(pieces)))
            in_cell = False
            pos = closing.end()
        if row_end is not None:
            rows.append(row)
            return True
    return False
def main(argv):
    """Convert the HTML report named by argv[1] to CSV on standard output.

    The file is parsed row by row with slurp_row(); afterwards every row
    that has exactly 8 cells is written as one comma-separated line.  Rows
    with any other number of cells are skipped (presumably headings and
    totals -- the code only checks the cell count).

    argv -- command-line argument list; argv[1] is the input file path.

    Returns -2 when no input file was given, -1 when the input file cannot
    be opened (a message is written to standard error in both cases), and
    None on success.
    """
    if len(argv) < 2:
        sys.stderr.write("Error: specify input HTML file\n")
        return -2
    try:
        fh = open(argv[1], 'r')
    except IOError as e:
        sys.stderr.write("Error: cannot open input file: %s\n" % e)
        return -1
    rows = []
    # 'with' guarantees the file is closed even if parsing raises part-way.
    with fh:
        while slurp_row(fh, rows):
            pass
    for r in rows:
        if len(r) == 8:
            sys.stdout.write(','.join(r))
            sys.stdout.write('\n')
# Script entry point: run the converter and hand its return value to the
# shell as the process exit status (None is reported as success).
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)