# -*- coding: utf-8 -*-
"""
Created on Fri Feb 12 07:04:02 2016
@author: ibh
Takes all formula's from a excel work book and translates each to the equivalent expression.
Openpyxl is the fastest library but it can not deal all values. Therefor xlwings is also used.
But only to read repeated formula's which inly will show as '='
Also defines function used when using xlwings to automate excel.
These are used in :any:`modeldump_excel` and :any:`modelload_excel`
Some of the docstring are not very informative, to be improved.
"""
import pandas as pd
import networkx as nx
import openpyxl
from openpyxl import load_workbook
from openpyxl.formula import Tokenizer
from openpyxl.utils import get_column_letter
from openpyxl.utils import cols_from_range,rows_from_range
# try:
# import xlwings as xw
# except:
# ...
# import networkx as nx
import matplotlib.pylab as plt
import seaborn as sns
from pathlib import Path
import modelclass as mc
DEBUG = 0
[docs]
def findequations(name):
'''Takes all formula's from a excel work book and translates each to the equivalent expression.
Multicell ranges are expanded to a comma separated list. \n
The ordinary operators and the SUM function can be handled. If you need more functions. You have to impelent them in the modelclass.
In the model each cell reference is prefixed by <sheet name>_
Openpyxl is the fastest library and it has a tokenizer
but it can not read all values.
Therefor xlwings is used to read repeated formula's which Openpyxl will show as '='
input:
:name: Location of a excel sheeet
Returns:
:modeldic: A dictionary with formulars keyed by cell reference
'''
outdic={}
wb = load_workbook(name, read_only=True,data_only=False) # to read the spresdsheet first save as xml then write it again
try:
wb2 = xw.Book(name) # the same worksheet in xlwings
except:
...
# breakpoint()
allsheets = wb.sheetnames
for wsname in allsheets:
ws=wb[wsname]
try:
ws2=wb2.sheets(wsname) # the same sheet but in xlwings
except:
...
formulacell = [c for row in ws.rows for c in row if c.value != None and c.data_type == 'f']
for cell in formulacell:
cellref=get_column_letter(cell.column)+str(cell.row)
if DEBUG : print('This cell:',cellref,cell.data_type,cell.value)
if cell.value == '=' :
print(f'Repeat cell = so xlwings has to be imported {cell=}')
frml = cell.value if cell.value != '=' else ws2.range(cellref).formula # To avoid emty repeating formula'rs
else:
frml = cell.value
tok=Tokenizer(frml)
if DEBUG and False : print("\n".join("%19s%15s%9s" % (t.value, t.type, t.subtype) for t in tok.items))
# left hand term is <worksheet>!<column><row>=
lhs=wstrans(wsname) + get_column_letter(cell.column)+str(cell.row)
out=[lhs+'=']
for t in tok.items:
if t.subtype == "RANGE":
#Find or create the sheetname
sheet0 = t.value.split('!')[0] if '!' in t.value else wsname
sheet = wstrans(sheet0)
# print(t.value,'---->')
# Get all the cells in the range columwize
# the nested list comprehension makes the list works for square ranges.
# the split construct drops the sheet name from the range name if any
thisrange=[sheet+i for subtupler in cols_from_range((t.value.split('!')[-1])) for i in subtupler]
# put a ',' between each element i the list
thistext=','.join(thisrange)
#print(thisrange)
out=out + [thistext]
else:
out.append(t.value)
#create the equation and get rid of the !
equation=''.join(out).replace('!','_')
outdic[lhs]=equation
#print(equation)
try:
wb2.close()
except:
...
return outdic
[docs]
def showcells(name):
'''Finds values in a excel workbook with a value different from 0
'''
wb = load_workbook(name, read_only=True,data_only=False) # to read the spresdsheet first save as xml then write it again
allsheets = wb.sheetnames
for wsname in allsheets:
ws=wb[wsname]
for row in ws.rows:
for c in row:
if c.value != None:
print(wsname,get_column_letter(c.column ),c.row,c.data_type,c.value)
[docs]
def findvalues(name):
'''Finds numerical values in a excel workbook with a value different from 0
'''
wb = load_workbook(name, read_only=True,data_only=True) # to read the spresdsheet first save as xml then write it again
allsheets = wb.sheetnames
values=[]
for wsname in allsheets:
ws=wb[wsname]
twsname = wstrans(wsname)
values+=[(twsname+get_column_letter(c.column )+str(c.row),c.value)
for row in ws.rows for c in row
if c.value != None and c.data_type == 'n' ]
return values
[docs]
def wstrans(wsname):
'Translates workspace names'
res = '_'+wsname.replace("'","").replace(' - ','_').replace(' ','_').replace('-','_')+'_'
return res.upper()
[docs]
def findcoordinates(name):
'''Finds the cell references matching the codes in a LCR workbook from EBA
This is needed for the mapping of the raw data to the excel cell refereces.
input:
:name: Location of a excel sheeet
returns:
:coldf: Dataframe with mapping between excel column and EBA columns_code
:rowdf: Dataframe with row with mapping between excel row and EBS data row_code
'''
wb = load_workbook(name, read_only=True,data_only=True) # to read the spresdsheet first save as xml then write it again
allsheets = wb.sheetnames
colcodes=[]
rowcodes=[]
for wsname in allsheets:
ws=wb[wsname]
try:
#find the anchor for the row and columns id, the first
cell = [c for row in ws.rows for c in row if c.value and c.data_type == 's' and 'Row'== c.value][0]
# find the numeric values in the column below the anchor, only the digits and remenber python index starts with 0 while excels index starts with 1
rowcodes += [(wsname , c.value , c.row ) for r in ws.rows for c in r
if c.value and c.column == cell.column and c.row > cell.row and
( c.data_type == 'n' or (c.data_type == 's' and c.value.isdigit()) )]
# finds the numeric values in the row at the right of the anchor.
# c.rows returns a generator (probably because te potential for a huge number) therefor the list(c.rows)
colcodes += [(wsname , c.value , get_column_letter(c.column ) ) for c in list(ws.rows)[cell.row-1]
if c.value and c.column > cell.column and (c.data_type == 'n' or (c.data_type == 's' and c.value.isdigit()))]
except: # Ok this ws did not have an ancor cell
pass
coldf = pd.DataFrame(colcodes,columns=['sheet','colcode','col'])
rowdf = pd.DataFrame(rowcodes,columns=['sheet','rowcode','row'])
return coldf,rowdf
[docs]
def getexcelmodel(name):
''' Creates a model instance from a excel sheet
SUM is replaced by SUM_EXCEL which is a function in the modelclass
In the excel formulars this function accepts ordinary operators and SUM in excel sheets
input:
:name: Location of a excel sheeet
returns:
:model: A model instance with the formulars of the excel sheet
:para: A list of values in the sheet which matches exogeneous variables in the model
'''
modelname = Path(name).stem
eqdic = findequations(name)
eqdic2 = {i : eq.replace('SUM(','SUM_EXCEL(') for i,eq in eqdic.items()}
fdic = {i : 'Frml xx '+eqdic2[i] + r' $ ' for i,eq in eqdic2.items()}
f = '\n'.join([eq for i,eq in fdic.items()])
_mmodel=mc.model(f,modelname=modelname)
zz = findvalues(name)
para = [z for z in zz if z[0] in _mmodel.exogene] # find all values which match a exogeneous variable in model
return _mmodel,para
#%% now functions related to running xlsheets
[docs]
def df_to_sheet(name,df,wb,after=None):
'''
Dataframe to sheet
Args:
name (TYPE): DESCRIPTION.
df (TYPE): DESCRIPTION.
wb (TYPE): DESCRIPTION.
after (TYPE, optional): DESCRIPTION. Defaults to None.
Returns:
sht (TYPE): DESCRIPTION.
'''
try:
wb.sheets[name].delete()
except:
pass
try:
sht = wb.sheets.add(name,after=after)
except Exception as e :
print('no sheet added',str(e))
df_ = df.copy()
df_.index = indextrans(df.index)
sht.range('A1').value = df_.T
active_window = wb.app.api.ActiveWindow
active_window.FreezePanes = False
active_window.SplitColumn = 1
active_window.SplitRow = 1
active_window.FreezePanes = True
sht.autofit(axis="columns")
sht[(2,25)].select()
return sht
[docs]
def obj_to_sheet(name,obj,wb,after=None):
'''
An python object to sheet
Args:
name (TYPE): DESCRIPTION.
obj (TYPE): DESCRIPTION.
wb (TYPE): DESCRIPTION.
after (TYPE, optional): DESCRIPTION. Defaults to None.
Returns:
None.
'''
# breakpoint()
try:
wb.sheets[name].delete()
except:
pass
try:
sht = wb.sheets.add(name,after=after)
except Exception as e:
print(str(e))
print('no sheet added ')
sht.range('A1').value=obj
[docs]
def sheet_to_df(wb,name):
'''
Sheet to df
Args:
wb (TYPE): DESCRIPTION.
name (TYPE): DESCRIPTION.
Returns:
df (TYPE): DESCRIPTION.
'''
df = wb.sheets[name].range('A1').options(pd.DataFrame, expand='table').value.T
df.index = indextrans(df.index)
return df
[docs]
def sheet_to_dict(wb,name,integers=None):
''' transform the named sheet to a python dict. If we need a integer it has to be in the integer set'''
integers_ = {'max_iterations'} if isinstance(None,type(None)) else integers
try:
out = wb.sheets[name].range('A1').options(dict,expand='table').value
out2 = {k : int(v) if k in integers_ else v for k,v in out.items()}
except:
out2={}
return out2
if __name__ == '__main__':
testxls=Path('exceltest/lcrberegning2.xlsx')
mmodel,para = getexcelmodel(testxls)
eq=mmodel.equations
mmodel.draw('_LCR_C62',up=10,down=1,HR=0,pdf=1) # The LCR
mmodel.draw('_LCR_C25',up=4,down=1,pdf=1) # liquid assets
mmodel.draw('_LCR_C10',up=2,pdf=1) # Leel 1 covered bonds
c,r = findcoordinates(testxls)
xx = findequations(testxls)