Source code for model_Excel

# -*- coding: utf-8 -*-
"""
Created on Fri Feb 12 07:04:02 2016

@author: ibh

Takes all formula's  from a excel work book and translates each to the equivalent expression. 
Openpyxl is the fastest library but it can not deal all values. Therefor xlwings is also used. 
But only to read repeated formula's which inly will show as '='



Also defines function used when using xlwings to automate excel.

These are used in :any:`modeldump_excel` and :any:`modelload_excel`  

Some of the docstring are not very informative, to be improved. 



"""
import pandas as pd 
import networkx as nx 
import openpyxl
from openpyxl import load_workbook
from openpyxl.formula import Tokenizer
from openpyxl.utils import get_column_letter
from openpyxl.utils import cols_from_range,rows_from_range
# try:
#     import xlwings as xw
# except:
#     ...
# import networkx as nx

import matplotlib.pylab  as plt 
import seaborn as sns
from pathlib import Path

import modelclass as mc

DEBUG = 0

[docs] def findequations(name): '''Takes all formula's from a excel work book and translates each to the equivalent expression. Multicell ranges are expanded to a comma separated list. \n The ordinary operators and the SUM function can be handled. If you need more functions. You have to impelent them in the modelclass. In the model each cell reference is prefixed by <sheet name>_ Openpyxl is the fastest library and it has a tokenizer but it can not read all values. Therefor xlwings is used to read repeated formula's which Openpyxl will show as '=' input: :name: Location of a excel sheeet Returns: :modeldic: A dictionary with formulars keyed by cell reference ''' outdic={} wb = load_workbook(name, read_only=True,data_only=False) # to read the spresdsheet first save as xml then write it again try: wb2 = xw.Book(name) # the same worksheet in xlwings except: ... # breakpoint() allsheets = wb.sheetnames for wsname in allsheets: ws=wb[wsname] try: ws2=wb2.sheets(wsname) # the same sheet but in xlwings except: ... formulacell = [c for row in ws.rows for c in row if c.value != None and c.data_type == 'f'] for cell in formulacell: cellref=get_column_letter(cell.column)+str(cell.row) if DEBUG : print('This cell:',cellref,cell.data_type,cell.value) if cell.value == '=' : print(f'Repeat cell = so xlwings has to be imported {cell=}') frml = cell.value if cell.value != '=' else ws2.range(cellref).formula # To avoid emty repeating formula'rs else: frml = cell.value tok=Tokenizer(frml) if DEBUG and False : print("\n".join("%19s%15s%9s" % (t.value, t.type, t.subtype) for t in tok.items)) # left hand term is <worksheet>!<column><row>= lhs=wstrans(wsname) + get_column_letter(cell.column)+str(cell.row) out=[lhs+'='] for t in tok.items: if t.subtype == "RANGE": #Find or create the sheetname sheet0 = t.value.split('!')[0] if '!' in t.value else wsname sheet = wstrans(sheet0) # print(t.value,'---->') # Get all the cells in the range columwize # the nested list comprehension makes the list works for square ranges. # the split construct drops the sheet name from the range name if any thisrange=[sheet+i for subtupler in cols_from_range((t.value.split('!')[-1])) for i in subtupler] # put a ',' between each element i the list thistext=','.join(thisrange) #print(thisrange) out=out + [thistext] else: out.append(t.value) #create the equation and get rid of the ! equation=''.join(out).replace('!','_') outdic[lhs]=equation #print(equation) try: wb2.close() except: ... return outdic
[docs] def showcells(name): '''Finds values in a excel workbook with a value different from 0 ''' wb = load_workbook(name, read_only=True,data_only=False) # to read the spresdsheet first save as xml then write it again allsheets = wb.sheetnames for wsname in allsheets: ws=wb[wsname] for row in ws.rows: for c in row: if c.value != None: print(wsname,get_column_letter(c.column ),c.row,c.data_type,c.value)
[docs] def findvalues(name): '''Finds numerical values in a excel workbook with a value different from 0 ''' wb = load_workbook(name, read_only=True,data_only=True) # to read the spresdsheet first save as xml then write it again allsheets = wb.sheetnames values=[] for wsname in allsheets: ws=wb[wsname] twsname = wstrans(wsname) values+=[(twsname+get_column_letter(c.column )+str(c.row),c.value) for row in ws.rows for c in row if c.value != None and c.data_type == 'n' ] return values
[docs] def wstrans(wsname): 'Translates workspace names' res = '_'+wsname.replace("'","").replace(' - ','_').replace(' ','_').replace('-','_')+'_' return res.upper()
[docs] def findcoordinates(name): '''Finds the cell references matching the codes in a LCR workbook from EBA This is needed for the mapping of the raw data to the excel cell refereces. input: :name: Location of a excel sheeet returns: :coldf: Dataframe with mapping between excel column and EBA columns_code :rowdf: Dataframe with row with mapping between excel row and EBS data row_code ''' wb = load_workbook(name, read_only=True,data_only=True) # to read the spresdsheet first save as xml then write it again allsheets = wb.sheetnames colcodes=[] rowcodes=[] for wsname in allsheets: ws=wb[wsname] try: #find the anchor for the row and columns id, the first cell = [c for row in ws.rows for c in row if c.value and c.data_type == 's' and 'Row'== c.value][0] # find the numeric values in the column below the anchor, only the digits and remenber python index starts with 0 while excels index starts with 1 rowcodes += [(wsname , c.value , c.row ) for r in ws.rows for c in r if c.value and c.column == cell.column and c.row > cell.row and ( c.data_type == 'n' or (c.data_type == 's' and c.value.isdigit()) )] # finds the numeric values in the row at the right of the anchor. # c.rows returns a generator (probably because te potential for a huge number) therefor the list(c.rows) colcodes += [(wsname , c.value , get_column_letter(c.column ) ) for c in list(ws.rows)[cell.row-1] if c.value and c.column > cell.column and (c.data_type == 'n' or (c.data_type == 's' and c.value.isdigit()))] except: # Ok this ws did not have an ancor cell pass coldf = pd.DataFrame(colcodes,columns=['sheet','colcode','col']) rowdf = pd.DataFrame(rowcodes,columns=['sheet','rowcode','row']) return coldf,rowdf
[docs] def getexcelmodel(name): ''' Creates a model instance from a excel sheet SUM is replaced by SUM_EXCEL which is a function in the modelclass In the excel formulars this function accepts ordinary operators and SUM in excel sheets input: :name: Location of a excel sheeet returns: :model: A model instance with the formulars of the excel sheet :para: A list of values in the sheet which matches exogeneous variables in the model ''' modelname = Path(name).stem eqdic = findequations(name) eqdic2 = {i : eq.replace('SUM(','SUM_EXCEL(') for i,eq in eqdic.items()} fdic = {i : 'Frml xx '+eqdic2[i] + r' $ ' for i,eq in eqdic2.items()} f = '\n'.join([eq for i,eq in fdic.items()]) _mmodel=mc.model(f,modelname=modelname) zz = findvalues(name) para = [z for z in zz if z[0] in _mmodel.exogene] # find all values which match a exogeneous variable in model return _mmodel,para
#%% now functions related to running xlsheets
[docs] def indextrans(index): ''' Transforms a period index to excel acceptable datatype ''' out = [i.year if type(index) == pd.core.indexes.period.PeriodIndex else int(i) for i in index] return out
[docs] def df_to_sheet(name,df,wb,after=None): ''' Dataframe to sheet Args: name (TYPE): DESCRIPTION. df (TYPE): DESCRIPTION. wb (TYPE): DESCRIPTION. after (TYPE, optional): DESCRIPTION. Defaults to None. Returns: sht (TYPE): DESCRIPTION. ''' try: wb.sheets[name].delete() except: pass try: sht = wb.sheets.add(name,after=after) except Exception as e : print('no sheet added',str(e)) df_ = df.copy() df_.index = indextrans(df.index) sht.range('A1').value = df_.T active_window = wb.app.api.ActiveWindow active_window.FreezePanes = False active_window.SplitColumn = 1 active_window.SplitRow = 1 active_window.FreezePanes = True sht.autofit(axis="columns") sht[(2,25)].select() return sht
[docs] def obj_to_sheet(name,obj,wb,after=None): ''' An python object to sheet Args: name (TYPE): DESCRIPTION. obj (TYPE): DESCRIPTION. wb (TYPE): DESCRIPTION. after (TYPE, optional): DESCRIPTION. Defaults to None. Returns: None. ''' # breakpoint() try: wb.sheets[name].delete() except: pass try: sht = wb.sheets.add(name,after=after) except Exception as e: print(str(e)) print('no sheet added ') sht.range('A1').value=obj
[docs] def sheet_to_df(wb,name): ''' Sheet to df Args: wb (TYPE): DESCRIPTION. name (TYPE): DESCRIPTION. Returns: df (TYPE): DESCRIPTION. ''' df = wb.sheets[name].range('A1').options(pd.DataFrame, expand='table').value.T df.index = indextrans(df.index) return df
[docs] def sheet_to_dict(wb,name,integers=None): ''' transform the named sheet to a python dict. If we need a integer it has to be in the integer set''' integers_ = {'max_iterations'} if isinstance(None,type(None)) else integers try: out = wb.sheets[name].range('A1').options(dict,expand='table').value out2 = {k : int(v) if k in integers_ else v for k,v in out.items()} except: out2={} return out2
if __name__ == '__main__': testxls=Path('exceltest/lcrberegning2.xlsx') mmodel,para = getexcelmodel(testxls) eq=mmodel.equations mmodel.draw('_LCR_C62',up=10,down=1,HR=0,pdf=1) # The LCR mmodel.draw('_LCR_C25',up=4,down=1,pdf=1) # liquid assets mmodel.draw('_LCR_C10',up=2,pdf=1) # Leel 1 covered bonds c,r = findcoordinates(testxls) xx = findequations(testxls)