Source code for modeldekom

# -*- coding: utf-8 -*-
"""
Module for performing attribution analysis on a model.

The main function is attribution.

Created on Wed May 31 08:50:51 2017

@author: hanseni

"""


import pandas as pd
import fnmatch 
import matplotlib.pyplot as plt 
import matplotlib as mpl
import matplotlib.dates as mdates

import numpy
import ipywidgets as ip
import pdb



from modelhelp import cutout
import modelclass as mc 
#import modeldekom as mk 
import modelvis as mv


idx = pd.IndexSlice
def attribution(model, experiments, start='', end='', save='', maxexp=10000, showtime=False,
                summaryvar=['*'], silent=False, msilent=True, type='level'):
    """Calculates an attribution analysis on a model.

    Accepts a dictionary with experiments: the key is the experiment name, the value is a list
    of variables which are reset to their values in the baseline dataframe."""
    summaryout = model.vlist(summaryvar)
    adverseny = model.lastdf
    base = model.basedf
    if type == 'level':
        adverse0 = adverseny[summaryout].loc[start:end, :].copy()
    elif type == 'growth':
        adverse0 = adverseny[summaryout].pct_change().loc[start:end, :].copy() * 100.
    ret = {}
    modelsave = model.save   # save the state of model.save
    model.save = False       # no need to save the experiments in each run
    with model.timer('Total dekomp', showtime):
        for i, (e, var) in enumerate(experiments.items()):
            if i >= maxexp:
                break        # when we are testing
            oldvar = adverseny[var].copy()
            if not silent:
                print(i, 'Experiment :', e, '\n', 'Touching: \n', var)
            adverseny[var] = base[var]
            tempdf = model(adverseny, start, end, samedata=True, silent=msilent)[summaryout]
            adverseny[var] = oldvar
            if type == 'level':
                ret[e] = tempdf[summaryout].loc[start:end, :]
            elif type == 'growth':
                ret[e] = tempdf.pct_change().loc[start:end, :] * 100.
    difret = {e: adverse0 - ret[e] for e in ret}
    df = pd.concat([difret[v] for v in difret], keys=difret.keys()).T
    if save:
        df.to_pickle('data\\' + save + r'.pc')
    model.save = modelsave   # restore the state of model.save
    return df
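
# Illustrative usage of attribution (a sketch, not part of the module): the model, experiment
# names and variable lists below are hypothetical. The result has the summary variables as rows
# and an (experiment, period) MultiIndex on the columns, which the Get* helpers below operate on.
#
#     experiments = {'Rates':  ['RATE_X', 'RATE_Y'],      # hypothetical experiment -> exogenous vars
#                    'Prices': ['PRICE_X']}
#     impact = attribution(mymodel, experiments, start='2018q1', end='2020q4',
#                          summaryvar=['PD__*'], type='level')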
def attribution_new(model, experiments, start='', end='', save='', maxexp=10000, showtime=False,
                    summaryvar=['*'], silent=False, msilent=True, type='level'):
    """Calculates an attribution analysis on a model and returns both level and growth results.

    Accepts a dictionary with experiments: the key is the experiment name, the value is a list
    of variables which are reset to their values in the baseline dataframe."""
    summaryout = model.vlist(summaryvar)
    adverseny = model.lastdf
    base = model.basedf
    adverse0_level = adverseny[summaryout].loc[start:end, :].copy()
    adverse0_growth = adverseny[summaryout].pct_change().loc[start:end, :].copy() * 100.
    ret_level = {}
    ret_growth = {}
    modelsave = model.save   # save the state of model.save
    model.save = False       # no need to save the experiments in each run
    with model.timer('Total dekomp', showtime):
        for i, (e, var) in enumerate(experiments.items()):
            if i >= maxexp:
                break        # when we are testing
            oldvar = adverseny[var].copy()
            if not silent:
                print(i, 'Experiment :', e, '\n', 'Touching: \n', var)
            adverseny[var] = base[var]
            tempdf = model(adverseny, start, end, samedata=True, silent=msilent)[summaryout]
            adverseny[var] = oldvar
            ret_level[e] = tempdf.loc[start:end, :]
            ret_growth[e] = tempdf.pct_change().loc[start:end, :] * 100.
    difret_level = {e: adverse0_level - ret_level[e] for e in ret_level}
    difret_growth = {e: adverse0_growth - ret_growth[e] for e in ret_growth}
    df_level = pd.concat([difret_level[v] for v in difret_level], keys=difret_level.keys()).T
    df_growth = pd.concat([difret_growth[v] for v in difret_growth], keys=difret_growth.keys()).T
    if save:
        df_level.to_pickle('data\\' + save + r'_level.pc')
        df_growth.to_pickle('data\\' + save + r'_growth.pc')   # save the growth result as well
    model.save = modelsave   # restore the state of model.save
    return {'level': df_level, 'growth': df_growth}
def ilist(df, pat):
    '''Returns a list of the variables in the model matching the pattern.
    The pattern can be a list of patterns or a string with patterns separated by blanks.
    This function operates on the index names of a dataframe. Relevant for attribution analysis.'''
    if isinstance(pat, list):
        upat = pat
    else:
        upat = [pat]
    ipat = upat
    out = [v for p in ipat for up in p.split()
           for v in sorted(fnmatch.filter(df.index, up.upper()))]
    return out
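
# For illustration (hypothetical index names): ilist upper-cases each pattern and matches it
# against the dataframe index, so
#
#     ilist(impact, 'pd__* rcet1__*')
#
# returns the index entries matching 'PD__*' followed by those matching 'RCET1__*'.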
def GetAllImpact(impact, sumaryvar):
    '''Gets all the impacts for the selected variables from an impact dataframe'''
    exo = list({v for v, t in impact.columns})
    df = pd.concat([impact.loc[sumaryvar, c] for c in exo], axis=1)
    df.columns = exo
    return df
def GetSumImpact(impact, pat='PD__*'):
    """Gets the accumulated differences attributed to each impact group"""
    a = impact.loc[ilist(impact, pat), :].T.groupby(level=[0]).sum().T
    return a
def GetLastImpact(impact, pat='RCET1__*'):
    """Gets the differences in the last period attributed to each impact group"""
    # assert 1==2
    a = impact.loc[ilist(impact, pat), :].T.groupby(level=[0]).last().T
    return a
def GetAllImpact(impact, pat='RCET1__*'):
    """Gets all differences attributed to each impact group"""
    a = impact.loc[ilist(impact, pat), :]
    return a
def GetOneImpact(impact, pat='RCET1__*', per=''):
    """Gets the differences attributed to each impact group in the period per"""
    a = impact.loc[ilist(impact, pat), idx[:, per]]
    a.columns = [v[0] for v in a.columns]
    return a
def AggImpact(impact):
    """Calculates the sum of impacts and places it in the last column.
    This function is applied to the result of a Get* function"""
    asum = impact.sum(axis=1)
    asum.name = '_Sum'
    aout = pd.concat([impact, asum], axis=1)
    return aout
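
# Illustrative chain of the helper functions above (a sketch; 'impact' is assumed to be the
# dataframe returned by attribution, and the pattern and period are hypothetical):
#
#     summed = GetSumImpact(impact, pat='PD__*')                # accumulated contribution per experiment
#     last   = GetLastImpact(impact, pat='PD__*')               # contribution in the last period
#     one    = GetOneImpact(impact, pat='PD__*', per='2019q4')  # contribution in a single period
#     table  = AggImpact(summed)                                # adds a '_Sum' column across experiments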
class totdif():
    '''Class to make model-wide attribution analysis'''

    def __init__(self, model, summaryvar='*', desdic={}, experiments=None):
        self.diffdf = model.exodif()
        self.diffvar = self.diffdf.columns
        if len(self.diffvar) == 0:
            print('No variables to attribute to ')
            self.go = False
            self.typetext = 'Unknown'
        else:
            self.go = True
            self.experiments = {v: v for v in self.diffvar} if experiments is None else experiments
            self.model = model
            self.start = self.model.current_per.tolist()[0]
            self.end = self.model.current_per.tolist()[-1]
            self.desdic = desdic
            self.summaryvar = summaryvar
            self.summaryout = model.vlist(self.summaryvar)
            # attribution_new returns both the level and the growth decomposition
            self.res = attribution_new(self.model, self.experiments, self.start, self.end,
                                       summaryvar=self.summaryvar, showtime=1, silent=1)
    def explain_last(self, pat='', top=0.9, title='', use='level', threshold=0.0, ysize=5):
        '''Explains the last period with a waterfall chart.

        Args:
            pat (string, optional): Pattern selecting the variables to explain. Defaults to ''.
            top (float, optional): Placement parameter passed to the waterfall plot. Defaults to 0.9.
            title (string, optional): Chart title; generated automatically when empty. Defaults to ''.
            use (string, optional): 'level' or 'growth'. Defaults to 'level'.
            threshold (float, optional): Contributions below this size are cut out. Defaults to 0.0.
            ysize (float, optional): Height of the figure. Defaults to 5.

        Returns:
            fig: The resulting figure.
        '''
        # assert 1==2
        if self.go:
            self.impact = GetLastImpact(self.res[use], pat=pat).T.rename(index=self.desdic)
            ntitle = f'Decomposition last period, {use}' if title == '' else title
            fig = mv.waterplot(self.impact, autosum=1, allsort=1, top=top, title=ntitle,
                               desdic=self.desdic, threshold=threshold, ysize=ysize)
            return fig
    def explain_sum(self, pat='', top=0.9, title='', use='level', threshold=0.0, ysize=5):
        '''Explains the sum over all periods with a waterfall chart.

        Args:
            pat (string, optional): Pattern selecting the variables to explain. Defaults to ''.
            top (float, optional): Placement parameter passed to the waterfall plot. Defaults to 0.9.
            title (string, optional): Chart title; generated automatically when empty. Defaults to ''.
            use (string, optional): 'level' or 'growth'. Defaults to 'level'.
            threshold (float, optional): Contributions below this size are cut out. Defaults to 0.0.
            ysize (float, optional): Height of the figure. Defaults to 5.

        Returns:
            fig: The resulting figure.
        '''
        if self.go:
            self.impact = GetSumImpact(self.res[use], pat=pat).T.rename(index=self.desdic)
            ntitle = f'Decomposition, sum over all periods, {use}' if title == '' else title
            fig = mv.waterplot(self.impact, autosum=1, allsort=1, top=top, title=ntitle,
                               desdic=self.desdic, threshold=threshold, ysize=ysize)
            return fig
    def explain_per(self, pat='', per='', top=0.9, title='', use='level', threshold=0.0, ysize=5):
        '''Explains a single period with a waterfall chart.

        Args:
            pat (string, optional): Pattern selecting the variables to explain. Defaults to ''.
            per (optional): Period to explain; the first period is used when empty. Defaults to ''.
            top (float, optional): Placement parameter passed to the waterfall plot. Defaults to 0.9.
            title (string, optional): Chart title; generated automatically when empty. Defaults to ''.
            use (string, optional): 'level' or 'growth'. Defaults to 'level'.
            threshold (float, optional): Contributions below this size are cut out. Defaults to 0.0.
            ysize (float, optional): Height of the figure. Defaults to 5.

        Returns:
            fig: The resulting figure.
        '''
        if self.go:
            tper = self.res[use].columns.get_level_values(1)[0] if per == '' else per
            self.impact = GetOneImpact(self.res[use], pat=pat, per=tper).T.rename(index=self.desdic)
            t2per = str(tper.date()) if isinstance(tper, pd.Timestamp) else tper
            ntitle = f'Decomposition, {use}: {t2per}' if title == '' else title
            fig = mv.waterplot(self.impact, autosum=1, allsort=1, top=top, title=ntitle,
                               desdic=self.desdic, threshold=threshold, ysize=ysize)
            return fig
    def explain_allold(self, pat='', stacked=True, kind='bar', top=0.9, title='', use='level',
                       threshold=0.0, resample='', axvline=None):
        if self.go:
            years = mdates.YearLocator()     # every year
            months = mdates.MonthLocator()   # every month
            years_fmt = mdates.DateFormatter('%Y')
            selected = GetAllImpact(self.res[use], pat)
            grouped = selected.stack().groupby(level=[0])
            fig, axis = plt.subplots(nrows=len(grouped), ncols=1,
                                     figsize=(10, 5 * len(grouped)), constrained_layout=False)
            width = 0.5   # the width of the bars
            ntitle = f'Decomposition, {use}' if title == '' else title
            laxis = axis if isinstance(axis, numpy.ndarray) else [axis]
            for j, ((name, dfatt), ax) in enumerate(zip(grouped, laxis)):
                dfatt.index = [i[1] for i in dfatt.index]
                if resample == '':
                    tempdf = cutout(dfatt.T, threshold).T
                else:
                    tempdf = cutout(dfatt.T, threshold).T.resample(resample).mean()
                # pdb.set_trace()
                tempdf.plot(ax=ax, kind=kind, stacked=stacked, title=self.desdic.get(name, name))
                ax.set_ylabel(name, fontsize='x-large')
                # ax.set_xticklabels(tempdf.index.tolist(), rotation=45, fontsize='x-large')
                # ax.xaxis.set_minor_locator(plt.NullLocator())
                # ax.tick_params(axis='x', labelleft=True)
                # ax.xaxis.set_major_locator(years)
                # ax.xaxis_date()
                # ax.xaxis.set_major_formatter(years_fmt)
                # ax.xaxis.set_minor_locator(months)
                # ax.tick_params(axis='x', labelrotation=45, right=True)
            if axvline is not None:
                for axx in laxis:
                    axx.axvline(axvline)
            fig.suptitle(ntitle, fontsize=20)
            # plt.tight_layout()
            # fig.subplots_adjust(top=top)
            fig.set_constrained_layout(True)
            return fig
    def explain_all(self, pat='', stacked=True, kind='bar', top=0.9, title='', use='level',
                    threshold=0.0, resample='', axvline=None):
        '''Explains all periods, one subplot per selected variable.

        Args:
            pat (string, optional): Pattern selecting the variables to explain. Defaults to ''.
            stacked (bool, optional): Stack the contributions. Defaults to True.
            kind (string, optional): Plot kind ('bar', 'line' or 'area'). Defaults to 'bar'.
            top (float, optional): Placement parameter for the title. Defaults to 0.9.
            title (string, optional): Chart title; generated automatically when empty. Defaults to ''.
            use (string, optional): 'level' or 'growth'. Defaults to 'level'.
            threshold (float, optional): Contributions below this size are cut out. Defaults to 0.0.
            resample (string, optional): Pandas resampling rule applied before plotting. Defaults to ''.
            axvline (optional): Location of a vertical line drawn in each subplot. Defaults to None.

        Returns:
            fig: The resulting figure.
        '''
        import warnings
        if self.go:
            years = mdates.YearLocator()     # every year
            months = mdates.MonthLocator()   # every month
            years_fmt = mdates.DateFormatter('%Y')
            selected = GetAllImpact(self.res[use], pat)
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', FutureWarning)
                grouped = selected.stack().groupby(level=[0])
                fig, axis = plt.subplots(nrows=len(grouped), ncols=1,
                                         figsize=(10, 5 * len(grouped)), constrained_layout=False)
            width = 0.5   # the width of the bars
            ntitle = f'Decomposition, {use}' if title == '' else title
            laxis = axis if isinstance(axis, numpy.ndarray) else [axis]
            with warnings.catch_warnings():
                warnings.simplefilter('ignore', FutureWarning)
                for j, ((name, dfatt), ax) in enumerate(zip(grouped, laxis)):
                    dfatt.index = [i[1] for i in dfatt.index]
                    if resample == '':
                        tempdf = cutout(dfatt.T, threshold).T
                    else:
                        tempdf = cutout(dfatt.T, threshold).T.resample(resample).mean()
                    # pdb.set_trace()
                    selfstack = (kind == 'line' or kind == 'area') and stacked
                    tempdf = tempdf.rename(columns=self.desdic)
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore", category=UserWarning)
                        if selfstack:
                            # plot positive and negative contributions separately so they stack correctly
                            df_neg, df_pos = tempdf.clip(upper=0), tempdf.clip(lower=0)
                            df_pos.plot(ax=ax, kind=kind, stacked=stacked, title=self.desdic.get(name, name))
                            ax.set_prop_cycle(None)
                            df_neg.plot(ax=ax, legend=False, kind=kind, stacked=stacked,
                                        title=self.desdic.get(name, name))
                            ax.set_ylim([df_neg.sum(axis=1).min(), df_pos.sum(axis=1).max()])
                        else:
                            tempdf.plot(ax=ax, kind=kind, stacked=stacked, title=self.desdic.get(name, name))
                        ax.xaxis.set_major_locator(plt.MaxNLocator(10))
                    ax.set_ylabel(name, fontsize='x-large')
                    # ax.set_xticklabels(tempdf.index.tolist(), rotation=45, fontsize='x-large')
                    # ax.xaxis.set_major_locator(years)
                    # ax.xaxis_date()
                    # ax.xaxis.set_major_formatter(years_fmt)
                    # ax.xaxis.set_minor_locator(months)
                    # ax.tick_params(axis='x', labelrotation=45, right=True)
            if axvline is not None:
                for axx in laxis:
                    axx.axvline(axvline)
            fig.suptitle(ntitle, fontsize=20)
            # plt.tight_layout()
            # fig.subplots_adjust(top=top)
            fig.set_constrained_layout(True)
            return fig
#
    def totexplain(self, pat='*', vtype='all', stacked=True, kind='bar', per='', top=0.9, title='',
                   use='level', threshold=0.0, ysize=10, **kwargs):
        '''Wrapper for the different explanation methods:

        - :any:`explain_last`
        - :any:`explain_per`
        - :any:`explain_sum`
        - :any:`explain_all`

        Args:
            pat (string, optional): Pattern selecting the variables to explain. Defaults to '*'.
            vtype (per|all|last|sum, optional): What data to attribute. Defaults to 'all'.
            stacked (bool, optional): Stack the contributions. Defaults to True.
            kind (string, optional): Plot kind. Defaults to 'bar'.
            per (optional): Period to explain when vtype='per'. Defaults to ''.
            top (float, optional): Placement parameter for the title. Defaults to 0.9.
            title (string, optional): Chart title. Defaults to ''.
            use (string, optional): 'level' or 'growth'. Defaults to 'level'.
            threshold (float, optional): Contributions below this size are cut out. Defaults to 0.0.
            ysize (float, optional): Height of the figure. Defaults to 10.
            **kwargs: Additional keyword arguments; currently ignored.

        Returns:
            fig: The resulting figure.
        '''
        if vtype.upper() == 'PER':
            fig = self.explain_per(pat=pat, per=per, top=top, use=use, title=title,
                                   threshold=threshold, ysize=ysize)
        elif vtype.upper() == 'LAST':
            fig = self.explain_last(pat=pat, top=top, use=use, title=title, threshold=threshold)
        elif vtype.upper() == 'SUM':
            fig = self.explain_sum(pat=pat, top=top, use=use, title=title, threshold=threshold)
        else:
            fig = self.explain_all(pat=pat, stacked=stacked, kind=kind, top=top, use=use,
                                   title=title, threshold=threshold)
        return fig
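
# Illustrative use of the totdif class (a sketch with hypothetical variable names; it assumes the
# model has been solved once with the baseline and once with the adverse dataframe, so that
# model.exodif() is non-empty):
#
#     td = totdif(mymodel, summaryvar='RCET1__*', desdic=my_descriptions)
#     fig_all  = td.totexplain(pat='RCET1__DK', vtype='all', use='level')     # one subplot per variable
#     fig_last = td.totexplain(pat='RCET1__DK', vtype='last', use='growth')   # waterfall, last period
#     fig_per  = td.totexplain(pat='RCET1__DK', vtype='per', per='2019q4')    # waterfall, one period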
    # def get_att_gui(self, var='FY', spat='*', desdic={}, use='level'):
    #     '''Creates a jupyter ipywidget to display model level
    #     attributions '''
    #     def show_all2(Variable, Periode, Save, Use):
    #         global fig1, fig2
    #         fig1 = self.totexplain(pat=Variable, top=0.87, use=Use)
    #         fig2 = self.totexplain(pat=Variable, vtype='per', per=Periode, top=0.85, use=Use)
    #         if Save:
    #             fig1.savefig(f'Attribution-{Variable}-{use}.pdf')
    #             fig2.savefig(f'Attribution-{Variable}-{Periode}-{use}.pdf')
    #             print(f'Attribution-{Variable}-{use}.pdf and Attribution-{Variable}-{Periode}-{use}.pdf are saved')
    #
    #     show = ip.interactive(show_all2,
    #                           Variable=ip.Dropdown(options=sorted(self.model.endogene), value=var),
    #                           Periode=self.model.current_per,
    #                           Use=ip.RadioButtons(options=['level', 'growth'], description='Use'),
    #                           Save=False,
    #                           )
    #     return show


if __name__ == '__main__':
    #%% running with the mtotal model
    df2 = pd.DataFrame({'Z': [1., 22., 33, 43], 'TY': [10., 20., 30., 40.],
                        'YD': [10., 20., 30., 40.]}, index=[2017, 2018, 2019, 2020])
    df3 = pd.DataFrame({'Z': [1., 22., 33, 43], 'TY': [10., 40., 60., 10.],
                        'YD': [10., 49., 36., 40.]}, index=[2017, 2018, 2019, 2020])
    ftest = '''
    FRMl <>  ii = TY(-1)+c(-1)+Z*c(-1) $
    frml <>  c=0.8*yd+log(1) $
    frml <>  d = c +2*ii(-1) $
    frml <>  c2=0.8*yd+log(1) $
    frml <>  d2 = c + 42*ii $
    frml <>  c3=0.8*yd+log(1) $
    frml <>  d3 = c +ii $
    '''
    m2 = mc.model(ftest, straight=True, modelname='m2 testmodel')
    df2 = mc.insertModelVar(df2, m2)
    df3 = mc.insertModelVar(df3, m2)
    z1 = m2(df2)
    z2 = m2(df3)
    ccc = m2.totexplain(pat='D2', per=2019, vtype='all', top=0.8)
    ccc = m2.totexplain('D2', vtype='last', top=0.8)
    ccc = m2.totexplain('D2', vtype='per', top=0.8)
    #%%
    ddd = totdif(m2)
    eee = totdif(m2)
    ddd.totexplain('D2', vtype='all', top=0.8, use='growth')
    eee.totexplain('D2', vtype='all', top=0.8)

    if False and (not 'mtotal' in locals()):
        # get the model
        with open(r"models\mtotal.fru", "r") as text_file:
            ftotal = text_file.read()
        # get the data
        base0 = pd.read_pickle(r'data\base0.pc')
        base = pd.read_pickle(r'data\base.pc')
        adve0 = pd.read_pickle(r'data\adve0.pc')
        #%%
        mtotal = mc.model(ftotal)
        # prune(mtotal,base)
        #%%
        baseny = mtotal(base0, '2016q1', '2018q4', samedata=False)
        adverseny = mtotal(adve0, '2016q1', '2018q4', samedata=True)
        #%%
        diff = mtotal.exodif()   # exogenous variables which differ between baseny and adverseny
        #%%
        assert 1 == 2   # just for stopping in test situations
        #%%
        adverseny = mtotal(adve0, '2016q1', '2018q4', samedata=True)   # to make sure we have the right adverse
        countries = {c.split('__')[2] for c in diff.columns}   # list of countries
        countryexperiments = {e: [c for c in diff.columns if ('__' + e + '__') in c]
                              for e in countries}               # dict of experiments
        assert len(diff.columns) == sum([len(c) for c in countryexperiments.values()]), \
            'Not all exogenous shock variables are accounted for'
        countryimpact = attribution(mtotal, countryexperiments, save='countryimpactxx',
                                    maxexp=30000, showtime=1)
        #%%
        adverseny = mtotal(adve0, '2016q1', '2018q4', samedata=True)
        vartypes = {c.split('__')[1] for c in diff.columns}
        vartypeexperiments = {e: [c for c in diff.columns if ('__' + e + '__') in c]
                              for e in vartypes}
        assert len(diff.columns) == sum([len(c) for c in vartypeexperiments.values()]), \
            'Not all exogenous shock variables are accounted for'
        vartypeimpact = attribution(mtotal, vartypeexperiments, save='vartypeimpactxx',
                                    maxexp=3000, showtime=1)
        ##%%
        # adverseny = mtotal(adve0, '2016q1', '2018q4', samedata=True)
        # allexo = {c[7:14] for c in diff.columns}
        # allexoexperiments = {e: [c for c in diff.columns if ('__'+e+'__') in c] for e in allexo}
        # allexoimpact = attribution(mtotal, allexoexperiments, base, adverseny, save='allexoimpact', maxexp=2000)
        #%% test of upddf
        if 0:
            baseny = mtotal(base0, '2016q1', '2018q4', samedata=False)
            adverseny = mtotal(adve0, '2016q1', '2018q4', samedata=True)
            #%%
            e = 'EE'
            var = countryexperiments['EE']
            vardiff = diff[var]
            temp = adverseny[var].copy()
            adverseny[var] = baseny[var]
            temp2 = adverseny[var].copy()
            _ = mc.upddf(adverseny, temp)
            adverseny[var] = temp
            temp3 = adverseny[var].copy()