Source code for model_latex

# -*- coding: utf-8 -*-
"""
Created on Sun Dec  3 19:07:03 2017

@author: hanseni

Mostly to eat latex models and translate to business logic

The routines are specific to a style of latex and should be inspected before use 

"""
from IPython.display import display, Math, Latex, Markdown , Image
import re
from IPython.lib.latextools import latex_to_png
from pathlib import Path
import os

import modelmanipulation as mp
import modelclass        as mc
import modelpattern as pt 

[docs]
def rebank(model):
    ''' Decorate every variable name in a model text with a {bank} marker.

    Each match of pt.namepat+pt.lagpat is rewritten: the text '__{bank}' is
    inserted where the first '__' of the name is (that '__' itself is consumed),
    or appended to the name when it holds no '__'. A lag, when present, is put
    back as '(lag)' after the name.

    Function names from pt.funkname, SUM and the names N, S, T, NORM, PPF and CDF
    (compared in upper case) are left undecorated.
    '''
    untouched = set(pt.funkname) | {'SUM'} | set('N S T __{bank} NORM PPF CDF'.split())
    name_with_lag = re.compile(pt.namepat + pt.lagpat)

    def decorate(hit):
        ''' Build the replacement text for one matched name (group 1) and lag (group 2). '''
        name, shift = hit.group(1), hit.group(2)
        suffix = '(' + shift + ')' if shift else ''
        if name.upper() in untouched:
            return name + suffix
        # partition gives (name, '', '') when there is no '__', so one expression covers both cases
        head, _, tail = name.partition('__')
        return head + '__{bank}' + tail + suffix

    return name_with_lag.sub(decorate, model)


#print(rebank('a+b'))
#print(rebank('a(-1)+yyy+cc+bb__x'))


[docs]
def txttolatex(model):
    r'''Translate a template model (business logic text) to a LaTeX align* block.

    A fixed sequence of regular expression substitutions is applied, in order,
    and the result is wrapped in \begin{align*} ... \end{align*}.

    model   : the model text
    returns : the LaTeX text

    Every backslash in a replacement template is doubled: the re module rejects
    unknown letter escapes such as \D, \P, \m and \s in a template with
    re.error, so the undoubled forms cannot be used.

    norm.cdf is rendered as \Phi and norm.ppf as \Phi^{-1} (the inverse of the
    cumulative distribution function).
    '''
    substitutions = (
        # list/do/enddo statements become \mbox lines
        (r'((list)|(do)|(enddo)|(ppppend))([^$]*) [$]', r' \\mbox{\1 \6}  \\\\ \n'),
        (r'__{bank}', r'^{bank}'),
        (r'diff()', r'\\Delta '),
        (r'norm\.ppf', r'\\Phi^{-1} '),
        (r'norm\.cdf', r'\\Phi '),
        (r'__{CrCountry}__{PortSeg}', r'_{CrCountry,PortSeg}'),     # two subscript indexes
        (r'__{CrModelGeo}__{CrModel}', r'_{CrModelGeo,CrModel}'),   # two subscript indexes
        (r'\$', r' \\\\[10pt] '),                                   # end of statement => line break
        (r'!([^\n]*\n)', r' & \\mbox{ ! \1 } & \\\\[10pt]'),        # comments
        (r'frml <>', ''),
        (r'sum\(([a-zA-Z{}]+),', r'\\sum_{\\mbox{\1 }}'),
        (r'[(]-1[)]', r'{\\small[t-1]}'),
        (r'=', r' &='),                                             # alignment point
        (r'([a-zA-Z])_([a-zA-Z])', r'\1\\_\2'),                     # escape _ between letters
        (r'([a-zA-Z])__([a-zA-Z])', r'\1\\_\\_\2'),                 # escape __ between letters
    )
    latex = model
    for pattern, replacement in substitutions:
        latex = re.sub(pattern, replacement, latex)
    return r'\begin{align*}' + latex + r'\end{align*}'



[docs]
def defrack(streng):
    r'''
    Rewrite every \frac{xxx}{yyy} in a string as ((xxx)/(yyy))

    The braces are matched by counting, so numerator and denominator may
    themselves contain braces, including nested \frac{..}{..}.
    The denominator is expected to start right after the } closing the numerator.
    '''
    marker = r'\frac{'
    text = streng
    while marker in text:
        at = text.find(marker)
        tail = text[at + len(marker):]        # everything after the { opening the numerator
        depth = 1                             # that { is already open
        for end_num, char in enumerate(tail):
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
            if depth == 0:
                break
        # step over the } closing the numerator and the { opening the denominator
        rest = tail[end_num + 2:]
        depth = 1
        for end_den, char in enumerate(rest):
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
            if depth == 0:
                break
        text = (text[:at] + '((' + tail[:end_num] + ')/(' + rest[:end_den] + '))'
                + rest[end_den + 1:])
    return text


[docs]
def debrace(streng):
    r'''
    Eliminates \underbrace{xxx}_{yyy} in a string

    \underbrace{xxx}_{yyy}  => xxx

    Only the braced content is kept; no parentheses are added and the label
    yyy is dropped. As there can be nested {} the braces are matched by counting.
    The label is expected to follow directly as _{ after the } closing xxx.
    '''
    marker = r'\underbrace{'
    text = streng
    while marker in text:
        start = text.find(marker)
        content = text[start + len(marker):]   # everything after the { opening xxx
        depth = 1                              # that { is already open
        for index1, char in enumerate(content):
            if char in '{}':
                depth = (depth + 1) if char == '{' else (depth - 1)
            if not depth:
                break
        # step over the three characters }_{ between content and label
        label = content[index1 + 1 + 2:]
        depth = 1
        for index2, char in enumerate(label):
            if char in '{}':
                depth = (depth + 1) if char == '{' else (depth - 1)
            if not depth:
                break
        text = text[:start] + content[:index1] + label[index2 + 1:]
    return text



[docs]
def defunk(funk, subs, streng, startp='{', slutp='}'):
    r'''
    Rewrite every funk<startp>xxx<slutp> in a string as subs(xxx)

    With the default delimiters:  \funk{xxx}  => subs(xxx)

    funk    : the text to look for, without the opening delimiter
    subs    : the text that replaces funk
    streng  : the string to rewrite
    startp  : opening delimiter of the argument
    slutp   : closing delimiter of the argument

    The delimiters are matched by counting, so xxx may hold nested delimiters.
    '''
    opener = funk + startp
    delimiters = startp + slutp
    text = streng
    while opener in text:
        at = text.find(opener)
        tail = text[at + len(opener):]     # everything after the opening delimiter
        depth = 1                          # the opening delimiter is already counted
        for index, char in enumerate(tail):
            if char in delimiters:
                depth += 1 if char == startp else -1
            if depth == 0:
                break
        text = text[:at] + subs + '(' + tail[:index] + ')' + tail[index + 1:]
    return text

#print(defunk(r'\sqrt',r'sqrt',r'a=\sqrt{b+ \sqrt{f+y}}'))
#print(defunk(r'\log',r'log',r'a=\log(b(-1)+ \log({f+y}))',startp='(',slutp=')'))


[docs]
def findindex(ind):
    ''' Return the index variables on the left hand side of the first =.

    An index variable is a name enclosed in {}: a letter followed by any
    number of word characters. The names are returned in order of appearance.
    '''
    left_side, _, _ = ind.partition('=')
    index_pattern = r'\{([A-Za-z][\w]*)\}'
    return re.findall(index_pattern, left_side)



[docs]
def doable(ind, show=False):
    ''' Wrap a formula in the do .. enddo statements needed for its left hand side dimensions.

    The dimensions are the index variables returned by findindex(ind); the
    index n is looped over as n_{bank}. One 'Do <index> $' is placed in front
    of the formula and one 'enddo $' after it for each dimension. A formula
    without dimensions is returned unchanged.

    ind     : the formula text
    show    : if True, print the formula before and after the wrapping
    returns : the wrapped formula
    '''
    dimensions = ['n_{bank}' if name == 'n' else name for name in findindex(ind)]
    if not dimensions:
        return ind

    opening = ' $ '.join('Do ' + dimension for dimension in dimensions) + ' $ \n '
    closing = '\n' + 'enddo $ ' * len(dimensions)
    out = opening + ind + closing
    if show:
        print('Before doable', ind, sep='\n')
        print('After doable', out, sep='\n')
        print()
    return out



[docs]
def findlists(input):
    r'''Extract list definitions from latex.

    The upper cased input is searched for "LIST $...$". In each hit the
    characters $ \ { } are removed and commas are replaced by blanks; the hit
    is then split at = and returned as

        <text before => = <text before =, without its first 4 characters> : <text after =>$

    input   : latex text
    returns : a list with one string for each list definition found
    '''
    cleanup = str.maketrans({'$': None, '\\': None, '{': None, '}': None, ',': ' '})
    statements = []
    for hit in re.findall(r'LIST \s*\$[^$]*\$', input.upper()):
        pieces = hit.translate(cleanup).split('=')
        statements.append(pieces[0] + ' = ' + pieces[0][4:] + ' : ' + pieces[1] + '$')
    return statements


if 0:
    # Disabled manual check of findlists on a few list definitions.
    # The sample is a raw string so the latex escapes \{ \} \_ are taken literally.
    listtest=r'''      
    List $stage=\{stage1, stage2,stage3\}$
    
    List $stage\_from=\{stage1, stage2,stage3\}$
    
    List $stage\_to=\{stage1, stage2,stage3\}$
    
    List $stage\_to2=\{stage1, stage2,stage3\}$
    '''
    print(findlists(listtest))
    

[docs]
def findlistsnew(input):
    r'''Extract list definitions, with sublists, from latex.

    The upper cased input is searched for "$LIST \; ... $". In each hit the
    characters $ \ { } are removed, commas are replaced by blanks and each
    line break gets '/ ' placed in front of it; the hit is then split at =
    and returned as

        <text before => = <text before =, without its first 4 characters> : <text after =>$

    The hits and the cleaned hits are printed.

    input   : latex text
    returns : a list with one string for each list definition found
    '''
    cleanup = str.maketrans({'$': None, '\\': None, '{': None, '}': None,
                             ',': ' ', '\n': '/ \n'})
    relevant = re.findall(r'\$LIST\s*\\;\s*[^$]*\$', input.upper())
    print(f'{relevant=}')
    temp1 = [hit.translate(cleanup) for hit in relevant]
    print(f'{temp1=}')

    statements = []
    for cleaned in temp1:
        pieces = cleaned.split('=')
        statements.append(pieces[0] + ' = ' + pieces[0][4:] + ' : ' + pieces[1] + '$')
    return statements



[docs]
def findlistsx(input):
    r'''Extract list definitions, with sublists, from latex.

    The upper cased input is searched for "$LIST \; ... $". In each hit the
    characters $ \ { } are removed, commas and semicolons are replaced by
    blanks and each line break gets '/ ' placed in front of it; the hit is
    then split at = and turned into

        LIST <name> = <text before =, without its first 4 characters> : <text after =>$

    where <name> is the text before = without its first 4 characters and
    stripped of blanks. The hits and the intermediate results are printed.

    input   : latex text
    returns : one string with a line, ended by a line break, for each list found
    '''
    cleanup = str.maketrans({'$': None, '\\': None, '{': None, '}': None,
                             ',': ' ', ';': ' ', '\n': '/ \n'})
    relevant = re.findall(r'\$LIST\s*\\;\s*[^$]*\$', input.upper())
    print(f'{relevant=}')
    temp1 = [hit.translate(cleanup) for hit in relevant]
    print(f'{temp1=}')

    temp2 = []
    for cleaned in temp1:
        pieces = cleaned.split('=')
        after_keyword = pieces[0][4:]
        temp2.append('LIST ' + after_keyword.strip() + ' = ' + after_keyword
                     + ' : ' + pieces[1] + '$')

    print(f'{temp2=}')
    return '\n'.join(temp2) + '\n'


# Disabled manual check: a list definition with a sublist, written in the "$List \; name=... $" form.
# NOTE(review): the pattern in findlists looks for "LIST $" and so does not match this sample,
# while the pattern in findlistsnew/findlistsx does - confirm which function was meant here.
if 0:
    listtest=r'''      
   $List \; stage=\{s1, s2,s3\} \\
      stagened:\{  0,    0,  1,\} $
    '''
    print(findlists(listtest))




[docs]
def latextotxt(input,dynare=False,bankadd=False):
    r'''
    Translates a latex input to a BL (business logic) output

    Equations are taken from the text between \label{eq:<name>} and the next
    \end{ . Each \\ separated piece that holds exactly one = is translated and
    becomes 'Frml <name> <lhs> = <rhs> $', which doable() wraps in do .. enddo
    for the index variables on its left hand side.
    The list definitions found by findlists() are appended.

    input   : latex text
    dynare  : not used by this function
    bankadd : if True all variable names are decorated with {bank} by rebank()
    returns : the business logic text, one statement per line

    \Phi{x} is translated to NORM.CDF(x) and \Phi^{-1}{x}, the inverse of the
    cumulative distribution function, to NORM.PPF(x).
    '''
    ex12 = re.findall(r'\\label\{eq:(.*?)\}\n(.*?)\\end\{',input,re.DOTALL) # select the relevant equations
    org  = [(name,eq.replace('\n','')) for name,ex in ex12 for eq in ex.split('\\\\')
            if 2 == len(eq.split('='))]
    ex15 = [eq.strip() for (name,eq) in org]
    temp = '\n'.join(ex15)
    # plain text replacements, applied in this order
    trans={r'\left':'',
           r'\right':'',
           r'\min':'min',
           r'\max':'max',
           r'\rho':'rho',
           r'\tau':'tau',
           r'\sigma':'sigma',
           r'&':'',
           r'\\':'',
           r'[':'(',
           r']':')',
           r'\nonumber'  : '',
           r'\_'      : '_',
           r'{n}'      : '__{n}',
           r'_{t}'      : '',
           r'{n,s}'      : '__{n}__{s}',
           r'{n,s,t}'    : '__{n}__{s}__{t}',
           'logit^{-1}' : 'logit_inverse',
           r'\{'      : '{',
           r'\}'      : '}',
           }
    # before{expression} ==> after(expression), done by defunk
    ftrans = {
           r'\sqrt':'sqrt',
           r'\Delta':'diff',
           r'\sum_':'sum',
           r'\Phi':'NORM.CDF',
           r'\Phi^{-1}':'NORM.PPF'
           }
    # regular expression replacements
    regtrans = {
           r'\\Delta ([A-Za-z_][\w{},\^]*)':r'diff(\1)', # \Delta xy => diff(xy)
           r'_{t-([1-9])}'                   : r'(-\1)',      # _{t-x}        => (-x)

           # r'\^([\w])'                   : r'_\1',      # ^x        => _x
           # r'\^\{([\w]+)\}(\w)'          : r'_\1_\2',   # ^{xx}y    => _xx_y
           r'\^\{([\w]+)\}'              : r'_{\1}',      # ^{xx}     => _{xx}
           r'\^\{([\w]+),([\w]+)\}'      : r'_{\1}_{\2}',    # ^{xx,yy}     => _{xx}_{yy}
           r'\s*\\times\s*':'*' ,
            }
    for before,to in ftrans.items():
         temp = defunk(before,to,temp)
    for before,to in trans.items():
        temp = temp.replace(before,to)
    for before,to in regtrans.items():
        temp = re.sub(before,to,temp)
    temp = debrace(temp)
    temp = defrack(temp)
    if bankadd:
        temp = rebank(temp)

    # sums over the dimensions n, s and t are made nested sums; n is summed as n_{bank}
    temp = re.sub(r'sum\(n,s,t\)'+(pt.namepat),r'sum(n_{bank},sum(s,sum(t,\1)))',temp)
    temp = re.sub(r'sum\(n\)sum\(s\)sum\(t\)'+(pt.namepat),r'sum(n_{bank},sum(s,sum(t,\1)))',temp)
    temp = re.sub(r'sum\(n\)'+(pt.namepat),r'sum(n_{bank},\1)',temp)
    temp = re.sub(fr'sum\({pt.namepat}\)\(',r'sum(\1,',temp)

    ltemp  = [b.strip().split('=') for b in temp.splitlines()] # remove blanks in the ends and split each line at =
    ltemp  = [lhs + ' = '+  rhs.replace(' ','') for lhs,rhs in ltemp]      # remove all blanks on the rhs
    ltemp  = ['Frml '+fname + ' ' + eq + ' $ 'for eq,(fname,__) in zip(ltemp,org)]

    ltemp  = [doable(l) for l in ltemp]


    out = '\n'.join(ltemp+findlists(input))
    return out



[docs]
def latextotxtnew(input,bankadd=False):
    r'''
    Translates a latex input to a BL (business logic) output

    Equations are taken from the text between \label{eq:<name>} and the next
    \end{ . Each \\ separated piece that holds exactly one = is translated and
    becomes 'Frml <name> <lhs> = <rhs> $', which doable() wraps in do .. enddo
    for the index variables on its left hand side.
    The list definitions found by findlists() are appended.

    input   : latex text
    bankadd : if True all variable names are decorated with {bank} by rebank().
              The body tests bankadd, so it has to be a parameter; the default
              False keeps calls with only the input working.
    returns : the business logic text, one statement per line

    \Phi{x} is translated to NORM.CDF(x) and \Phi^{-1}{x}, the inverse of the
    cumulative distribution function, to NORM.PPF(x).
    '''
    ex12 = re.findall(r'\\label\{eq:(.*?)\}\n(.*?)\\end\{',input,re.DOTALL) # select the relevant equations
    org  = [(name,eq.replace('\n','')) for name,ex in ex12 for eq in ex.split('\\\\')
            if 2 == len(eq.split('='))]
    ex15 = [eq.strip() for (name,eq) in org]
    temp = '\n'.join(ex15)
    # plain text replacements, applied in this order
    trans={r'\left':'',
           r'\right':'',
           r'\min':'min',
           r'\max':'max',
           r'\rho':'rho',
           r'&':'',
           r'\\':'',
           r'[':'(',
           r']':')',
           r'\nonumber'  : '',
           r'\_'      : '_',
           r'{n}'      : '__{n}',
           r'_{t}'      : '',
           r'{n,s}'      : '__{n}__{s}',
           r'{n,s,t}'    : '__{n}__{s}__{t}',
           'logit^{-1}' : 'logit_inverse',
           r'\{'      : '{',
           r'\}'      : '}',
           r'\sigma'  :'sigma',
           }
    # before{expression} ==> after(expression), done by defunk
    ftrans = {
           r'\sqrt':'sqrt',
           r'\Delta':'diff',
           r'\sum_':'sum',
           r'\Phi':'NORM.CDF',
           r'\Phi^{-1}':'NORM.PPF'
           }
    # regular expression replacements
    regtrans = {
           r'\\Delta ([A-Za-z_][\w{},\^]*)':r'diff(\1)', # \Delta xy => diff(xy)
           r'_{t-([1-9])}'                   : r'(-\1)',      # _{t-x}        => (-x)

           # r'\^([\w])'                   : r'_\1',      # ^x        => _x
           # r'\^\{([\w]+)\}(\w)'          : r'_\1_\2',   # ^{xx}y    => _xx_y
           r'\^\{([\w]+)\}'              : r'_{\1}',      # ^{xx}     => _{xx}
           r'\^\{([\w]+),([\w]+)\}'      : r'_{\1}_{\2}',    # ^{xx,yy}     => _{xx}_{yy}
           r'\s*\\times\s*':'*' ,
            }
    for before,to in ftrans.items():
         temp = defunk(before,to,temp)
    for before,to in trans.items():
        temp = temp.replace(before,to)
    for before,to in regtrans.items():
        temp = re.sub(before,to,temp)
    temp = debrace(temp)
    temp = defrack(temp)
    if bankadd:
        temp = rebank(temp)

    # sums over the dimensions n, s and t are made nested sums; n is summed as n_{bank}
    temp = re.sub(r'sum\(n,s,t\)'+(pt.namepat),r'sum(n_{bank},sum(s,sum(t,\1)))',temp)
    temp = re.sub(r'sum\(n\)sum\(s\)sum\(t\)'+(pt.namepat),r'sum(n_{bank},sum(s,sum(t,\1)))',temp)
    temp = re.sub(r'sum\(n\)'+(pt.namepat),r'sum(n_{bank},\1)',temp)
    temp = re.sub(fr'sum\({pt.namepat}\)\(',r'sum(\1,',temp)

    ltemp  = [b.strip().split('=') for b in temp.splitlines()] # remove blanks in the ends and split each line at =
    ltemp  = [lhs + ' = '+  rhs.replace(' ','') for lhs,rhs in ltemp]      # remove all blanks on the rhs
    ltemp  = ['Frml '+fname + ' ' + eq + ' $ 'for eq,(fname,__) in zip(ltemp,org)]

    ltemp  = [doable(l) for l in ltemp]


    out = '\n'.join(ltemp+findlists(input))
    return out



[docs]
def dynlatextotxt(input,show=False):
    r'''
    Translates a latex input to a BL (business logic) output
    The latex input is the latex output of Dynare

    Each \begin{dmath} ... \end{dmath} environment is taken as one equation.
    Fractions are rewritten by defrack(), then text, function and regular
    expression replacements are applied and each resulting line is returned
    as 'Frml <> <equation> $'.

    input   : latex text
    show    : passed on to every mc.ttimer call, and if True a message is printed
              when the translation has finished.
              NOTE(review): presumably show makes mc.ttimer print the timing - confirm in modelclass
    returns : the business logic text, one statement per line
    '''
    with mc.ttimer('Findall',show):
        ex12 = re.findall(r'\\begin{dmath}\n(.*?)\n\\end{dmath}',input,re.DOTALL) # select the relevant equations

    with mc.ttimer('Split',show):
        # split at the first = only and put it back surrounded by blanks
        org  = [' = '.join(e.split('=',1)) for e in ex12]
    with mc.ttimer('Strip',show):
        ex15 = [defrack(eq.strip()) for eq in org]
    with mc.ttimer('join',show):
        temp = '\n'.join(ex15)

    # plain text replacements, applied in this order
    trans={r'\left':'',
           r'\right':'',
           r'\min':'min',
           r'\max':'max',
           r'&':'',
           r'\\':'',
           r'\_':'_',
           r'[':'(',
           r']':')',
           r'_{t}': '',
           r'\,': ' *',
           r'\leq': ' <= ',
           r'\neq': ' != ',
           r'\geq': ' >= ',
           }
    ftrans = {                    # before{expression} ==> after(expression)
           r'\sqrt':'sqrt',
           r'\Delta':'diff',
           r'^':'^',
           r'\sum_':'sum'
           }
    ftransp = {                   # before(expression) ==> after(expression)
           r'\log':'log',
           }
    regtrans = {
           r'\\Delta ([A-Za-z][\w{},\^]*)':r'diff(\1)', # \Delta xy => diff(xy)
           r'_{t-([1-9])}'                   : r'(-\1)',      # _{t-x}        => (-x)
           '{'+pt.namepat+'}'   : r'\1',                 # {name}        => name
           '{'+pt.namepat+'(?:\\(([+-][0-9]+)\\))}'   : r'\1(\2)',   # {name(lag)}   => name(lag)
            }

    for before,to in trans.items():
         with mc.ttimer(f'replace {before}',show):
             temp = temp.replace(before,to)

    for before,to in ftrans.items():
         with mc.ttimer(f'defunk {before}',show):
             temp = defunk(before,to,temp)

    for before,to in ftransp.items():
         with mc.ttimer(f'defunk {before}',show):
             temp = defunk(before,to,temp,'(',')')

    for before,to in regtrans.items():
        # show is passed here as for every other timer in this function
        with mc.ttimer(f'Regtrans {before}',show):
            temp = re.sub(before,to,temp)

    if show: print('Translation from Latex to BLL finished')

    ltemp  = ['Frml <> ' + eq + ' $ 'for eq in temp.splitlines() ]

    out = '\n'.join(ltemp)
    return out



if __name__ == '__main__'  :
    # Manual checks, run only when the module is executed as a script.
    if 0:
         # disabled: translate a small model with one equation and four lists
         test = r'''\
    Loans can be in 3 stages, 1,2 3. 
    New loans will be generated and loans will mature. 
    
    
    \begin{equation}
    \label{eq:Norm}
    TR^{stage\_from,stage} =  \frac{TR\_U^{stage\_from,stage}}{1+0*\sum_{stage\_from2}(TR\_U^{stage\_from2,stage})}
    \times(1-M^{stage}-WRO^{stage})
    \end{equation}
    
    List $stage=\{s1, s2,s3\}$
    
    List $stage\_from=\{s1, s2,s3\}$
    
    List $stage\_from2=\{s1, s2,s3\}$
    
    List $stage\_to=\{s1, s2,s3\}$
    
    '''
         res = latextotxt(test)    
         
    # A raw string, so the latex commands are kept as written: in a normal string
    # \b in \begin is read as a backspace and \t in \text as a tab.
    test1 =r'''
    list $agegroup=\{16,  17,  18,  19,  20,  99, 100 \} \\
          end     : \{0 ,     0  ,  0   , 0 ,   0 ,   0  , 1\}$
    
    
    \begin{equation}
    \label{eq:mod_another}
    \begin{split}
    \text{[NONEND]}\; & \left(QC^{agegroup}_t\right)^{-ECX_t} & &=  \left(\dfrac{PCTOT_t}{PCTOT_{t+1}}\right) * QC^{agegroup+1}_{t+1}
    \\
    \text{}\;& & &= 
    \dfrac{VB^{agegroup-1}_{t-1} *  \dfrac{NPOP^{agegroup-1}_{t-1}}{NPOP^{agegroup}_t} 
    * (1+R) + VY^{agegroup}_t - VB^{agegroup}_{t+1} )}{ PCTOT_{t}}   
    \end{split}
    \end{equation}
        '''        
    print(findlists(test1))