# Source code for model_latex

# -*- coding: utf-8 -*-
"""
Created on Sun Dec  3 19:07:03 2017

@author: hanseni

Mostly to eat latex models and translate to business logic

The routines are specific to a style of latex and should be inspected before use 

"""
from IPython.display import display, Math, Latex, Markdown , Image
import re
from IPython.lib.latextools import latex_to_png
from pathlib import Path
import os

import modelmanipulation as mp
import modelclass        as mc
import modelpattern as pt 
def rebank(model):
    '''Decorate the variable names of a model text with a ``{bank}`` placeholder.

    Each match of ``pt.namepat + pt.lagpat`` is handed to a small callback:
    group 1 is taken as the variable name and group 2 (when non-empty) as the
    lag, which is re-emitted as ``(lag)``.  Names whose upper-case form is a
    function name from ``pt.funkname``, ``SUM`` or one of
    ``N S T __{bank} NORM PPF CDF`` are left undecorated.  Any other name gets
    ``__{bank}`` appended, or placed at the first ``__`` if the name has one.

    Returns the model text with the decorated names.
    '''
    # Names that must never be decorated: functions plus a few fixed names.
    protected = (set(pt.funkname) | {'SUM'}
                 | set('N S T __{bank} NORM PPF CDF'.split()))
    name_with_lag = re.compile(pt.namepat + pt.lagpat)

    def decorate(hit):
        '''Return the replacement text for one name(+lag) match.'''
        name, lag = hit.group(1), hit.group(2)
        suffix = '(' + lag + ')' if lag else ''
        if name.upper() in protected:
            return name + suffix
        # NOTE(review): the '__' the name is split on is not re-inserted, so
        # 'a__x' becomes 'a__{bank}x' rather than 'a__{bank}__x' - confirm
        # this is intended.
        head, _, tail = name.partition('__')
        return head + '__{bank}' + tail + suffix

    return name_with_lag.sub(decorate, model)
#print(rebank('a+b')) #print(rebank('a(-1)+yyy+cc+bb__x'))
def txttolatex(model):
    r'''Render a template model text as a LaTeX ``align*`` environment.

    The text is pushed through a fixed sequence of regular-expression
    substitutions (applied in the order listed in ``rules``) and the result
    is wrapped in ``\begin{align*}`` ... ``\end{align*}``.

    Backslashes in the replacement templates are doubled on purpose:
    ``re.sub`` treats an unknown ``\<letter>`` escape in a replacement (such
    as ``\D`` in ``\Delta``) as an error, so a literal backslash has to be
    written as ``\\``.

    Args:
        model: Template model text.

    Returns:
        The LaTeX source as a string.
    '''
    rules = (
        # list/do/enddo statements become a text line of their own
        (r'((list)|(do)|(enddo)|(ppppend))([^$]*) [$]',
         r' \\mbox{\1 \6} \\\\ \n'),
        (r'__{bank}', r'^{bank}'),
        (r'diff()', r'\\Delta '),
        # ppf is the inverse of the normal cdf, hence Phi^{-1}
        (r'norm\.ppf', r'\\Phi^{-1} '),
        (r'norm\.cdf', r'\\Phi '),
        # two subscript indexes
        (r'__{CrCountry}__{PortSeg}', r'_{CrCountry,PortSeg}'),
        (r'__{CrModelGeo}__{CrModel}', r'_{CrModelGeo,CrModel}'),
        # statement terminator -> LaTeX line break
        (r'\$', r' \\\\[10pt] '),
        # '!' comment lines
        (r'!([^\n]*\n)', r' & \\mbox{ ! \1 } & \\\\[10pt]'),
        (r'frml <>', ''),
        (r'sum\(([a-zA-Z{}]+),', r'\\sum_{\\mbox{\1 }}'),
        (r'[(]-1[)]', r'{\\small[t-1]}'),
        (r'=', r' &='),
        # escape underscores between letters
        (r'([a-zA-Z])_([a-zA-Z])', r'\1\\_\2'),
        (r'([a-zA-Z])__([a-zA-Z])', r'\1\\_\\_\2'),
    )
    latex = model
    for pattern, replacement in rules:
        latex = re.sub(pattern, replacement, latex)
    return r'\begin{align*}' + latex + r'\end{align*}'
def defrack(streng):
    r'''Rewrite every ``\frac{num}{den}`` in ``streng`` as ``((num)/(den))``.

    Braces are matched by counting, so numerator and denominator may contain
    nested ``{}`` groups, including further ``\frac`` constructs, which are
    translated in later passes.  Whitespace between the two brace groups is
    accepted.

    Args:
        streng: Text possibly containing ``\frac{...}{...}``.

    Returns:
        The text with all fractions rewritten.

    Raises:
        ValueError: If a ``\frac`` has unbalanced braces or no second
            ``{...}`` argument.
    '''
    marker = r'\frac{'

    def closing_brace(text, begin):
        '''Index of the ``}`` closing the group whose content starts at begin.'''
        depth = 1  # the opening brace just before ``begin`` is already seen
        for pos in range(begin, len(text)):
            if text[pos] == '{':
                depth += 1
            elif text[pos] == '}':
                depth -= 1
                if depth == 0:
                    return pos
        raise ValueError('unbalanced braces in \\frac: ' + text[begin:])

    result = streng
    while marker in result:
        start = result.find(marker)
        num_begin = start + len(marker)
        num_end = closing_brace(result, num_begin)

        # locate the '{' opening the denominator, skipping blanks
        den_open = num_end + 1
        while den_open < len(result) and result[den_open].isspace():
            den_open += 1
        if den_open >= len(result) or result[den_open] != '{':
            raise ValueError('\\frac without a second {...} argument: '
                             + result[start:])
        den_end = closing_brace(result, den_open + 1)

        result = (result[:start]
                  + '((' + result[num_begin:num_end]
                  + ')/(' + result[den_open + 1:den_end] + '))'
                  + result[den_end + 1:])
    return result
def debrace(streng):
    r'''Remove ``\underbrace{body}_{label}`` annotations, keeping ``body``.

    ``\underbrace{xxx}_{yyy}`` is replaced by ``xxx``; the body is inserted
    as is, without surrounding parentheses.  Braces are matched by counting
    so both body and label may contain nested ``{}``.  An ``\underbrace``
    that is not followed by a ``_{label}`` just loses its wrapper.

    Args:
        streng: Text possibly containing ``\underbrace`` constructs.

    Returns:
        The text with all underbraces removed.

    Raises:
        ValueError: If the braces of an ``\underbrace`` are unbalanced.
    '''
    marker = r'\underbrace{'

    def closing_brace(text, begin):
        '''Index of the ``}`` closing the group whose content starts at begin.'''
        depth = 1  # the opening brace just before ``begin`` is already seen
        for pos in range(begin, len(text)):
            if text[pos] == '{':
                depth += 1
            elif text[pos] == '}':
                depth -= 1
                if depth == 0:
                    return pos
        raise ValueError('unbalanced braces in \\underbrace: ' + text[begin:])

    result = streng
    while marker in result:
        start = result.find(marker)
        body_begin = start + len(marker)
        body_end = closing_brace(result, body_begin)

        # drop the optional '_{label}' that directly follows the body
        tail = body_end + 1
        if result.startswith('_{', tail):
            tail = closing_brace(result, tail + 2) + 1

        result = result[:start] + result[body_begin:body_end] + result[tail:]
    return result
def defunk(funk, subs, streng, startp='{', slutp='}'):
    r'''Rewrite ``funk{xxx}`` as ``subs(xxx)`` throughout a string.

    The argument is delimited by ``startp``/``slutp`` (braces by default) and
    located by counting delimiters, so it may contain nested groups.  Nested
    occurrences of ``funk`` inside the argument are translated as well.

    The search resumes just after the ``subs(`` that was written, so the
    function terminates even when ``subs + '('`` equals ``funk + startp``
    (for instance ``log`` -> ``log`` with parentheses).

    Args:
        funk: Text of the function to replace, e.g. ``\sqrt``.
        subs: Replacement function name, e.g. ``sqrt``.
        streng: Text to translate.
        startp: Single character opening the argument.
        slutp: Single character closing the argument.

    Returns:
        The translated text.

    Raises:
        ValueError: If the argument of an occurrence is never closed.
    '''
    marker = funk + startp
    result = streng
    search_from = 0
    while True:
        start = result.find(marker, search_from)
        if start < 0:
            return result
        arg_begin = start + len(marker)

        depth = 1  # the delimiter ending ``marker`` is already open
        for arg_end in range(arg_begin, len(result)):
            char = result[arg_end]
            if char == startp:
                depth += 1
            elif char == slutp:
                depth -= 1
                if depth == 0:
                    break
        else:
            raise ValueError('unbalanced ' + startp + slutp + ' after '
                             + funk + ': ' + result[start:])

        result = (result[:start] + subs + '('
                  + result[arg_begin:arg_end] + ')'
                  + result[arg_end + 1:])
        # continue inside the argument, never on the text just emitted
        search_from = start + len(subs) + 1
#print(defunk(r'\sqrt',r'sqrt',r'a=\sqrt{b+ \sqrt{f+y}}')) #print(defunk(r'\log',r'log',r'a=\log(b(-1)+ \log({f+y}))',startp='(',slutp=')'))
def findindex(ind):
    '''Return the index variables on the left hand side of a formula.

    The left hand side is the text before the first ``=``.  An index
    variable is a name wrapped in ``{}`` that starts with a letter and
    continues with word characters.  The names are returned in order of
    appearance.
    '''
    left_side, _, _ = ind.partition('=')
    braced_name = re.compile(r'\{([A-Za-z][\w]*)\}')
    return braced_name.findall(left_side)
def doable(ind, show=False):
    '''Wrap a formula in the ``Do ... enddo`` statements its dimensions need.

    The dimensions are the index variables ``findindex`` reports for the left
    hand side; the index ``n`` is replaced by ``n_{bank}``.  One ``Do`` line
    is placed in front and one ``enddo $`` behind per dimension.  A formula
    without dimensions is returned unchanged.

    With ``show`` true the text before and after wrapping is printed.
    '''
    loop_vars = ['n_{bank}' if name == 'n' else name
                 for name in findindex(ind)]
    if not loop_vars:
        return ind

    header = ' $ '.join('Do ' + name for name in loop_vars) + ' $ \n '
    footer = '\n' + 'enddo $ ' * len(loop_vars)
    wrapped = header + ind + footer
    if show:
        print('Before doable', ind, sep='\n')
        print('After doable', wrapped, sep='\n')
        print()
    return wrapped
def findlists(input):
    '''Extract the list definitions from a LaTeX text.

    The text is upper-cased and every ``LIST $...$`` span is picked up.
    ``$``, backslashes and braces are removed and commas become blanks.  Each
    span is then split at ``=`` and returned as
    ``<head> = <head from its 5th character> : <second part>$``.

    Returns a list with one string per list definition.
    '''
    list_span = re.compile(r'LIST \s*\$[^$]*\$')
    cleanup = str.maketrans({'$': None, '\\': None, ',': ' ',
                             '{': None, '}': None})
    statements = []
    for span in list_span.findall(input.upper()):
        parts = span.translate(cleanup).split('=')
        head = parts[0]
        statements.append(head + ' = ' + head[4:] + ' : ' + parts[1] + '$')
    return statements
# Disabled manual check of findlists; switch the condition on to run it.
if 0:
    # raw string: the sample contains LaTeX escapes such as \{ and \_
    listtest = r'''
List $stage=\{stage1, stage2,stage3\}$
List $stage\_from=\{stage1, stage2,stage3\}$
List $stage\_to=\{stage1, stage2,stage3\}$
List $stage\_to2=\{stage1, stage2,stage3\}$
'''
    print(findlists(listtest))
def findlistsnew(input):
    '''Extract lists with sublists from a LaTeX text.

    The text is upper-cased and every ``$LIST \\; ... $`` span is picked up.
    ``$``, backslashes and braces are removed, commas become blanks and each
    line break is prefixed with ``/ ``.  Each span is split at ``=`` and
    returned as ``<head> = <head from its 5th character> : <second part>$``.

    The matched spans and the cleaned spans are printed on the way.

    Returns a list with one string per list definition.
    '''
    cleanup = str.maketrans({'$': None, '\\': None, ',': ' ',
                             '{': None, '}': None, '\n': '/ \n'})
    spans = re.findall(r'\$LIST\s*\\;\s*[^$]*\$', input.upper())
    print(f'relevant={spans!r}')
    cleaned = [span.translate(cleanup) for span in spans]
    print(f'temp1={cleaned!r}')

    statements = []
    for text in cleaned:
        parts = text.split('=')
        head = parts[0]
        statements.append(head + ' = ' + head[4:] + ' : ' + parts[1] + '$')
    return statements
def findlistsx(input):
    '''Extract lists with sublists from a LaTeX text as one text block.

    The text is upper-cased and every ``$LIST \\; ... $`` span is picked up.
    ``$``, backslashes and braces are removed, commas and semicolons become
    blanks and each line break is prefixed with ``/ ``.  Each span is split
    at ``=``; with ``name`` being the first part from its 5th character on,
    the statement is ``LIST <name stripped> = <name> : <second part>$``.

    The intermediate results are printed on the way.

    Returns the statements, each followed by a newline, as a single string.
    '''
    cleanup = str.maketrans({'$': None, '\\': None, ',': ' ', ';': ' ',
                             '{': None, '}': None, '\n': '/ \n'})
    spans = re.findall(r'\$LIST\s*\\;\s*[^$]*\$', input.upper())
    print(f'relevant={spans!r}')
    cleaned = [span.translate(cleanup) for span in spans]
    print(f'temp1={cleaned!r}')

    statements = []
    for text in cleaned:
        parts = text.split('=')
        listname = parts[0][4:]
        statements.append('LIST ' + listname.strip() + ' = ' + listname
                          + ' : ' + parts[1] + '$')
    print(f'temp2={statements!r}')
    return '\n'.join(statements) + '\n'
# Disabled manual check; never executed while the condition is false.
# NOTE(review): the sample is written in the '$List \; ...$' form that
# findlistsx/findlistsnew look for, yet findlists is called - confirm.
if False:
    listtest = r'''
$List \; stage=\{s1, s2,s3\} \\
stagened:\{ 0, 0, 1,\} $
'''
    print(findlists(listtest))
def latextotxt(input, dynare=False, bankadd=False):
    r'''Translate the equations of a LaTeX text to business logic (BL).

    Equations are taken from every span between ``\label{eq:<name>}`` and
    the next ``\end{``.  A span is split at ``\\`` and only the pieces that
    hold exactly one ``=`` are kept.  The pieces are then translated by
    ``defunk`` (``ftrans``), plain text replacement (``trans``), regular
    expressions (``regtrans``), ``debrace`` and ``defrack``, in that order.
    Finally each equation becomes ``Frml <name> <lhs> = <rhs> $``, is
    wrapped by ``doable``, and the result of ``findlists(input)`` is added.

    Args:
        input: LaTeX source text.
        dynare: Not used by this function.
        bankadd: If true the translated text is decorated by ``rebank``.

    Returns:
        The formulas and list definitions joined by newlines.
    '''
    labelled = re.findall(r'\\label\{eq:(.*?)\}\n(.*?)\\end\{', input, re.DOTALL)
    # (label, equation) for every piece with exactly one '='
    org = [(name, eq.replace('\n', ''))
           for name, ex in labelled
           for eq in ex.split('\\\\')
           if len(eq.split('=')) == 2]
    temp = '\n'.join(eq.strip() for _, eq in org)

    # plain replacements, applied in this order
    trans = {r'\left': '',
             r'\right': '',
             r'\min': 'min',
             r'\max': 'max',
             r'\rho': 'rho',
             r'\tau': 'tau',
             r'\sigma': 'sigma',
             r'&': '',
             r'\\': '',
             r'[': '(',
             r']': ')',
             r'\nonumber': '',
             r'\_': '_',
             r'{n}': '__{n}',
             r'_{t}': '',
             r'{n,s}': '__{n}__{s}',
             r'{n,s,t}': '__{n}__{s}__{t}',
             'logit^{-1}': 'logit_inverse',
             r'\{': '{',
             r'\}': '}',
             }
    # before{expression} => after(expression)
    ftrans = {r'\sqrt': 'sqrt',
              r'\Delta': 'diff',
              r'\sum_': 'sum',
              r'\Phi': 'NORM.CDF',
              # the inverse of the normal cdf is the ppf (not the pdf)
              r'\Phi^{-1}': 'NORM.PPF',
              }
    regtrans = {
        r'\\Delta ([A-Za-z_][\w{},\^]*)': r'diff(\1)',   # \Delta xy => diff(xy)
        r'_{t-([1-9])}': r'(-\1)',                       # _{t-x} => (-x)
        r'\^\{([\w]+)\}': r'_{\1}',                      # ^{xx} => _{xx}
        r'\^\{([\w]+),([\w]+)\}': r'_{\1}_{\2}',         # ^{xx,yy} => _{xx}_{yy}
        r'\s*\\times\s*': '*',
    }

    for before, to in ftrans.items():
        temp = defunk(before, to, temp)
    for before, to in trans.items():
        temp = temp.replace(before, to)
    for before, to in regtrans.items():
        temp = re.sub(before, to, temp)
    temp = debrace(temp)
    temp = defrack(temp)
    if bankadd:
        temp = rebank(temp)

    # turn the sum(index)(expression) forms into nested sum(index,expression)
    temp = re.sub(r'sum\(n,s,t\)' + (pt.namepat), r'sum(n_{bank},sum(s,sum(t,\1)))', temp)
    temp = re.sub(r'sum\(n\)sum\(s\)sum\(t\)' + (pt.namepat), r'sum(n_{bank},sum(s,sum(t,\1)))', temp)
    temp = re.sub(r'sum\(n\)' + (pt.namepat), r'sum(n_{bank},\1)', temp)
    temp = re.sub(fr'sum\({pt.namepat}\)\(', r'sum(\1,', temp)

    # strip each line and split it at '='
    sides = [line.strip().split('=') for line in temp.splitlines()]
    # blanks are removed from the right hand side
    equations = [lhs + ' = ' + rhs.replace(' ', '') for lhs, rhs in sides]
    frmls = ['Frml ' + fname + ' ' + eq + ' $ '
             for eq, (fname, _) in zip(equations, org)]
    frmls = [doable(frml) for frml in frmls]
    return '\n'.join(frmls + findlists(input))
def latextotxtnew(input, dynare=False, bankadd=False):
    r'''Translate the equations of a LaTeX text to business logic (BL).

    Equations are taken from every span between ``\label{eq:<name>}`` and
    the next ``\end{``.  A span is split at ``\\`` and only the pieces that
    hold exactly one ``=`` are kept.  The pieces are then translated by
    ``defunk`` (``ftrans``), plain text replacement (``trans``), regular
    expressions (``regtrans``), ``debrace`` and ``defrack``, in that order.
    Finally each equation becomes ``Frml <name> <lhs> = <rhs> $``, is
    wrapped by ``doable``, and the result of ``findlists(input)`` is added.

    Compared with ``latextotxt`` the ``trans`` table has no ``\tau`` entry.

    Args:
        input: LaTeX source text.
        dynare: Not used; accepted so the signature matches ``latextotxt``.
        bankadd: If true the translated text is decorated by ``rebank``.
            The body tests this flag, so it has to be a parameter.

    Returns:
        The formulas and list definitions joined by newlines.
    '''
    labelled = re.findall(r'\\label\{eq:(.*?)\}\n(.*?)\\end\{', input, re.DOTALL)
    # (label, equation) for every piece with exactly one '='
    org = [(name, eq.replace('\n', ''))
           for name, ex in labelled
           for eq in ex.split('\\\\')
           if len(eq.split('=')) == 2]
    temp = '\n'.join(eq.strip() for _, eq in org)

    # plain replacements, applied in this order
    trans = {r'\left': '',
             r'\right': '',
             r'\min': 'min',
             r'\max': 'max',
             r'\rho': 'rho',
             r'&': '',
             r'\\': '',
             r'[': '(',
             r']': ')',
             r'\nonumber': '',
             r'\_': '_',
             r'{n}': '__{n}',
             r'_{t}': '',
             r'{n,s}': '__{n}__{s}',
             r'{n,s,t}': '__{n}__{s}__{t}',
             'logit^{-1}': 'logit_inverse',
             r'\{': '{',
             r'\}': '}',
             r'\sigma': 'sigma',
             }
    # before{expression} => after(expression)
    ftrans = {r'\sqrt': 'sqrt',
              r'\Delta': 'diff',
              r'\sum_': 'sum',
              r'\Phi': 'NORM.CDF',
              # the inverse of the normal cdf is the ppf (not the pdf)
              r'\Phi^{-1}': 'NORM.PPF',
              }
    regtrans = {
        r'\\Delta ([A-Za-z_][\w{},\^]*)': r'diff(\1)',   # \Delta xy => diff(xy)
        r'_{t-([1-9])}': r'(-\1)',                       # _{t-x} => (-x)
        r'\^\{([\w]+)\}': r'_{\1}',                      # ^{xx} => _{xx}
        r'\^\{([\w]+),([\w]+)\}': r'_{\1}_{\2}',         # ^{xx,yy} => _{xx}_{yy}
        r'\s*\\times\s*': '*',
    }

    for before, to in ftrans.items():
        temp = defunk(before, to, temp)
    for before, to in trans.items():
        temp = temp.replace(before, to)
    for before, to in regtrans.items():
        temp = re.sub(before, to, temp)
    temp = debrace(temp)
    temp = defrack(temp)
    if bankadd:
        temp = rebank(temp)

    # turn the sum(index)(expression) forms into nested sum(index,expression)
    temp = re.sub(r'sum\(n,s,t\)' + (pt.namepat), r'sum(n_{bank},sum(s,sum(t,\1)))', temp)
    temp = re.sub(r'sum\(n\)sum\(s\)sum\(t\)' + (pt.namepat), r'sum(n_{bank},sum(s,sum(t,\1)))', temp)
    temp = re.sub(r'sum\(n\)' + (pt.namepat), r'sum(n_{bank},\1)', temp)
    temp = re.sub(fr'sum\({pt.namepat}\)\(', r'sum(\1,', temp)

    # strip each line and split it at '='
    sides = [line.strip().split('=') for line in temp.splitlines()]
    # blanks are removed from the right hand side
    equations = [lhs + ' = ' + rhs.replace(' ', '') for lhs, rhs in sides]
    frmls = ['Frml ' + fname + ' ' + eq + ' $ '
             for eq, (fname, _) in zip(equations, org)]
    frmls = [doable(frml) for frml in frmls]
    return '\n'.join(frmls + findlists(input))
def dynlatextotxt(input, show=False):
    r'''Translate the LaTeX output of Dynare to business logic (BL).

    Every ``\begin{dmath}`` ... ``\end{dmath}`` body is taken as an
    equation.  Fractions are rewritten by ``defrack``; then plain
    replacements (``trans``), ``defunk`` with braces (``ftrans``), ``defunk``
    with parentheses (``ftransp``) and regular expressions (``regtrans``)
    are applied.  Each resulting line is emitted as ``Frml <> <line> $``.

    Every step runs inside an ``mc.ttimer`` context which is handed the
    ``show`` flag.

    Args:
        input: LaTeX text as written by Dynare.
        show: Passed to the timers; if true a completion message is printed.

    Returns:
        The formulas joined by newlines.
    '''
    with mc.ttimer('Findall', show):
        ex12 = re.findall(r'\\begin{dmath}\n(.*?)\n\\end{dmath}', input, re.DOTALL)
    with mc.ttimer('Split', show):
        # put blanks around the first '='
        org = [' = '.join(e.split('=', 1)) for e in ex12]
    with mc.ttimer('Strip', show):
        ex15 = [defrack(eq.strip()) for eq in org]
    with mc.ttimer('join', show):
        temp = '\n'.join(ex15)

    # plain replacements, applied in this order
    trans = {r'\left': '',
             r'\right': '',
             r'\min': 'min',
             r'\max': 'max',
             r'&': '',
             r'\\': '',
             r'\_': '_',
             r'[': '(',
             r']': ')',
             r'_{t}': '',
             r'\,': ' *',
             r'\leq': ' <= ',
             r'\neq': ' != ',
             r'\geq': ' >= ',
             }
    # before{expression} => after(expression)
    ftrans = {r'\sqrt': 'sqrt',
              r'\Delta': 'diff',
              r'^': '^',
              r'\sum_': 'sum',
              }
    # before(expression) => after(expression)
    ftransp = {r'\log': 'log',
               }
    regtrans = {
        r'\\Delta ([A-Za-z][\w{},\^]*)': r'diff(\1)',   # \Delta xy => diff(xy)
        r'_{t-([1-9])}': r'(-\1)',                      # _{t-x} => (-x)
        '{' + pt.namepat + '}': r'\1',                  # {name} => name
    }

    for before, to in trans.items():
        with mc.ttimer(f'replace {before}', show):
            temp = temp.replace(before, to)
    for before, to in ftrans.items():
        with mc.ttimer(f'defunk {before}', show):
            temp = defunk(before, to, temp)
    for before, to in ftransp.items():
        with mc.ttimer(f'defunk {before}', show):
            temp = defunk(before, to, temp, '(', ')')
    for before, to in regtrans.items():
        # hand over show like every other timer in this function
        with mc.ttimer(f'Regtrans {before}', show):
            temp = re.sub(before, to, temp)

    if show:
        print('Translation from Latex to BLL finished')

    frmls = ['Frml <> ' + eq + ' $ ' for eq in temp.splitlines()]
    return '\n'.join(frmls)
if __name__ == '__main__':
    # Disabled example of a full translation; switch the condition on to run.
    if 0:
        test = r'''\
Loans can be in 3 stages, 1,2 3.
New loans will be generated and loans will mature.
\begin{equation}
\label{eq:Norm}
TR^{stage\_from,stage} = \frac{TR\_U^{stage\_from,stage}}{1+0*\sum_{stage\_from2}(TR\_U^{stage\_from2,stage})} \times(1-M^{stage}-WRO^{stage})
\end{equation}

List $stage=\{s1, s2,s3\}$
List $stage\_from=\{s1, s2,s3\}$
List $stage\_from2=\{s1, s2,s3\}$
List $stage\_to=\{s1, s2,s3\}$
'''
        print(latextotxt(test))

    # Raw string: in a normal literal the \b, \t and \r of \begin, \text and
    # \right would be read as backspace, tab and carriage return.
    test1 = r'''
list $agegroup=\{16, 17, 18, 19, 20, 99, 100 \} \\
    end : \{0 , 0 , 0 , 0 , 0 , 0 , 1\}$

\begin{equation}
\label{eq:mod_another}
\begin{split}
\text{[NONEND]}\; & \left(QC^{agegroup}_t\right)^{-ECX_t} & &= \left(\dfrac{PCTOT_t}{PCTOT_{t+1}}\right) * QC^{agegroup+1}_{t+1} \\
\text{}\;& & &= \dfrac{VB^{agegroup-1}_{t-1} * \dfrac{NPOP^{agegroup-1}_{t-1}}{NPOP^{agegroup}_t} * (1+R) + VY^{agegroup}_t - VB^{agegroup}_{t+1} )}{ PCTOT_{t}}
\end{split}
\end{equation}
'''
    print(findlists(test1))