# Source code for modelpattern

# -*- coding: utf-8 -*-
"""
Created on Mon Sep 02 19:32:22 2013

This module defines a number of patterns used in PYFS.
If a new function is introduced in the model definition language, it should be added to the
function names in funkname.

All functions in the module modeluserfunk will be added to the language and incorporated in the
Business Logic language.

@author: Ib
"""
import re 
import inspect 
from collections import namedtuple
from collections import defaultdict


import modelBLfunk

# names and lags
namepat_ng = r'(?:[A-Za-z_{][A-Za-z_{}0-9]*)'   # a name, as a non-capturing group
namepat = f'({namepat_ng})'                       # the same name, wrapped in a capturing group
lagpat = r'(?:\(([+-][0-9]+)\))?'                # optional signed lag in parentheses, e.g. (-1); captures the signed number

# comments
commentchar = '£'
commentpat = f'({commentchar}.*)'                # from the comment character to the end of the line

# Collect the upper-cased names of everything defined in the optional module
# modeluserfunk (skipping names that start with a double underscore).
# The module is optional, so any ordinary failure while importing or inspecting
# it leaves the list of user functions empty.
try:
    import modeluserfunk
    userfunk = [name.upper() for name, _ in inspect.getmembers(modeluserfunk)
                if not name.startswith('__')]
except Exception:
    # Deliberately best effort, but no longer a bare ``except:`` -- that also
    # swallowed KeyboardInterrupt and SystemExit.
    userfunk = []

# Upper-cased names of every member of modelBLfunk that does not start with a double underscore.
BLfunk = [o.upper() for o,t in inspect.getmembers(modelBLfunk) if not o.startswith('__')  ]
# Additional function names supplied by modelBLfunk; concatenated to the lists below.
classfunk = modelBLfunk.classfunk   
# Operators 
# All names that are recognised as functions: the fixed list below plus BLfunk, userfunk and classfunk.
# NOTE(review): the names are inserted into the regex unescaped, so the '.' in NORM.CDF / NORM.PPF
# matches any character, not only a literal dot -- confirm whether that is intended.
funkname    = 'DLOG SUM_EXCEL DIFF MIN MAX FLOAT NORM.CDF NORM.PPF ABS MOVAVG PCT_GROWTH'.split() + BLfunk+ userfunk + classfunk
funkname2   = [i+r'(?=\()' for i in funkname]               # a function is followed by a ( (lookahead, the ( is not consumed)
# Multi-character operators come first so they win over the single-character class.
# NOTE(review): inside the character class '+-/' is a range (+ , - . /); it happens to cover
# only characters that are meant to be operators here.
opname      = r'\*\*  != >=  <=  ==  [=+-/*@|()$><,.\]\[]'.split() # list of ordinary operators 
# One capturing group with every function name and operator as a non-capturing alternative.
oppat       = '('+'|'.join(['(?:' + i + ')' for i in funkname2+opname])+')'

# Numbers
# Digits with an optional fraction (12, 12., 12.5) or a bare fraction (.5),
# optionally followed by an exponent whose sign may be omitted (1e4, 1E-4, 1e+4).
# An earlier definition that required a sign in the exponent was immediately
# overwritten by this one and has been dropped as dead code.
numpat      = r'((?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?)'


# Formulas
# NOTE(review): the group opened in dollarpat is never closed, so it is not a valid regex on
# its own; it is not referenced in the visible part of this module -- confirm before using it.
dollarpat   = r'([$]'
upat        = r'([^$]*\$)'          # the rest up to and including the next $
frmlpat     = r'(FRML [^$]*\$)'     # a formula (FRML ... $), used for splitting a model into formulas
optionpat   = r'(?:[<][^>]*[>])?'   # zero or one option block enclosed in <>

# White space
ws          = r'[\s]+'              # 1 or more white spaces
ws2         = r'[\s]*'              # 0 or more white spaces

# Splits a formula into three groups: the leading keyword, the optional
# formula name with its optional <options>, and the expression up to and including $
splitpat = ''.join([
    namepat, ws,
    '(', namepat_ng, '?', ws2, optionpat, ')',
    ws, upat,
])

# For splitting a model into comments and statements
# (a statement is a command name followed by everything up to and including $)
statementpat = '|'.join([commentpat, namepat + ws2 + upat])

# A term of an expression: a number, an operator/function name, or a name with an optional lag
udtrykpat = '|'.join([numpat, oppat, namepat + lagpat])

# Pattern compiled once at import time from the module-level function names
udtrykre_old = re.compile(udtrykpat)

# One parsed term; the fields follow the capturing groups of udtrykpat
nterm = namedtuple('nterm', ['number', 'op', 'var', 'lag'])

def udtrykre(funks=[]):
    """Build and compile the regular expression that tokenises an expression.

    The module-level ``funkname`` list is rebuilt on every call from the fixed
    function names, ``BLfunk``, ``userfunk``, ``classfunk`` and the upper-cased
    ``__name__`` of every callable in ``funks``.

    :param funks: callables whose names should also be recognised as functions
    :return: a compiled pattern with four groups: number, operator/function, name, lag
    """
    global funkname

    fixed_names = 'DLOG SUM_EXCEL DIFF MIN MAX FLOAT NORM.CDF NORM.PPF ABS MOVAVG PCT_GROWTH'.split()
    extra_names = [func.__name__.upper() for func in funks]
    funkname = fixed_names + BLfunk + userfunk + classfunk + extra_names

    # a function name only counts when it is directly followed by a (
    function_alternatives = [name + r'(?=\()' for name in funkname]
    # list of ordinary operators
    operator_alternatives = r'\*\* != >= <= == [=+-/*@|()$><,.\]\[]'.split()

    wrapped = ['(?:' + alternative + ')'
               for alternative in function_alternatives + operator_alternatives]
    operator_pattern = '(' + '|'.join(wrapped) + ')'

    return re.compile(numpat + '|' + operator_pattern + '|' + namepat + lagpat)
#udtrykpatnew([f1,f2])
def find_frml(equations):
    """Return every formula found in a model text.

    Each element of the returned list is the text matched by ``frmlpat``: it
    starts with FRML (matched case-insensitively) and ends with the next $.
    No check is made that the text is a valid FRML statement.
    """
    frml_finder = re.compile(frmlpat, flags=re.IGNORECASE)
    return frml_finder.findall(equations)
def split_frml(frml):
    """Split a formula string into its parts using ``splitpat``.

    When the pattern is found a 4-tuple is returned:

    0. the whole matched formula statement
    1. the leading keyword (FRML)
    2. the formula name, including any <options>
    3. the formula expression, up to and including $

    When the pattern is not found, the input string is returned unchanged.
    """
    found = re.search(splitpat, frml)
    if found is None:
        return frml
    return found.group(0, 1, 2, 3)
def find_statements(a_model):
    """Split a model text into comments and statements.

    * a *comment* is whatever ``commentpat`` matches: from the comment marker
      to the end of the line
    * a *statement* starts with a name and ends with a $; every character in
      between is considered part of the statement

    Statements are not checked for meaningfulness.

    :return: a list of tuples ``(comment, command, rest of statement)``; the
             parts that did not take part in a match are empty strings
    """
    statement_finder = re.compile(statementpat)
    return statement_finder.findall(a_model)
def model_parse_old(equations,funks=[]):
    """Parse a model into formulas and their terms.

    The model text is upper-cased before it is split into formulas.

    :param equations: the model text
    :param funks: extra callables whose names are recognised as functions
    :return: a list with one tuple per formula:
             ``(fatoms(whole, frml, frmlname, expression), [nterm, ...])``

    The original author notes that parsing the whole model in one go is much
    faster than looping over the expressions of a model.
    """
    fatoms = namedtuple('fatoms', 'whole, frml ,frmlname, expression')
    expressionre = udtrykre(funks)

    parsed = []
    for frml_text in find_frml(equations.upper()):
        parts = split_frml(frml_text)
        atoms = fatoms(*parts)
        terms = [nterm(*found) for found in expressionre.findall(parts[3])]
        parsed.append((atoms, terms))
    return parsed
def model_parse(equations,funks=[]):
    """Parse a model into formulas and their terms, treating a zero lag as no lag.

    The model text is upper-cased before it is split into formulas. A lag of
    ``-0`` or ``+0`` is stored as an empty string; the original author notes
    that such lags occur in some models from the World Bank.

    :param equations: the model text
    :param funks: extra callables whose names are recognised as functions
    :return: a list with one tuple per formula:
             ``(fatoms(whole, frml, frmlname, expression), [nterm, ...])``
    """
    fatoms = namedtuple('fatoms', 'whole, frml ,frmlname, expression')
    expressionre = udtrykre(funks)

    parsed = []
    for frml_text in find_frml(equations.upper()):
        parts = split_frml(frml_text)
        atoms = fatoms(*parts)
        terms = []
        for found in expressionre.findall(parts[3]):
            lag = '' if found[3] in ('-0', '+0') else found[3]
            terms.append(nterm(found[0], found[1], found[2], lag))
        parsed.append((atoms, terms))
    return parsed
def _expand_list_range(items):
    """Expand a ``start * end`` range into a list of item names.

    Both ends are either plain integers (``2023 * 2031``) or a label followed
    by an integer (``AGE0 * AGE101``); the label of the start item is used
    for every generated name.

    NOTE(review): the end of the range is exclusive, as in ``range`` --
    ``AGE0 * AGE101`` yields AGE0 .. AGE100. Confirm this is the intended
    meaning of a range in the model language.

    :raises Exception: if the two ends do not have the same shape
    """
    start, end = items.split('*')
    label_and_number = r'(^[A-Z0-9_]*[A-Z_])([0-9]+$)'
    # re.split gives ['', label, number, ''] for a labelled item and [item] otherwise
    startitems = re.split(label_and_number, start.strip())
    enditems = re.split(label_and_number, end.strip())
    if len(startitems) != len(enditems):
        raise Exception(f'Range of lists wrong {startitems=} {enditems=}')
    if len(startitems) == 4:      # we have a character label
        label = startitems[1]
        startint = int(startitems[2])
        endint = int(enditems[2])
    elif len(startitems) == 1:
        label = ''
        startint = int(startitems[0])
        endint = int(enditems[0])
    else:
        raise Exception(f'wrong range in list: {startitems=} {enditems=}')
    return [label + str(number) for number in range(startint, endint)]


def list_extract(equations,silent=True):
    """Create the lists defined by the LIST statements of a model.

    A list statement has the form
    ``LIST name = sublist : items / sublist : items ... $`` where the items of
    a sublist are separated by blanks or commas, or given as a range
    ``start * end``. Everything is upper-cased.

    For the first sublist of each list a number of helper sublists are added,
    named after the first sublist with the suffixes ``_END``, ``_NOEND``,
    ``_START``, ``_NOSTART``, ``_MIDDLE`` (lists of '0'/'1' flags), ``_BEFORE``
    and ``_AFTER`` (the first sublist shifted one position, padded with '0').

    If a list is defined several times, the first definition is used.

    :param equations: the model text
    :param silent: when False, a warning is printed for a repeated definition
    :return: a dictionary ``{list name: {sublist name: [items]}}``
    :raises Exception: if a range is malformed or the sublists of a list do
                       not all have the same length
    """
    liste_dict = defaultdict(list)   # creates the lists of the model
    for comment, command, value in find_statements(equations):
        if command.upper() != 'LIST':
            continue
        stripvalue = value.replace('\n', '').upper()
        # [0:-1] drops the terminating $
        list_name, list_value = stripvalue[0:-1].split('=')
        list_name = list_name.strip()
        if list_name in liste_dict:
            if not silent:
                print('Warning ', list_name, 'Defined 2 times')
                print('Use ', list_name, liste_dict[list_name])
            continue

        this_dict = defaultdict(list)
        for segment in list_value.split('/'):
            name, items = segment.split(':')
            if '*' in segment:
                itemlist = _expand_list_range(items)
            else:
                itemlist = [t.strip() for t in re.split(r'[\s,]\s*', items) if t != '']
            this_dict[name.strip()] = itemlist

        first_sublist_name = list(this_dict.keys())[0]
        first_sublist = this_dict[first_sublist_name]
        list_len = len(first_sublist)

        this_dict[first_sublist_name+'_END'] = (['0'] * (list_len - 1)) + ['1']
        this_dict[first_sublist_name+'_NOEND'] = (['1'] * (list_len - 1)) + ['0']
        this_dict[first_sublist_name+'_START'] = ['1'] + (['0'] * (list_len - 1))
        this_dict[first_sublist_name+'_NOSTART'] = ['0'] + (['1'] * (list_len - 1))
        # A list with fewer than two items has no middle; the old expression
        # produced two flags for a one-item list, which then failed the length
        # check below.
        if list_len < 2:
            middle = ['0'] * list_len
        else:
            middle = ['0'] + (['1'] * (list_len - 2)) + ['0']
        this_dict[first_sublist_name+'_MIDDLE'] = middle
        this_dict[first_sublist_name+'_BEFORE'] = ['0'] + first_sublist[:-1]
        this_dict[first_sublist_name+'_AFTER'] = first_sublist[1:] + ['0']

        for sublist, values in this_dict.items():
            if len(values) != list_len:
                raise Exception(f'In {list_name} the length of sublist {sublist} is {len(values)} should be {list_len} ')
        liste_dict[list_name] = this_dict
    return liste_dict
def check_syntax_model(equations,test=True):
    """Check the formulas of a model for syntax errors using ``ast.parse``.

    Blanks and newlines are removed from each formula expression (without its
    terminating $) and the result is parsed as Python source. Checking stops
    at the first formula that fails to parse; that formula is printed.

    :param equations: the model text
    :param test: when true, a syntax error only makes the function return
                 False; when false, an AssertionError is raised instead
    :return: True if every formula parsed, False if one did not
    :raises AssertionError: on a syntax error when ``test`` is false
    """
    import ast

    for frml in find_frml(equations):
        a, fr, n, udtryk = split_frml(frml)
        python_source = re.sub(r'\n', '', re.sub(' ', '', udtryk[:-1]))
        try:
            ast.parse(python_source)
        except SyntaxError:
            print('Syntax error in:', frml)
            if not test:
                # Raised explicitly: an assert statement is removed when
                # Python runs with -O, which silently disabled this check.
                raise AssertionError
            return False
    return True
def udtryk_parse(udtryk,funks=[]):
    """Return the list of terms of an expression.

    The expression can be ``lhs=rhs $`` or just an expression like ``x+b``.
    It is upper-cased and all whitespace is removed before it is tokenised
    with the pattern from ``udtrykre``. A lag of ``-0`` or ``+0`` is stored as
    an empty string.

    :param udtryk: the expression text
    :param funks: extra callables whose names are recognised as functions
    :return: a list of ``nterm`` named tuples
    """
    compact = re.sub(r'\s+', '', udtryk.upper())   # remove all blanks
    tokenizer = udtrykre(funks=funks)

    terms = []
    for found in tokenizer.findall(compact):
        lag = '' if found[3] in ('-0', '+0') else found[3]
        terms.append(nterm(found[0], found[1], found[2], lag))
    return terms
def kw_frml_name(frml_name0, kw,default=None):
    """Find a keyword and its value in a formula name like ``'<kw=xxx,res=kdkdk>'``.

    Blanks are removed first. The options are taken from just after the first
    ``<`` up to, but not including, the last character of the name, and are
    separated by commas. Keywords are compared case-insensitively; when a
    keyword occurs more than once, the last occurrence wins.

    :param frml_name0: the formula name, possibly with an option block
    :param kw: the keyword to look for
    :param default: returned when the keyword is not found and it is not None
    :return: the text after ``=`` for an option of the form ``key=value``,
             1 for a matching option of any other form, otherwise ``default``
    """
    compact_name = frml_name0.replace(' ', '')
    found = None
    open_pos = compact_name.find('<')   # where is the <
    if open_pos >= 0:
        for option in compact_name[open_pos + 1:-1].split(','):
            parts = option.split('=')
            if parts[0].upper() != kw.upper():
                continue
            found = parts[1] if len(parts) == 2 else 1
    return found if found is not None else default
def f1():
    """Return the constant 42.

    Presumably a dummy function for trying out the ``funks`` arguments of
    this module -- TODO confirm.
    """
    return 42
def f2():
    """Return the constant 103.

    Presumably a dummy function for trying out the ``funks`` arguments of
    this module -- TODO confirm.
    """
    return 103
if __name__ == '__main__' and 1 :
    #%%
    # Ad-hoc examples that run only when the module is executed as a script.
    # NOTE(review): the indentation of this block was lost in the source this was
    # recovered from; the extent of the disabled ``if 0:`` section below is a
    # best guess -- confirm against the original file.
    nterm = namedtuple('nterm', ['number', 'op', 'var', 'lag'])
    if 0:
        # disabled: parsing examples with lags of +0 / -0
        model_parse('frml <> a= b+c( + 0)+3.444 $')
        xx = model_parse('frml <> a+b+b+b=x(-1)+y(-33) $ frml <> a= b+c(-0)+3.444 $')
        for ((frml, fr, n, udtryk), nt) in xx:
            print(f'{udtryk=}')
            for t in nt:
                print(f'{t=} ')
        print(*udtryk_parse('frml <> a+b+b+b=x(+0)+y(-33) + b+c(-0)/ 1e4 +3.444 $'),sep=' \n')
    # list definitions: plain items, a single sublist, a labelled range and numeric ranges
    list_extract('list bankdic = bank : Danske , Nordea / danske : yes , no $')
    list_extract('list bankdic = bank : Danske , Nordea $')
    list_extract('list agedic = age : age0 * age101 $')
    list_extract('list yeardic = year : 2023 * 2031 / lag1 : 2022 * 2030 $')