//the beginnings of a regular expression library //currently very limited @global re_defined; @record regular_expression{ dfa }; if (re_defined == nil) { @local tokenize, node_str; @local nxt_nodes, dfa_match, dfa_match_helper; @local max_depth, debug; @global re_find, re_match, re_create, re_findend, re_findrange; re_defined = 1; debug = 0; @define tokenize(re) { @local tokens, len, i; tokens = []; len = length(re); for(i=0; i= last) for(x=last; x<=re[idx+1]; ++x) sub[x] = 1; else for(x=re[idx+1]; x<=last; ++x) sub[x] = 1; last = nil; ++idx; } else { sub[re[idx]] = 1; last = re[idx]; } } append(tokens,[type,sub,nil]); i = idx; break; } case '|': append(tokens,['or,nil]); break; case '*': append(tokens,['star,nil]); break; case '?': append(tokens,['maybe,nil]); break; case '.': append(tokens,['any,nil]); break; default: append(tokens,['chr,re[i],nil]); } } return tokens; } @define re_create(re) { @local tokens,fixup; tokens = tokenize(re); //printf("tokens = %a\n",tokens); fixup = @lambda(lst,par_nxt) { @local i,len,t; len = length(lst); if (debug) printf("FIXUP %a\n",lst); for(i=0; i"; if (length(depth) > 0) depth = depth[0]; else depth = 0; if (depth >= max_depth) return "D"; printf("%s\n",n[0]); switch(n[0]) { case 'star: return sprintfa("star %s %s", node_str(n[1],depth+1),node_str(n[2],depth+1)); case 'maybe: return sprintfa("maybe %s %s", node_str(n[1],depth+1),node_str(n[2],depth+1)); case 'or: return "or"; case 'sub: return "sub"; case 'match: return "match"; case 'any: return sprintfa("any %s",node_str(n[1],depth+1)); case 'chr: return sprintfa("%a",[n[0],n[1],node_str(n[2],depth+1)]); case 'class: case 'notclass: { @local str; str = "{ "; foreach(@lambda(k,v) { if (str != "{ ") str += ","; str += sprintfa("%c",k); },n[1]); str += " }"; return sprintfa("[%a, %s, %s]",n[0],str,node_str(n[2],depth+1)); } default: return sprintfa("%a",[n[0],n[1]]); } } @define nxt_nodes(chr,node) { if (debug) { printf("nxt_nodes with chr=%c and type=%s\n", chr == nil ? 0 : chr, node[0]); printf("%s next: %s\n",node_str(node), node[0]=='chr ? node_str(node[2]) : ""); } switch(node[0]) { case 'any: if (chr != nil) return [node[1]]; else return []; case 'chr: if (chr == node[1]) return [node[2]]; else return []; case 'class: if (node[1][chr] != nil) return [node[2]]; else return []; case 'notclass: if (node[1][chr] != nil) return []; else return [node[2]]; case 'maybe: case 'star: { @local ret; ret = nxt_nodes(chr,node[1]); ret = concat(ret,nxt_nodes(chr,node[2])); return ret; } case 'or: { @local ret; ret = nxt_nodes(chr,node[1]); ret = concat(ret,nxt_nodes(chr,node[2])); return ret; } case 'sub: return nxt_nodes(chr,node[1][0]); case 'match: if (chr == nil) return [node]; else return []; default: error("bad type: %s",node[0]); } } @define dfa_match(str,dfa) { @local tab; tab = mktabq(); tab[dfa.dfa] = 1; return dfa_match_helper(str,tab,0,1) >= 0; } //if entire_string is 1, only return true if the //entire string matches. @define dfa_match_helper(str,nodes,idx,entire_string) { @local len,i,ret,contains_match,idx2; if (idx == length(str)) { ret = mktabq(); foreach(@lambda(node,v) { ret[node] = 1; foreach(@lambda(n) { ret[n] = 1; },nxt_nodes(nil,node)); }, nodes); //check for matches. ret = tabkeys(ret); len = length(ret); for(i=0; i idx2 ? idx : idx2; } //printf("results: %a\n",map(@lambda(n){ return n[0];},ret)); if (debug) printf("ret_nodes for %c (char %d): \n %a\n",str[idx], idx,map(node_str,ret)); //printf("queue size: %d\n",length(nodes)); return dfa_match_helper(str,ret,idx+1,entire_string); } @define re_match(re,str) { @local dfa; if (debug) printf("re = %s str = %s\n",re,str); if (isregular_expression(re)) dfa = re; else dfa = re_create(re); if (debug) printf("dfa = %a\n",node_str(dfa.dfa)); return dfa_match(str,dfa); } //Returns the first index in the given string where the //given regular expression is found. @define re_find(re,str) { @local dfa,i,len,tab; if (isregular_expression(re)) dfa = re; else dfa = re_create(re); len = length(str); tab = mktabq(); tab[dfa.dfa] = 1; for(i=0; i= 0) return i; return -1; } //re_findend(re,str [, startidx]) //attempts to match re against str starting at startidx (default 0) //returns -1 if no match, or the index of the end of the //largest possible match if one exists @define re_findend(re,str,args...) { @local dfa,tab,start; if (isregular_expression(re)) dfa = re; else dfa = re_create(re); start = 0; if (length(args)>0) start=head(args); tab = mktabq(); tab[dfa.dfa] = 1; return dfa_match_helper(str,tab,start,0); } //Returns [idx_beg,idx_end], where idx_beg is the first index that //matches the re, and idx_end is the last index of the largest match //starting at idx_beg. //Returns nil if nothing in the string matches the re @define re_findrange(re,str) { @local dfa,i,len,tab,end; if (isregular_expression(re)) dfa = re; else dfa = re_create(re); len = length(str); tab = mktabq(); tab[dfa.dfa] = 1; for(i=0; i= 0) return [i,end]; } return nil; } }