#include <stdio.h>
#include <stdlib.h>     /* exit(), malloc(), free() */
#include <string.h>     /* memcpy() */
#include <sys/types.h>  /* needed by regex on some systems */
#include <regex.h>      /* POSIX regular expression library */

/*
 * Sample of using the POSIX regular expression library.
 * This attempts to match a regular expression against the first
 * command line argument and prints each parenthesized submatch.
 */

/* Number of regmatch_t slots handed to regexec(): slot 0 is the whole
   match, slots 1..MAX_MATCHES-1 are the parenthesized subexpressions. */
#define MAX_MATCHES 10

/*
 * get_match - return a freshly allocated copy of one submatch.
 *
 *   m      offsets of the submatch as filled in by regexec()
 *   input  the string that was passed to regexec()
 *
 * Returns NULL when the submatch did not participate in the match
 * (m.rm_so == -1). Otherwise returns a NUL-terminated copy of
 * input[m.rm_so .. m.rm_eo); the caller owns it and must free() it.
 * On allocation failure an error is printed and the program exits
 * with status 1.
 */
char *
get_match(regmatch_t m, const char *input)
{
    char *match;
    size_t len;

    /* if no match specified, return NULL */
    if (m.rm_so == -1) {
        return NULL;
    }

    /* len is the length of the substring that was matched */
    len = (size_t)(m.rm_eo - m.rm_so);

    /* allocate enough memory for a copy of the substring plus the NUL */
    match = malloc(len + 1);
    if (match == NULL) {
        fprintf(stderr, "Error allocating memory in get_match\n");
        exit(1);
    }

    /* copy exactly len bytes, then terminate the copy ourselves */
    memcpy(match, input + m.rm_so, len);
    match[len] = '\0';

    return match;
}

/*
 * Example of using the regular expression library from C.
 * For details on the regular expression library, try "man regex".
 *
 * Exit status is 1 on a usage error, a pattern compile error or a
 * regexec() failure, and 0 otherwise (including "no match").
 */
int
main(int argc, char **argv)
{
    /*
     * Here the sample regular expression is defined. For more
     * information about POSIX regular expressions you can use
     * "man 7 regex" for a complete description.
     *
     * This regular expression will match any string that looks roughly
     * like "name = value", where name consists of alphabetic characters
     * and value consists of alphanumeric characters. There can be any
     * amount of whitespace between the name and the '=', and between the
     * '=' and the value. There must be nothing else in the string (or it
     * won't match!).
     *
     * Strings that will match:
     *     "PROMPT = Hello"
     *     "Count = 22"
     *     "fred=1234joe"
     *
     * Strings that won't match:
     *     " noleadingspace = allowed"
     *     "123=456"
     *
     * Here is the breakdown of this regular expression:
     *
     *   ^              matches the beginning of the string. This forces
     *                  the next part of the expression to match the
     *                  first character (otherwise there could be
     *                  anything before the first alphabetic char).
     *   [[:alpha:]]+   matches any sequence of alphabetic characters.
     *                  [[:alpha:]] matches one alphabetic character and
     *                  the + means "one or more".
     *   [[:space:]]*   matches any sequence of 0 or more whitespace
     *                  characters; the * means "0 or more".
     *   =              matches '=' (only one).
     *   [[:alnum:]]+   matches any sequence of one or more alphanumeric
     *                  characters.
     *   $              matches the end of the string, so the string
     *                  must end in something matched by [[:alnum:]]+.
     *
     * The parentheses are special: they don't match any characters
     * themselves, instead they tell the library to "remember" the part
     * of the string that matched the enclosed part of the expression.
     * This is the main reason we use the regular expression here: the
     * first parenthesized part is the "name" and the second is the
     * "value" in "name = value". We also use the expression to find out
     * whether the entire string has the right form (if not, there is no
     * match and the string is not legal).
     */
    const char *regular_expression =
        "^([[:alpha:]]+)[[:space:]]*=[[:space:]]*([[:alnum:]]+)$";

    regex_t pattbuf;                 /* the 'compiled' regular expression */
    regmatch_t matches[MAX_MATCHES]; /* offsets of all matches */
    char errbuf[256];                /* text for regexec() failures */
    char *s;
    int ngroups;
    int i;
    int rc;
    int status = 0;

    /* make sure we got a command line argument! */
    if (argc < 2) {
        printf("You must supply and argument (the string to be matched).\n");
        /* argv[0] may be NULL when argc is 0; never hand NULL to %s */
        printf("For example: %s \"path = hello123\"\n",
               (argc > 0 && argv[0] != NULL) ? argv[0] : "regex");
        exit(1);
    }

    /* compile the regular expression (POSIX extended syntax) */
    if (regcomp(&pattbuf, regular_expression, REG_EXTENDED) != 0) {
        /* some problem with the regular expression - this is fatal... */
        fprintf(stderr, "Error - pattern won't compile\n");
        exit(1);
    }

    rc = regexec(&pattbuf, argv[1], MAX_MATCHES, matches, 0);
    if (rc == REG_NOMATCH) {
        printf("No match found - illegal input\n");
    } else if (rc != 0) {
        /* any other non-zero result is a failure, not a match: the
           contents of matches[] must not be used in that case */
        regerror(rc, &pattbuf, errbuf, sizeof errbuf);
        fprintf(stderr, "Error - regexec failed: %s\n", errbuf);
        status = 1;
    } else {
        /* The first entry is for the whole string, we don't care about
           that one. The remaining entries are for the parenthesized
           parts of the expression; re_nsub says how many there are.
           Never walk past the end of matches[]. */
        ngroups = MAX_MATCHES - 1;
        if (pattbuf.re_nsub < (size_t)ngroups) {
            ngroups = (int)pattbuf.re_nsub;
        }

        for (i = 1; i <= ngroups; i++) {
            s = get_match(matches[i], argv[1]);
            if (s == NULL) {
                continue;   /* this group did not take part in the match */
            }
            printf("Match %d: <%s>\n", i, s);
            free(s);
        }
    }

    /* free up the compiled regular expression */
    regfree(&pattbuf);

    return status;
}