00001 #include "regExpMatch.h"
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
/*
 * Forward declarations for the helpers that continue a match once a '*'
 * has been encountered in the pattern.  matche() calls
 * matche_after_star() before its definition appears further down.
 * No definition of fast_match_after_star() appears in the visible part
 * of this file.
 */
int matche_after_star (char *pattern, char *text);
int fast_match_after_star (char *pattern, char *text);
00045
00046
00047
00048
00049
00050 BOOLEAN is_pattern (char *p)
00051
00052
00053 {
00054 while (*p)
00055
00056
00057 {
00058 switch (*p++)
00059
00060
00061 {
00062 case '?':
00063 case '*':
00064 case '[':
00065 case '\\':
00066 return TRUE;
00067 }
00068 }
00069 return FALSE;
00070 }
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087 BOOLEAN is_valid_pattern (char *p, int *error_type)
00088
00089
00090 {
00091
00092 *error_type = PATTERN_VALID;
00093
00094 while (*p)
00095
00096
00097 {
00098
00099 switch (*p)
00100
00101
00102 {
00103
00104 case '\\':
00105 if (!*++p)
00106
00107
00108 {
00109 *error_type = PATTERN_ESC;
00110 return FALSE;
00111 }
00112 p++;
00113 break;
00114
00115
00116 case '[':
00117 p++;
00118
00119 if (*p == ']')
00120
00121
00122 {
00123 *error_type = PATTERN_EMPTY;
00124 return FALSE;
00125 }
00126
00127 if (!*p)
00128
00129
00130 {
00131 *error_type = PATTERN_CLOSE;
00132 return FALSE;
00133 }
00134
00135 while (*p != ']')
00136
00137
00138 {
00139
00140 if (*p == '\\')
00141
00142
00143 {
00144 p++;
00145
00146 if (!*p++)
00147
00148
00149 {
00150 *error_type = PATTERN_ESC;
00151 return FALSE;
00152 }
00153 }
00154 else p++;
00155
00156 if (!*p)
00157
00158
00159 {
00160 *error_type = PATTERN_CLOSE;
00161 return FALSE;
00162 }
00163
00164 if (*p == '-')
00165
00166
00167 {
00168
00169 if (!*++p || *p == ']')
00170
00171
00172 {
00173 *error_type = PATTERN_RANGE;
00174 return FALSE;
00175 }
00176 else
00177
00178
00179 {
00180
00181 if (*p == '\\')
00182 p++;
00183
00184
00185 if (!*p++)
00186
00187
00188 {
00189 *error_type = PATTERN_ESC;
00190 return FALSE;
00191 }
00192 }
00193 }
00194 }
00195 break;
00196
00197 case '*':
00198 case '?':
00199 default:
00200 p++;
00201 break;
00202 }
00203 }
00204 return TRUE;
00205 }
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239 int matche (register char *p, register char *t)
00240
00241
00242 {
00243 register char range_start, range_end;
00244 BOOLEAN invert;
00245 BOOLEAN member_match;
00246 BOOLEAN loop;
00247 for ( ; *p; p++, t++)
00248
00249
00250 {
00251
00252
00253 if (!*t)
00254
00255
00256 {
00257 return ( *p == '*' && *++p == '\0' ) ?
00258 MATCH_VALID : MATCH_ABORT;
00259 }
00260
00261 switch (*p)
00262
00263
00264 {
00265 case '?':
00266 break;
00267 case '*':
00268 return matche_after_star (p, t);
00269
00270 case '[':
00271
00272
00273 {
00274
00275 p++;
00276
00277 invert = FALSE;
00278 if (*p == '!' || *p == '^')
00279
00280
00281 {
00282 invert = TRUE;
00283 p++;
00284 }
00285
00286
00287 if (*p == ']')
00288
00289
00290 {
00291 return MATCH_PATTERN;
00292 }
00293 member_match = FALSE;
00294 loop = TRUE;
00295 while (loop)
00296
00297
00298 {
00299
00300 if (*p == ']')
00301
00302
00303 {
00304 loop = FALSE;
00305 continue;
00306 }
00307
00308 if (*p == '\\')
00309
00310
00311 {
00312 range_start = range_end = *++p;
00313 }
00314 else range_start = range_end = *p;
00315
00316 if (!*p)
00317 return MATCH_PATTERN;
00318
00319 if (*++p == '-')
00320
00321
00322 {
00323
00324 range_end = *++p;
00325
00326
00327
00328 if (range_end == '\0' || range_end == ']')
00329 return MATCH_PATTERN;
00330
00331 if (range_end == '\\')
00332
00333
00334 {
00335 range_end = *++p;
00336
00337
00338 if (!range_end)
00339 return MATCH_PATTERN;
00340 }
00341
00342 p++;
00343 }
00344
00345
00346
00347 if (range_start < range_end)
00348
00349
00350 {
00351 if (*t >= range_start && *t <= range_end)
00352
00353
00354 {
00355 member_match = TRUE;
00356 loop = FALSE;
00357 }
00358 }
00359 else
00360
00361
00362 {
00363 if (*t >= range_end && *t <= range_start)
00364
00365
00366 {
00367 member_match = TRUE;
00368 loop = FALSE;
00369 }
00370 }
00371 }
00372
00373
00374 if ((invert && member_match) || !(invert || member_match))
00375 return MATCH_RANGE;
00376
00377
00378 if (member_match)
00379
00380
00381 {
00382 while (*p != ']')
00383
00384
00385 {
00386
00387 if (!*p)
00388 return MATCH_PATTERN;
00389
00390 if (*p == '\\')
00391
00392
00393 {
00394 p++;
00395
00396
00397 if (!*p)
00398 return MATCH_PATTERN;
00399 }
00400
00401 p++;
00402 }
00403 }
00404 break;
00405 }
00406 case '\\':
00407
00408 p++;
00409
00410 if (!*p)
00411 return MATCH_PATTERN;
00412
00413 default:
00414 if (*p != *t)
00415 return MATCH_LITERAL;
00416 }
00417 }
00418
00419 if (*t)
00420 return MATCH_END;
00421 else return MATCH_VALID;
00422 }
00423
00424
00425
00426
00427
00428 int matche_after_star (register char *p, register char *t)
00429
00430
00431 {
00432 register int match = 0;
00433 register char nextp;
00434
00435 while ( *p == '?' || *p == '*' )
00436
00437
00438 {
00439
00440 if (*p == '?')
00441
00442
00443 {
00444
00445 if (!*t++)
00446 return MATCH_ABORT;
00447 }
00448
00449 p++;
00450 }
00451
00452 if (!*p)
00453 return MATCH_VALID;
00454
00455 nextp = *p;
00456 if (nextp == '\\')
00457
00458
00459 {
00460 nextp = p[1];
00461
00462 if (!nextp)
00463 return MATCH_PATTERN;
00464 }
00465
00466 do
00467
00468
00469 {
00470
00471
00472
00473
00474 if (nextp == *t || nextp == '[')
00475 match = matche(p, t);
00476
00477 if (!*t++)
00478 match = MATCH_ABORT;
00479 } while ( match != MATCH_VALID &&
00480 match != MATCH_ABORT &&
00481 match != MATCH_PATTERN);
00482
00483 return match;
00484 }
00485
00486
00487
00488
00489
00490 BOOLEAN match( char *p, char *t )
00491
00492
00493 {
00494 int error_type;
00495 error_type = matche(p,t);
00496 return (error_type == MATCH_VALID ) ? TRUE : FALSE;
00497 }
00498 #ifdef TEST
00499
00500
00501
00502
00503
00504 #include <stdio.h>
00505 int main(int argc, char *argv[])
00506 {
00507 int error;
00508 int is_valid_error;
00509 if (argc != 3)
00510 printf("Usage: %s Pattern Text\n",argv[0]);
00511 else
00512
00513
00514 {
00515 printf("Pattern: %s\n", argv[1]);
00516 printf("Text: %s\n", argv[2]);
00517 if (!is_pattern(argv[1]))
00518 printf("First Argument Is Not A Pattern\n");
00519 else
00520
00521
00522 {
00523 error = matche(argv[1],argv[2]);
00524 is_valid_pattern(argv[1],&is_valid_error);
00525 switch (error)
00526
00527
00528 {
00529 case MATCH_VALID:
00530 printf("Match Successful");
00531 if (is_valid_error != PATTERN_VALID)
00532 printf(" -- is_valid_pattern() "
00533 "is complaining\n");
00534 else printf("\n");
00535 break;
00536 case MATCH_LITERAL:
00537 printf("Match Failed on Literal\n");
00538 break;
00539 case MATCH_RANGE:
00540 printf("Match Failed on [..]\n");
00541 break;
00542 case MATCH_ABORT:
00543 printf("Match Failed on Early "
00544 "Text Termination\n");
00545 break;
00546 case MATCH_END:
00547 printf("Match Failed on Early "
00548 "Pattern Termination\n");
00549 break;
00550 case MATCH_PATTERN:
00551 switch (is_valid_error)
00552
00553
00554 {
00555 case PATTERN_VALID:
00556 printf("Internal Disagreement "
00557 "On Pattern\n");
00558 break;
00559 case PATTERN_ESC:
00560 printf("Literal Escape at "
00561 "End of Pattern\n");
00562 break;
00563 case PATTERN_RANGE:
00564 printf("No End of Range in "
00565 "[..] Construct\n");
00566 break;
00567 case PATTERN_CLOSE:
00568 printf("[..] Construct is Open\n");
00569 break;
00570 case PATTERN_EMPTY:
00571 printf("[..] Construct is Empty\n");
00572 break;
00573 default:
00574 printf("Internal Error in "
00575 "is_valid_pattern()\n");
00576 }
00577 break;
00578 default:
00579 printf("Internal Error in matche()\n");
00580 break;
00581 }
00582 }
00583 }
00584 return(0);
00585 }
00586
00587 #endif