00001
00002
00003
00004
00005
00006
00007
00008
00009 #include "mit-copyright.h"
00010
00011
00012
00013
00014
00015
00016
00017 #include "main.h"
00018 #include "new_string.h"
00019 #include "int_dictionary.h"
00020 #include "lexer.h"
00021 #include "parser.h"
00022
00023
00024
00025
00026
00027
/* Line number of the input currently being lexed.  input() increments it
 * for every newline it returns and unput() decrements it when a newline is
 * pushed back; lex_open() and lex_open_buffer() reset it to 1. */
int yylineno;

/* Index of the next unread character of the in-memory buffer when lexing
 * from a string; reset to 0 by lex_open_buffer(). */
int yybufferpos;
00030
00031
00032
00033
00034
00035 static int_dictionary keyword_dict = NULL;
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 static FILE *input_file;
00048 static char *input_buffer;
00049
00050
00051
00052
00053
00054
00055 static int pushback = -1;
00056
00057 static char
00058 input()
00059 {
00060 int c;
00061
00062 if (pushback != -1) {
00063 c = pushback;
00064 pushback = -1;
00065 if (c == '\n')
00066 yylineno++;
00067 return (c);
00068 }
00069
00070 if (input_file) {
00071 c = getc(input_file);
00072 }
00073 else {
00074 c = input_buffer[yybufferpos++];
00075 }
00076 if (c == '\n')
00077 yylineno++;
00078 if (c == EOF)
00079 c = 0;
00080
00081 return (c);
00082 }
00083
00084 static void
00085 unput(c)
00086 int c;
00087 {
00088 pushback = c;
00089 if (c == '\n')
00090 yylineno--;
00091 }
00092
00093
00094
00095
00096
00097
00098
00099 struct keyword_info {
00100 string keyword;
00101 int keyword_number;
00102 };
00103
00104
00105
00106
00107
00108
00109 static struct keyword_info keywords[] = {
00110 {"and", '&'},
00111 {"appendport", APPENDPORT},
00112 {"buffer", BUFFER},
00113 {"break", BREAK},
00114 {"closeinput", CLOSEINPUT},
00115 {"closeoutput", CLOSEOUTPUT},
00116 {"closeport", CLOSEPORT},
00117 {"case", CASE},
00118 {"clearbuf", CLEARBUF},
00119 {"default", DEFAULT},
00120 {"do", DO},
00121 {"downcase", DOWNCASE},
00122 {"else", ELSE},
00123 {"elseif", ELSEIF},
00124 {"endcase", ENDCASE},
00125 {"endif", ENDIF},
00126 {"endwhile", ENDWHILE},
00127 {"exec", EXEC},
00128 {"execport", EXECPORT},
00129 {"exit", EXIT},
00130 {"fields", FIELDS},
00131 {"get", GET},
00132 {"getenv", GETENV},
00133 {"if", IF},
00134 {"inputport", INPUTPORT},
00135 {"lany", LANY},
00136 {"lbreak", LBREAK},
00137 {"lspan", LSPAN},
00138 {"match", MATCH},
00139 {"noop", NOOP},
00140 {"not", '!'},
00141 {"or", '|'},
00142 {"outputport", OUTPUTPORT},
00143 {"print", PRINT},
00144 {"protect", PROTECT},
00145 {"put", PUT},
00146 {"rany", RANY},
00147 {"rbreak", RBREAK},
00148 {"rspan", RSPAN},
00149 {"set", SET},
00150 {"show", SHOW},
00151 {"substitute", SUBSTITUTE},
00152 {"then", THEN},
00153 {"upcase", UPCASE},
00154 {"while", WHILE},
00155 {"verbatim", VERBATIM},
00156 {"jvar", JVAR},
00157 {"paragraph", PARAGRAPH}};
00158
00159
00160
00161
00162
00163
00164 void
00165 lex_open(file)
00166 FILE *file;
00167 {
00168
00169
00170
00171 input_file = file;
00172 input_buffer = NULL;
00173 yylineno = 1;
00174 pushback = -1;
00175
00176
00177
00178
00179 if (!keyword_dict) {
00180 int i;
00181
00182 keyword_dict = int_dictionary_Create(101);
00183
00184 for (i = 0; i < sizeof(keywords) / sizeof(struct keyword_info); i++)
00185 int_dictionary_Define(keyword_dict, keywords[i].keyword,
00186 0)->value = keywords[i].keyword_number;
00187 }
00188 }
00189
00190 void
00191 lex_open_buffer(buffer)
00192 char *buffer;
00193 {
00194
00195
00196
00197 input_buffer = buffer;
00198 input_file = NULL;
00199 yylineno = 1;
00200 yybufferpos = 0;
00201 pushback = -1;
00202
00203
00204
00205
00206 if (!keyword_dict) {
00207 int i;
00208
00209 keyword_dict = int_dictionary_Create(101);
00210
00211 for (i = 0; i < sizeof(keywords) / sizeof(struct keyword_info); i++)
00212 int_dictionary_Define(keyword_dict, keywords[i].keyword,
00213 0)->value = keywords[i].keyword_number;
00214 }
00215 }
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
#define is_octal_digit(c) (((c)>='0') && ((c)<='7'))

/*
 * eat_escape_code -- read what follows a backslash and return the character
 * it denotes.
 *
 *   end of input   -> pushed back, 0 returned
 *   newline        -> 0 (the backslash-newline pair yields no character)
 *   n, t, b        -> newline, tab, backspace
 *   1-3 octal digits -> the character with that octal code
 *   anything else  -> that character itself
 *
 * A return value of 0 therefore tells the caller there is nothing to store.
 */
static char
eat_escape_code()
{
    int ch = input();
    int value, ndigits;

    if (ch == 0) {
        unput(ch);
        return (ch);
    }
    if (ch == '\n')
        return (0);
    if (ch == 'n')
        return ('\n');
    if (ch == 't')
        return ('\t');
    if (ch == 'b')
        return ('\b');
    if (!is_octal_digit(ch))
        return (ch);

    /* First octal digit is in hand; accept at most two more. */
    value = ch - '0';
    for (ndigits = 1; ndigits < 3; ndigits++) {
        ch = input();
        if (!is_octal_digit(ch)) {
            unput(ch);
            break;
        }
        value = value * 8 + ch - '0';
    }
    return (value);
}
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298 static char *
00299 eat_string(starting_line)
00300 int starting_line;
00301 {
00302 int c;
00303 char buffer[500];
00304 char *ptr = buffer;
00305
00306 for (;;) {
00307
00308
00309
00310 c = input();
00311 if (!c) {
00312 unput(c);
00313 report_parse_error("unterminated string found beginning",
00314 starting_line);
00315 return (0);
00316 }
00317
00318
00319
00320
00321 if (c == '\\') {
00322 c = eat_escape_code();
00323 if (!c)
00324 continue;
00325 }
00326 else if (c == '"') {
00327 *ptr = 0;
00328 return (string_Copy(buffer));
00329 }
00330 else if (c == '\n') {
00331 unput(c);
00332
00333 report_parse_error("carriage return found in string", yylineno);
00334 return (0);
00335 }
00336
00337
00338
00339
00340 *ptr = c;
00341 ptr++;
00342
00343
00344
00345
00346
00347
00348 if (ptr > buffer + sizeof(buffer) - 20) {
00349 string rest_of_string, result;
00350
00351 rest_of_string = eat_string(starting_line);
00352 if (!rest_of_string)
00353 return (0);
00354
00355 *ptr = 0;
00356 result = string_Concat(buffer, rest_of_string);
00357 free(rest_of_string);
00358 return (result);
00359 }
00360 }
00361 }
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376 static char *
00377 eat_show_line(test_for_endshow)
00378 int test_for_endshow;
00379 {
00380 int c;
00381 int saw_escape_code = 0;
00382 int starting_line = yylineno;
00383 char buffer[200];
00384
00385 char *ptr = buffer;
00386
00387 while (yylineno == starting_line) {
00388 c = input();
00389 if (!c) {
00390 unput(c);
00391 *ptr = '\0';
00392 return (string_Copy(buffer));
00393 }
00394 else if (c == '\\') {
00395 saw_escape_code = 1;
00396 c = eat_escape_code();
00397 if (!c)
00398 continue;
00399 }
00400
00401 *ptr = c;
00402 ptr++;
00403
00404 if ((ptr == buffer + strlen("endshow")) && test_for_endshow)
00405 if (!strncmp(buffer, "endshow", strlen("endshow"))
00406 && !saw_escape_code) {
00407 c = input();
00408 unput(c);
00409 if (!is_identifier_char(c))
00410 return (0);
00411 }
00412
00413 if (ptr > buffer + sizeof(buffer) - 2) {
00414 string the_line;
00415 string rest_of_line = eat_show_line(0);
00416
00417 *ptr = '\0';
00418 the_line = string_Concat(buffer, rest_of_line);
00419 free(rest_of_line);
00420 return (the_line);
00421 }
00422 }
00423
00424 *ptr = '\0';
00425 return (string_Copy(buffer));
00426 }
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436
00437
00438
00439
00440
00441
00442
00443
00444 static char *
00445 eat_til_endshow(start_line_no)
00446 int start_line_no;
00447 {
00448 register int c;
00449 string text_so_far = string_Copy("");
00450 string next_line;
00451
00452 for (;;) {
00453
00454
00455
00456 while ((c = input()), c == ' ' || c == '\t');
00457 unput(c);
00458
00459
00460
00461
00462 if (!c) {
00463 report_parse_error("unterminated show beginning", start_line_no);
00464 free(text_so_far);
00465 return (0);
00466 }
00467
00468
00469
00470
00471
00472
00473
00474 next_line = eat_show_line(1);
00475
00476 if (!next_line)
00477 return (text_so_far);
00478
00479 text_so_far = string_Concat2(text_so_far, next_line);
00480 free(next_line);
00481 }
00482 }
00483
00484
00485
00486
00487
00488
00489
00490 static int
00491 handle_show()
00492 {
00493 int c;
00494 int start_line_no = yylineno;
00495
00496
00497
00498
00499
00500
00501 while (c = input(), c == ' ' || c == '\t');
00502 if (c != '\n')
00503 unput(c);
00504
00505 if ((yylval.text = eat_til_endshow(start_line_no)))
00506 return (SHOW);
00507 else
00508 return (ERROR);
00509 }
00510
00511
00512
00513
00514
00515
00516
00517
00518
00519
00520
00521 int
00522 yylex()
00523 {
00524 register int c, last_char;
00525 register char *ptr;
00526 int start_line_no;
00527 int_dictionary_binding *binding;
00528 char varname[MAX_IDENTIFIER_LENGTH + 1];
00529
00530 for (;;) {
00531 switch (c = input()) {
00532
00533
00534
00535
00536 case ' ':
00537 case '\t':
00538 case '\n':
00539 continue;
00540
00541
00542
00543
00544
00545 case '#':
00546 while ((c = input()) && (c != '\n'));
00547 if (!c)
00548 unput(c);
00549 continue;
00550
00551
00552
00553
00554
00555 case '/':
00556 start_line_no = yylineno;
00557
00558
00559 if ((c = input()) != '*')
00560 return (ERROR);
00561
00562
00563 for (last_char = 0;; last_char = c) {
00564 c = input();
00565 if (c == '/' && (last_char == '*'))
00566 break;
00567 if (!c) {
00568 unput(c);
00569 report_parse_error("unterminated c style comment found beginning", start_line_no);
00570 return (ERROR);
00571 }
00572 }
00573 continue;
00574
00575
00576
00577
00578
00579 case 0:
00580 case '+':
00581 case '|':
00582 case '&':
00583 case '(':
00584 case ')':
00585 case '.':
00586 case ',':
00587 return (c);
00588
00589
00590
00591
00592 case '=':
00593 switch (c = input()) {
00594 case '~':
00595 return (REGEQ);
00596 case '=':
00597 return (EQ);
00598 default:
00599 unput(c);
00600 return ('=');
00601 }
00602
00603
00604
00605
00606 case '!':
00607 switch (c = input()) {
00608 case '~':
00609 return (REGNEQ);
00610 case '=':
00611 return (NEQ);
00612 default:
00613 unput(c);
00614 return ('!');
00615 }
00616
00617
00618
00619
00620
00621
00622
00623 case 'a':
00624 case 'b':
00625 case 'c':
00626 case 'd':
00627 case 'e':
00628 case 'f':
00629 case 'g':
00630 case 'h':
00631 case 'i':
00632 case 'j':
00633 case 'k':
00634 case 'l':
00635 case 'm':
00636 case 'n':
00637 case 'o':
00638 case 'p':
00639 case 'q':
00640 case 'r':
00641 case 's':
00642 case 't':
00643 case 'u':
00644 case 'v':
00645 case 'w':
00646 case 'x':
00647 case 'y':
00648 case 'z':
00649 case 'A':
00650 case 'B':
00651 case 'C':
00652 case 'D':
00653 case 'E':
00654 case 'F':
00655 case 'G':
00656 case 'H':
00657 case 'I':
00658 case 'J':
00659 case 'K':
00660 case 'L':
00661 case 'M':
00662 case 'N':
00663 case 'O':
00664 case 'P':
00665 case 'Q':
00666 case 'R':
00667 case 'S':
00668 case 'T':
00669 case 'U':
00670 case 'V':
00671 case 'W':
00672 case 'X':
00673 case 'Y':
00674 case 'Z':
00675 case '0':
00676 case '1':
00677 case '2':
00678 case '3':
00679 case '4':
00680 case '5':
00681 case '6':
00682 case '7':
00683 case '8':
00684 case '9':
00685 case '_':
00686
00687
00688
00689
00690
00691 for (ptr = varname;;) {
00692 if (ptr < varname + MAX_IDENTIFIER_LENGTH)
00693 *(ptr++) = c;
00694 c = input();
00695 if (!is_identifier_char(c))
00696 break;
00697 }
00698 unput(c);
00699 *ptr = '\0';
00700
00701
00702
00703
00704
00705
00706
00707 binding = int_dictionary_Lookup(keyword_dict, varname);
00708 if (!binding) {
00709 yylval.text = string_Copy(varname);
00710 return (VARNAME);
00711 }
00712 if (binding->value == SHOW)
00713 return (handle_show());
00714 else
00715 return (binding->value);
00716
00717
00718
00719
00720
00721 case '$':
00722 c = input();
00723 if (!is_identifier_char(c))
00724 return (ERROR);
00725
00726
00727
00728
00729
00730
00731 for (ptr = varname;;) {
00732 if (ptr < varname + MAX_IDENTIFIER_LENGTH)
00733 *(ptr++) = c;
00734 c = input();
00735 if (!is_identifier_char(c))
00736 break;
00737 }
00738 unput(c);
00739 *ptr = '\0';
00740
00741 yylval.text = string_Copy(varname);
00742 return (VARREF);
00743
00744
00745
00746
00747 case '"':
00748 if ((yylval.text = eat_string(yylineno)))
00749 return (STRING);
00750 else
00751 return (ERROR);
00752
00753
00754
00755
00756 default:
00757 return (ERROR);
00758 }
00759 }
00760 }