#ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include #include #include #define PCRE2_CODE_UNIT_WIDTH 32 #include #include #include #include #include #include #include #include #include #include int lineno = 0; int colno = 0; static int maxtoken; static wchar_t *token_buffer; static int max8token; static utf8_t *token_utf8_buffer; int indent_level = 0; /* Number of '{' minus number of '}'. */ static int end_of_file = 0; static int nextchar = -1; static char *locale; #define GETC(c) ({ wint_t ret; ++colno; ret = fgetwc( config ); ret; }) #define UNGETC(c) ({ wint_t ret; --colno; ret = ungetwc( c, config ); ret; }) static wchar_t *extend_token_buffer( wchar_t *p ) { int offset = p - token_buffer; maxtoken = maxtoken * 2 + 10; token_buffer = (wchar_t *)xrealloc( token_buffer, (maxtoken + 2)*sizeof(wchar_t) ); return( token_buffer + offset ); } static utf8_t *extend_token_utf8_buffer( utf8_t *p ) { int offset = p - token_utf8_buffer; max8token = max8token * 2 + 10; token_utf8_buffer = (utf8_t *)xrealloc( token_utf8_buffer, (max8token + 2)*6 ); return( token_utf8_buffer + offset ); } void yyerror( char const *s ) { error( "%s", s ); } void init_lex( void ) { locale = setlocale( LC_ALL, "en_US.utf8" ); lineno = 0; colno = 0; nextchar = -1; maxtoken = 40; max8token = 40; indent_level = 0; end_of_file = 0; token_buffer = (wchar_t *)xmalloc( maxtoken * sizeof(wchar_t) + 2 ); token_utf8_buffer = (utf8_t *)xmalloc( max8token * 6 + 2 ); } void fini_lex( void ) { locale = setlocale( LC_ALL, locale ); if( token_buffer ) { free( token_buffer ); token_buffer = NULL; } if( token_utf8_buffer ) { free( token_utf8_buffer ); token_utf8_buffer = NULL; } indent_level = 0; end_of_file = 0; max8token = 0; maxtoken = 0; nextchar = -1; lineno = 0; colno = 0; } static wint_t check_newline( void ) { wint_t c; ++lineno; colno = 0; /* считает GETC()/UNGETC(); здесь надо только обнулить */ /***************************************** Read first nonwhite char on the line. *****************************************/ c = GETC(); while( c == ' ' || c == '\t' ) c = GETC(); if( c == '#' ) goto skipline; else return( c ); /* skip the rest of this line */ skipline: while( c != '\n' && c != WEOF ) c = GETC(); return( c ); } static wint_t skip_comment( int c ) { if( c == '*' ) { do1: do { c = GETC(); if( c == '\n' ) { ++lineno; colno = 0; } } while( c != '*' && c != WEOF ); if( c == WEOF ) { unterminated_comment(); return( WEOF ); } c = GETC(); if( c == '/' ) { c = GETC(); if( c == '\n' ) c = check_newline(); return( c ); } else { UNGETC( c ); goto do1; } } else if( c == '/' || c == '#' ) { do { c = GETC(); } while( c != '\n' && c != WEOF ); if( c == WEOF ) { unterminated_comment(); return( WEOF ); } else c = check_newline(); return( c ); } return( c ); } /* End skip_commemnt() */ static wint_t skip_white_space( wint_t c ) { for( ;; ) { switch( c ) { case '\n': c = check_newline(); break; case '#': c = skip_comment( c ); return( skip_white_space( c ) ); break; case '/': c = GETC(); if( c == '/' || c == '*' ) { c = skip_comment( c ); return( skip_white_space( c ) ); } else { UNGETC( c ); return( '/' ); } break; case ' ': case '\t': case '\f': case '\v': case '\b': case '\r': c = GETC(); break; case '\\': c = GETC(); if( c == '\n' ) { ++lineno; colno = 0; } else { warning( "%s", "Stray '\\' in program" ); } c = GETC(); break; default: return( c ); } /* End switch( c ) */ } /* End for( ;; ) */ } /* End skip_white_space() */ static wint_t readescape( int *ignore_ptr ) /* read escape sequence, returning a char, or store 1 in *ignore_ptr if it is backslash-newline */ { wint_t c = GETC(); wint_t code; unsigned count; unsigned firstdig = 0; int nonull; switch( c ) { case 'x': code = 0; count = 0; nonull = 0; while( 1 ) { c = GETC(); if( !(c >= 'a' && c <= 'f') && !(c >= 'A' && c <= 'F') && !(c >= '0' && c <= '9') ) { UNGETC( c ); break; } code *= 16; if( c >= 'a' && c <= 'f' ) code += c - 'a' + 10; if( c >= 'A' && c <= 'F' ) code += c - 'A' + 10; if( c >= '0' && c <= '9' ) code += c - '0'; if( code != 0 || count != 0 ) { if( count == 0 ) firstdig = code; count++; } nonull = 1; } /* End while( 1 ) */ if( !nonull ) { error( "%s", "\\x used with no following hex digits" ); } else if( count == 0 ) /* Digits are all 0's. Ok. */ ; else if( (count - 1) * 4 >= 32 || /* 32 == bits per INT */ (count > 1 && ((1 << (32 - (count-1) * 4)) <= firstdig ))) { warning( "%s", "Hex escape out of range" ); } return( code ); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': code = 0; count = 0; while( (c <= '7') && (c >= '0') && (count++ < 6) ) { code = (code * 8) + (c - '0'); c = GETC(); } UNGETC( c ); return( code ); case '\\': case '\'': case '"': return( c ); case '\n': lineno++; colno = 0; *ignore_ptr = 1; return( 0 ); case 'n': return( '\n' ); case 't': return( '\t' ); case 'r': return( '\r' ); case 'f': return( '\f' ); case 'b': return( '\b' ); case 'a': return( '\a' ); case 'v': return( '\v' ); } return( c ); } /* End of readescape() */ int html_symbol_name( wchar_t *str ) { int rc = 0, error = 0; PCRE2_SIZE offset = 0; wchar_t pattern[] = L"^(&[#A-Za-z0-9]*;)"; pcre2_match_data *match; pcre2_code *regexp = pcre2_compile( (PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED, 0, &error, &offset, NULL ); if( regexp == NULL ) { return 0; /* PCRE compilation failed */ } match = pcre2_match_data_create_from_pattern( regexp, NULL ); rc = pcre2_match( regexp, (PCRE2_SPTR)str, (int)wcslen(str), 0, 0, match, NULL ); if( rc < 0 ) { /* not match */ pcre2_match_data_free( match ); pcre2_code_free( regexp ); return 0; } else { /* match */ pcre2_match_data_free( match ); pcre2_code_free( regexp ); return 1; } } int yylex( void ) { wint_t c; wchar_t *p; int value; if( nextchar >= 0 ) c = nextchar, nextchar = -1; else c = GETC(); while( 1 ) { switch( c ) { case ' ': case '\t': case '\f': case '\v': case '\b': c = skip_white_space( c ); break; case '\r': case '\n': case '/': case '#': case '\\': c = skip_white_space( c ); default: goto found_nonwhite; } /* End switch( c ) */ found_nonwhite: token_buffer[0] = c; token_buffer[1] = 0; switch( c ) { case WEOF: end_of_file = 1; token_buffer[0] = 0; value = 0; goto done; break; case '$': /* dollar in identifier */ if( 1 ) goto letter; return '$'; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': case '_': /* RUSSIAN */ case L'А': case L'Б': case L'В': case L'Г': case L'Д': case L'Е': case L'Ё': case L'Ж': case L'З': case L'И': case L'Й': case L'К': case L'Л': case L'М': case L'Н': case L'О': case L'П': case L'Р': case L'С': case L'Т': case L'У': case L'Ф': case L'Х': case L'Ц': case L'Ч': case L'Ш': case L'Щ': case L'Ъ': case L'Ы': case L'Ь': case L'Э': case L'Ю': case L'Я': case L'а': case L'б': case L'в': case L'г': case L'д': case L'е': case L'ё': case L'ж': case L'з': case L'и': case L'й': case L'к': case L'л': case L'м': case L'н': case L'о': case L'п': case L'р': case L'с': case L'т': case L'у': case L'ф': case L'х': case L'ц': case L'ч': case L'ш': case L'щ': case L'ъ': case L'ы': case L'ь': case L'э': case L'ю': case L'я': letter: p = token_buffer; while( iswalnum( c ) || c == '_' || c == '$' || c == '@' || c == '-' || c == '.' || c == ':' ) { if( p >= token_buffer + maxtoken ) { p = extend_token_buffer( p ); extend_token_utf8_buffer( token_utf8_buffer ); } *p++ = c; c = GETC(); } *p = 0; nextchar = c; value = VARIABLE; (void)copy_ucs4_to_utf8( (utf8_t *)token_utf8_buffer, (const ucs4_t *)token_buffer ); /********************* install into symtab *********************/ { if( !strcmp( "section", (const char *)token_utf8_buffer ) ) { value = SECTION; yylval.sym = install( NULL, SECTION, NULL ); } else if( !strcmp( "repo", (const char *)token_utf8_buffer ) ) { value = REPO; yylval.sym = install( NULL, REPO, NULL ); } else { SYMBOL *sp = NULL; if( (sp = lookup( (const char *)token_utf8_buffer )) == (SYMBOL *)0 ) sp = install( (const char *)token_utf8_buffer, VARIABLE, 0 ); /****************************************************************** Если переменная уже в таблице, то мы предполагаем, что она имеет тип равный одному из допустимых: NUMERICAL, STRING, или PATH. ******************************************************************/ if( sp->type != VARIABLE ) { switch( sp->type ) { case NUMERICAL: case STRING: case PATH: value = sp->type; break; default: /* error */ break; } } yylval.sym = sp; } } token_buffer[0] = 0; token_utf8_buffer[0] = 0; goto done; break; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { int constant = 0; /* integer: */ p = token_buffer; while( iswdigit( c ) ) { if( p >= token_buffer + maxtoken ) { p = extend_token_buffer( p ); extend_token_utf8_buffer( token_utf8_buffer ); } *p++ = c; c = GETC(); } *p = 0; nextchar = c; value = NUMERICAL; (void)copy_ucs4_to_utf8( (utf8_t *)token_utf8_buffer, (const ucs4_t *)token_buffer ); /********************* install into symtab *********************/ { (void)swscanf( (const wchar_t *)token_buffer, L"%d", &constant ); yylval.sym = install( NULL, NUMERICAL, constant ); } token_buffer[0] = 0; token_utf8_buffer[0] = 0; goto done; break; } case '\'': /* path_constant: */ { int num_chars = 0; unsigned int width = 8; /* to allow non asscii in path set width = 16 */ while( 1 ) { tryagain: c = GETC(); if( c == '\'' || c == WEOF ) break; if( c == '\\' ) { int ignore = 0; c = readescape( &ignore ); if( ignore ) goto tryagain; if( (unsigned)c >= (1 << width) ) { warning( "%s", "Escape sequence out of range" ); } } else if( c == '\n' ) { lineno++; colno = 0; } num_chars++; if( num_chars > maxtoken - 4 ) { extend_token_buffer( token_buffer ); extend_token_utf8_buffer( token_utf8_buffer ); } token_buffer[num_chars] = c; } /* End while( 1 ) */ token_buffer[num_chars + 1] = '\''; token_buffer[num_chars + 2] = 0; if( c != '\'' ) { error( "%s", "Malformated path constant" ); } else if( num_chars == 0 ) { error( "%s", "Empty path constant" ); } /* build path: */ { wchar_t *s, *string = NULL; wchar_t *p = &token_buffer[0]; while( *p ) { if( *p == '\n' || *p == '\t' ) *p = ' '; ++p; } string = (wchar_t *)malloc( maxtoken * 4 + 10 ); p = &token_buffer[1]; s = &string[0]; while( *p == ' ' ) ++p; while( *p ) { if( *p != ' ' ) *s++ = *p++; else ++p; } --s; *s = 0; while( *(s-1) == ' ' ) --s; *s = 0; (void)copy_ucs4_to_utf8( (utf8_t *)token_utf8_buffer, (const ucs4_t *)string ); free( string ); } /********************* install into symtab *********************/ { yylval.sym = install( NULL, PATH, (char *)token_utf8_buffer ); } token_buffer[0] = 0; token_utf8_buffer[0] = 0; value = PATH; goto done; } case '"': /* string_constant: */ { c = GETC(); p = token_buffer + 1; while( c != '"' && c >= 0 ) { if( c == '\\' ) { int ignore = 0; c = readescape( &ignore ); if( ignore ) goto skipnewline; } else if( c == '\n' ) lineno++; if( p == token_buffer + maxtoken ) { p = extend_token_buffer( p ); extend_token_utf8_buffer( token_utf8_buffer ); } *p++ = c; skipnewline: c = GETC(); } /* End while( " ) */ *p = 0; if( c < 0 ) { error( "%s", "Unterminated string constant" ); } *p++ = '"'; *p = 0; /* build string: */ { wchar_t *s, *string = NULL; wchar_t *p = &token_buffer[0]; while( *p ) { if( *p == '\n' || *p == '\t' ) *p = ' '; ++p; } string = (wchar_t *)malloc( maxtoken * 4 + 10 ); p = &token_buffer[1]; s = &string[0]; while( *p == ' ' ) ++p; while( *p ) { if( *p != ' ' ) { switch( *p ) { case '&': /************************************************ Skip HTML symbol names such as  ,... etc.: */ if( ! html_symbol_name( p ) ) { *s++ = '&'; *s++ = 'a'; *s++ = 'm'; *s++ = 'p'; *s++ = ';'; ++p; } else { *s++ = *p++; } break; case '<': *s++ = '&'; *s++ = 'l'; *s++ = 't'; *s++ = ';'; ++p; break; case '>': *s++ = '&'; *s++ = 'g'; *s++ = 't'; *s++ = ';'; ++p; break; default: *s++ = *p++; break; } } else { /* skip multiple spaces */ if( *(p+1) != ' ' ) *s++ = *p++; else ++p; } } --s; *s = 0; while( *(s-1) == ' ' ) --s; *s = 0; (void)copy_ucs4_to_utf8( (utf8_t *)token_utf8_buffer, (const ucs4_t *)string ); free( string ); } /********************* install into symtab *********************/ { yylval.sym = install( NULL, STRING, (char *)token_utf8_buffer ); } token_buffer[0] = 0; token_utf8_buffer[0] = 0; value = STRING; goto done; } case 0: value = 1; goto done; break; case '{': indent_level++; value = c; goto done; break; case '}': indent_level--; value = c; goto done; break; default: value = c; goto done; break; } /* End switch( c ) */ } /* End while( 1 ) */ done: return( value ); }