diff options
| author | Rokas Puzonas <[email protected]> | 2023-08-26 14:36:04 +0300 |
|---|---|---|
| committer | Rokas Puzonas <[email protected]> | 2023-08-26 14:36:04 +0300 |
| commit | c797f8a500874a2032ed744317cf03c37dfcddbe (patch) | |
| tree | 20dc886343adbf3767bb0339ab8cd760dbfad105 | |
| parent | 00e5fe5c1877800a19ae4a22fc7c3b4b33ef32d7 (diff) | |
| download | raylib.com-c797f8a500874a2032ed744317cf03c37dfcddbe.tar.gz raylib.com-c797f8a500874a2032ed744317cf03c37dfcddbe.zip | |
move example-indexer into tools
| -rw-r--r-- | tools/cheatsheet-generator/README.md | 3 | ||||
| -rw-r--r-- | tools/example-indexer/.gitignore | 1 | ||||
| -rw-r--r-- | tools/example-indexer/Makefile | 20 | ||||
| -rw-r--r-- | tools/example-indexer/README.md | 14 | ||||
| -rw-r--r-- | tools/example-indexer/main.c | 304 | ||||
| -rw-r--r-- | tools/example-indexer/raylib_parser.c | 502 | ||||
| -rw-r--r-- | tools/example-indexer/stb_c_lexer.h | 940 |
7 files changed, 1784 insertions, 0 deletions
diff --git a/tools/cheatsheet-generator/README.md b/tools/cheatsheet-generator/README.md
new file mode 100644
index 0000000..37eef3a
--- /dev/null
+++ b/tools/cheatsheet-generator/README.md
@@ -0,0 +1,3 @@
+# Cheatsheet generator
+
+// TODO: Move cheatsheet generator here and update it so it works
diff --git a/tools/example-indexer/.gitignore b/tools/example-indexer/.gitignore
new file mode 100644
index 0000000..378eac2
--- /dev/null
+++ b/tools/example-indexer/.gitignore
@@ -0,0 +1 @@
+build
diff --git a/tools/example-indexer/Makefile b/tools/example-indexer/Makefile
new file mode 100644
index 0000000..0dce0c2
--- /dev/null
+++ b/tools/example-indexer/Makefile
@@ -0,0 +1,20 @@
+BUILD_DIR := build
+
+.DEFAULT_GOAL := all
+.PHONY: example_indexer run all clean
+
+example_indexer: main.c
+	mkdir -p $(BUILD_DIR)
+	gcc main.c -o $(BUILD_DIR)/example_indexer -DSTB_C_LEXER_IMPLEMENTATION
+
+$(BUILD_DIR)/raylib:
+	mkdir -p $(BUILD_DIR)
+	git clone git@github.com:raysan5/raylib.git $(BUILD_DIR)/raylib
+
+run: example_indexer
+	./$(BUILD_DIR)/example_indexer $(BUILD_DIR)/raylib/src $(BUILD_DIR)/raylib/examples $(BUILD_DIR)/output.json
+
+all: $(BUILD_DIR)/raylib example_indexer run
+
+clean:
+	rm -rf $(BUILD_DIR)
diff --git a/tools/example-indexer/README.md b/tools/example-indexer/README.md
new file mode 100644
index 0000000..d740cd2
--- /dev/null
+++ b/tools/example-indexer/README.md
@@ -0,0 +1,14 @@
+# Raylib example indexer
+
+Go through all raylib examples and extract places where API functions are used.
+
+```shell
+make all
+```
+
+Or
+
+```shell
+make example_indexer
+./build/example_indexer <raylib-src-dir> <examples-dir> <output-file>
+```
diff --git a/tools/example-indexer/main.c b/tools/example-indexer/main.c
new file mode 100644
index 0000000..adebd6e
--- /dev/null
+++ b/tools/example-indexer/main.c
@@ -0,0 +1,351 @@
+#include <stdio.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/param.h>
+#include <dirent.h>
+#include <string.h>
+#include <assert.h>
+
+#include "stb_c_lexer.h"
+#include "raylib_parser.c"
+
+#define MAX_FUNCS_TO_PARSE 1024     // Maximum number of API functions to parse
+#define MAX_FUNCS_PER_EXAMPLE 1024  // Maximum number of example files recorded per function
+
+// A single place where an API function was referenced
+typedef struct {
+    char filename[256];     // Example path relative to the examples directory ("<category>/<name>.c")
+    // TODO: Track where function usage was found and display it?
+} FunctionUsage;
+
+// Half-open range of byte offsets delimiting one line of text
+typedef struct {
+    int from, to; // [from, to) - from inclusive, to exclusive
+} LineRange;
+
+// Check if `text` begins with `prefix`
+static bool StartsWith(char *text, int textSize, char *prefix, int prefixSize)
+{
+    return textSize >= prefixSize && strncmp(text, prefix, prefixSize) == 0;
+}
+
+// Check if `text` ends with `suffix`
+static bool EndsWith(char *text, int textSize, char *suffix, int suffixSize)
+{
+    return textSize >= suffixSize && strncmp(text+textSize-suffixSize, suffix, suffixSize) == 0;
+}
+
+// Find the line starting at offset `from`, returns false once the end of the text is reached
+// NOTE: A final line without a trailing '\n' is still reported
+static bool GetNextLine(LineRange *line, char *text, int textSize, int from)
+{
+    if (from >= textSize) return false;
+
+    int to = from;
+    while (to < textSize && text[to] != '\n') to++;
+
+    line->from = from;
+    line->to = to;
+    return true;
+}
+
+// Get the index of the function named `id`, returns -1 if there is no such function
+static int GetFunctionFromIdentifier(char *id, FunctionInfo *functions, int functionCount)
+{
+    for (int i = 0; i < functionCount; i++) {
+        if (strcmp(id, functions[i].name) == 0) {
+            return i;
+        }
+    }
+    return -1;
+}
+
+// Record every API function referenced by one example file
+// NOTE: `filePath` is relative to `directory`, each file is recorded at most once per function
+static bool ParseFunctionUsagesFromFile(char *directory, char *filePath, FunctionUsage *usages[], int *usageCounts, FunctionInfo *functions, int functionCount)
+{
+    char fullPath[PATH_MAX] = { 0 };
+    int fullPathSize = snprintf(fullPath, sizeof(fullPath), "%s/%s", directory, filePath);
+    if (fullPathSize < 0 || fullPathSize >= (int)sizeof(fullPath)) {
+        fprintf(stderr, "Path is too long '%s/%s'\n", directory, filePath);
+        return false;
+    }
+
+    int fileSize = 0;
+    char *exampleCode = LoadFileText(fullPath, &fileSize);
+    if (exampleCode == NULL) {
+        return false;
+    }
+
+    stb_lexer lexer;
+    char stringStore[512];
+    stb_c_lexer_init(&lexer, exampleCode, exampleCode+fileSize, stringStore, sizeof(stringStore));
+
+    while (stb_c_lexer_get_token(&lexer)) {
+        if (lexer.token != CLEX_id) continue;
+
+        int functionIndex = GetFunctionFromIdentifier(lexer.string, functions, functionCount);
+        if (functionIndex == -1) continue;
+
+        FunctionUsage *functionUsages = usages[functionIndex];
+        int *usageCount = &usageCounts[functionIndex];
+
+        // Files are processed one at a time, so a repeated usage is always the last recorded one
+        if (*usageCount > 0 && strcmp(functionUsages[*usageCount - 1].filename, filePath) == 0) continue;
+
+        if (*usageCount >= MAX_FUNCS_PER_EXAMPLE) {
+            fprintf(stderr, "Too many usages of '%s', skipping '%s'\n", functions[functionIndex].name, filePath);
+            continue;
+        }
+
+        // snprintf() always null-terminates, strncpy() does not
+        FunctionUsage *usage = &functionUsages[*usageCount];
+        snprintf(usage->filename, sizeof(usage->filename), "%s", filePath);
+        (*usageCount)++;
+    }
+
+    free(exampleCode);
+
+    return true;
+}
+
+// Record API function usages from every ".c" file directly inside `cwd`/`dir`
+static void ParseFunctionsUsagesFromFolder(char *cwd, char *dir, FunctionUsage *usages[], int *usageCounts, FunctionInfo *functions, int functionCount)
+{
+    char dirPath[PATH_MAX];
+    snprintf(dirPath, sizeof(dirPath), "%s/%s", cwd, dir);
+    DIR *dirp = opendir(dirPath);
+    if (dirp == NULL) {
+        fprintf(stderr, "Failed to open directory '%s'\n", dirPath);
+        return;
+    }
+
+    struct dirent *entry;
+    while ((entry = readdir(dirp)) != NULL) {
+        if (entry->d_type != DT_REG) continue;
+
+        // Files without an extension (e.g. "Makefile") have nothing to compare against
+        char *extension = strrchr(entry->d_name, '.');
+        if (extension == NULL || strcmp(extension, ".c") != 0) continue;
+
+        char filePath[PATH_MAX];
+        snprintf(filePath, sizeof(filePath), "%s/%s", dir, entry->d_name);
+        ParseFunctionUsagesFromFile(cwd, filePath, usages, usageCounts, functions, functionCount);
+    }
+
+    closedir(dirp);
+}
+
+// Checks if the line is in the format "#if defined(*_IMPLEMENTATION)"
+static bool IsLineImplementationIfdef(char *line, int line_size) {
+    char *prefix = "#if defined(";
+    char *suffix = "_IMPLEMENTATION)";
+    return StartsWith(line, line_size, prefix, strlen(prefix)) &&
+           EndsWith(line, line_size, suffix, strlen(suffix));
+}
+
+// Parse API functions declared in one header, stops at the "#if defined(*_IMPLEMENTATION)" section
+// Returns the number of functions stored in `functions`, never more than `maxFunctions`
+static int ParseFunctionsDefinitionsFromHeader(char *path, FunctionInfo *functions, int maxFunctions)
+{
+    int fileSize = 0;
+    char *contents = LoadFileText(path, &fileSize);
+    if (contents == NULL) {
+        fprintf(stderr, "Failed to read file '%s'\n", path);
+        return 0;
+    }
+
+    int count = 0;
+
+    int nextLineFrom = 0;
+    LineRange curr = { 0 };
+    while (count < maxFunctions && GetNextLine(&curr, contents, fileSize, nextLineFrom)) {
+        nextLineFrom = curr.to+1;
+
+        int lineSize = curr.to - curr.from;
+        char line[MAX_LINE_LENGTH] = { 0 };
+        if (lineSize >= (int)sizeof(line)) continue; // Too long to be null-terminated inside `line`
+
+        memcpy(line, &contents[curr.from], lineSize); // `raylib_parser.c` expects lines to be null-terminated
+        if (IsLineImplementationIfdef(line, lineSize)) break;
+
+        if (IsLineAPIFunction(line, lineSize)) {
+            // `ParseAPIFunctionInfo()` assumes a zero initialized destination
+            memset(&functions[count], 0, sizeof(functions[count]));
+            ParseAPIFunctionInfo(line, lineSize, &functions[count]);
+            count++;
+        }
+    }
+
+    free(contents);
+
+    return count;
+}
+
+// Parse API functions from every ".h" file directly inside `dir`, returns -1 if `dir` can't be opened
+static int ParseFunctionsDefinitionsFromFolder(char *dir, FunctionInfo *functions, int maxFunctions)
+{
+    DIR *dirp = opendir(dir);
+    if (dirp == NULL) {
+        fprintf(stderr, "Failed to open directory '%s'\n", dir);
+        return -1;
+    }
+
+    int count = 0;
+    struct dirent *entry;
+    while (count < maxFunctions && (entry = readdir(dirp)) != NULL) {
+        if (entry->d_type != DT_REG) continue;
+
+        char *fileExtension = strrchr(entry->d_name, '.');
+        if (fileExtension == NULL) continue;
+        if (strcmp(fileExtension, ".h")) continue;
+
+        char path[PATH_MAX];
+        snprintf(path, sizeof(path), "%s/%s", dir, entry->d_name);
+        count += ParseFunctionsDefinitionsFromHeader(path, functions + count, maxFunctions - count);
+    }
+    closedir(dirp);
+
+    return count;
+}
+
+// Collect the distinct filenames found in `usages`, returns how many were stored in `uniqueFilenames`
+// NOTE: Every stored filename is allocated with strdup() and must be freed by the caller
+static int GetUniqueFilenames(FunctionUsage *usages, int usageCount, char *uniqueFilenames[])
+{
+    int count = 0;
+
+    for (int i = 0; i < usageCount; i++) {
+        FunctionUsage *usage = &usages[i];
+
+        bool found = false;
+        for (int j = 0; j < count; j++) {
+            if (!strcmp(uniqueFilenames[j], usage->filename)) {
+                found = true;
+                break;
+            }
+        }
+        if (found) continue;
+
+        char *filename = strdup(usage->filename);
+        if (filename == NULL) continue; // Out of memory, drop the entry rather than store NULL
+
+        uniqueFilenames[count] = filename;
+        count++;
+    }
+
+    return count;
+}
+
+// Write a JSON object mapping each function name to the list of examples using it
+// Returns 0 on success, -1 if the file could not be written
+static int OutputFunctionUsagesJSON(char *output, FunctionInfo *functions, int functionCount, FunctionUsage **usages, int *usageCounts)
+{
+    FILE *outputFile = fopen(output, "w");
+    if (outputFile == NULL) {
+        fprintf(stderr, "Failed to open file '%s'\n", output);
+        return -1;
+    }
+
+    fputs("{", outputFile);
+    for (int functionIndex = 0; functionIndex < functionCount; functionIndex++) {
+        FunctionInfo *info = &functions[functionIndex];
+
+        fprintf(outputFile, "%s\"%s\":[", (functionIndex > 0) ? "," : "", info->name);
+
+        int usageCount = usageCounts[functionIndex];
+        if (usageCount > 0) {
+            char *uniqueFilenames[usageCount];
+            int uniqueCount = GetUniqueFilenames(usages[functionIndex], usageCount, uniqueFilenames);
+
+            for (int i = 0; i < uniqueCount; i++) {
+                // Turn "<category>/<name>.c" into "<name>"
+                char *filename = uniqueFilenames[i];
+                char *slash = strrchr(filename, '/');
+                char *exampleName = (slash != NULL) ? slash + 1 : filename;
+                char *extension = strrchr(exampleName, '.');
+                int exampleNameSize = (extension != NULL) ? (int)(extension - exampleName) : (int)strlen(exampleName);
+
+                fprintf(outputFile, "%s\"%.*s\"", (i > 0) ? "," : "", exampleNameSize, exampleName);
+            }
+
+            for (int i = 0; i < uniqueCount; i++) {
+                free(uniqueFilenames[i]);
+            }
+        }
+
+        fputs("]", outputFile);
+    }
+    fputs("}", outputFile);
+
+    // Buffered writes can fail as late as fclose()
+    bool failed = ferror(outputFile) != 0;
+    if (fclose(outputFile) != 0) failed = true;
+    if (failed) {
+        fprintf(stderr, "Failed to write file '%s'\n", output);
+        return -1;
+    }
+
+    return 0;
+}
+
+int main(int argc, char **argv)
+{
+    if (argc != 4) {
+        fprintf(stderr, "Usage: %s <raylib-src-dir> <examples-dir> <output-file>\n", argv[0]);
+        return EXIT_FAILURE;
+    }
+
+    char *raylibSrc = argv[1];
+    char *raylibExamplesPath = argv[2];
+    char *outputPath = argv[3];
+
+    // Static storage: zero initialized, and `functions` alone is a few MB, too much to rely on the stack
+    static FunctionInfo functions[MAX_FUNCS_TO_PARSE];
+    static FunctionUsage *usages[MAX_FUNCS_TO_PARSE];
+    static int usageCounts[MAX_FUNCS_TO_PARSE];
+
+    int functionCount = ParseFunctionsDefinitionsFromFolder(raylibSrc, functions, MAX_FUNCS_TO_PARSE);
+    if (functionCount < 0) {
+        return EXIT_FAILURE;
+    }
+
+    int exitCode = EXIT_FAILURE;
+
+    for (int i = 0; i < functionCount; i++) {
+        usages[i] = malloc(MAX_FUNCS_PER_EXAMPLE * sizeof(FunctionUsage));
+        if (usages[i] == NULL) {
+            fprintf(stderr, "Failed to allocate memory for function usages\n");
+            goto cleanup;
+        }
+    }
+
+    { // Collect function usages from examples
+        DIR *dirp = opendir(raylibExamplesPath);
+        if (dirp == NULL) {
+            fprintf(stderr, "Failed to open directory '%s'\n", raylibExamplesPath);
+            goto cleanup;
+        }
+        struct dirent *entry;
+        while ((entry = readdir(dirp)) != NULL) {
+            if (entry->d_type != DT_DIR) continue;
+            if (entry->d_name[0] == '.') continue;
+
+            ParseFunctionsUsagesFromFolder(raylibExamplesPath, entry->d_name, usages, usageCounts, functions, functionCount);
+        }
+        closedir(dirp);
+    }
+
+    // Output function usages
+    if (OutputFunctionUsagesJSON(outputPath, functions, functionCount, usages, usageCounts) == 0) {
+        exitCode = EXIT_SUCCESS;
+    }
+
+cleanup:
+    for (int i = 0; i < functionCount; i++) {
+        free(usages[i]);
+    }
+
+    return exitCode;
+}
diff --git a/tools/example-indexer/raylib_parser.c b/tools/example-indexer/raylib_parser.c
new file mode 100644
index 0000000..d4b4568
--- /dev/null
+++ b/tools/example-indexer/raylib_parser.c
@@ -0,0 +1,502 @@
+/**********************************************************************************************
+
+    raylib API parser
+
+    This parser scans raylib.h to get API information about defines, structs, aliases, enums, callbacks and functions.
+    All data is divided into pieces, usually as strings. The following types are used for data:
+
+     - struct DefineInfo
+     - struct StructInfo
+     - struct AliasInfo
+     - struct EnumInfo
+     - struct FunctionInfo
+
+    CONSTRAINTS:
+
+    This parser is specifically designed to work with raylib.h, so, it has some constraints:
+
+     - Functions are expected as a single line with the following structure:
+
+       <retType> <name>(<paramType[0]> <paramName[0]>, <paramType[1]> <paramName[1]>); <desc>
+
+       Be careful with functions broken into several lines, it breaks the process!
+
+     - Structures are expected as several lines with the following form:
+
+       <desc>
+       typedef struct <name> {
+           <fieldType[0]> <fieldName[0]>; <fieldDesc[0]>
+           <fieldType[1]> <fieldName[1]>; <fieldDesc[1]>
+           <fieldType[2]> <fieldName[2]>; <fieldDesc[2]>
+       } <name>;
+
+     - Enums are expected as several lines with the following form:
+
+       <desc>
+       typedef enum {
+           <valueName[0]> = <valueInteger[0]>, <valueDesc[0]>
+           <valueName[1]>,
+           <valueName[2]>, <valueDesc[2]>
+           <valueName[3]> <valueDesc[3]>
+       } <name>;
+
+       NOTE: Multiple options are supported for enums:
+          - If value is not provided, (<valueInteger[i -1]> + 1) is assigned
+          - Value description can be provided or not
+
+    OTHER NOTES:
+
+     - This parser could work with other C header files if mentioned constraints are followed.
+     - This parser does not require <string.h> library, all data is parsed directly from char buffers.
+     - This is a modified/stripped down version of the original parser
+       Done so to make it usable as a library, and not a CLI tool.
+       This does not implement a function for every type to parse
+
+    LICENSE: zlib/libpng
+
+    raylib-parser is licensed under an unmodified zlib/libpng license, which is an OSI-certified,
+    BSD-like license that allows static linking with closed source software:
+
+    Copyright (c) 2021-2023 Ramon Santamaria (@raysan5)
+
+**********************************************************************************************/
+
+
+#define _CRT_SECURE_NO_WARNINGS
+
+#include <stdlib.h>             // Required for: malloc(), calloc(), realloc(), free(), atoi(), strtol()
+#include <stdio.h>              // Required for: printf(), fopen(), fseek(), ftell(), fread(), fclose()
+#include <stdbool.h>            // Required for: bool
+#include <ctype.h>              // Required for: isdigit()
+
+#define MAX_LINE_LENGTH         512     // Maximum length of one line (including comments)
+
+#define MAX_STRUCT_FIELDS        64     // Maximum number of struct fields
+#define MAX_ENUM_VALUES         512     // Maximum number of enum values
+#define MAX_FUNCTION_PARAMETERS  12     // Maximum number of function parameters
+
+//----------------------------------------------------------------------------------
+// Types and Structures Definition
+//----------------------------------------------------------------------------------
+
+// Type of parsed define
+typedef enum {
+    UNKNOWN = 0,
+    MACRO,
+    GUARD,
+    INT,
+    INT_MATH,
+    LONG,
+    LONG_MATH,
+    FLOAT,
+    FLOAT_MATH,
+    DOUBLE,
+    DOUBLE_MATH,
+    CHAR,
+    STRING,
+    COLOR
+} DefineType;
+
+// Define info data
+typedef struct DefineInfo {
+    char name[64];              // Define name
+    int type;                   // Define type
+    char value[256];            // Define value
+    char desc[128];             // Define description
+    bool isHex;                 // Define is hex number (for types INT, LONG)
+} DefineInfo;
+
+// Struct info data
+typedef struct StructInfo {
+    char name[64];              // Struct name
+    char desc[128];             // Struct type description
+    int fieldCount;             // Number of fields in the struct
+    char fieldType[MAX_STRUCT_FIELDS][64];     // Field type
+    char fieldName[MAX_STRUCT_FIELDS][64];     // Field name
+    char fieldDesc[MAX_STRUCT_FIELDS][128];    // Field description
+} StructInfo;
+
+// Alias info data
+typedef struct AliasInfo {
+    char type[64];              // Alias type
+    char name[64];              // Alias name
+    char desc[128];             // Alias description
+} AliasInfo;
+
+// Enum info data
+typedef struct EnumInfo {
+    char name[64];              // Enum name
+    char desc[128];             // Enum description
+    int valueCount;             // Number of values in enumerator
+    char valueName[MAX_ENUM_VALUES][64];    // Value name definition
+    int valueInteger[MAX_ENUM_VALUES];      // Value integer
+    char valueDesc[MAX_ENUM_VALUES][128];   // Value description
+} EnumInfo;
+
+// Function info data
+typedef struct FunctionInfo {
+    char name[64];              // Function name
+    char desc[128];             // Function description (comment at the end)
+    char retType[32];           // Return value type
+    int paramCount;             // Number of function parameters
+    char paramType[MAX_FUNCTION_PARAMETERS][32];   // Parameters type
+    char paramName[MAX_FUNCTION_PARAMETERS][32];   // Parameters name
+    char paramDesc[MAX_FUNCTION_PARAMETERS][128];  // Parameters description
+} FunctionInfo;
+
+//----------------------------------------------------------------------------------
+// Global Variables Definition
+//----------------------------------------------------------------------------------
+
+#ifndef RAYLIB_API_DEFINE
+    #define RAYLIB_API_DEFINE "RLAPI"
+#endif
+
+//----------------------------------------------------------------------------------
+// Module Functions Declaration
+//----------------------------------------------------------------------------------
+
+static bool IsLineAPIFunction(char *line, int lineSize);
+static void ParseAPIFunctionInfo(char *linePtr, int lineSize, FunctionInfo *functionInfo);
+
+static char *LoadFileText(const char *fileName, int *length);
+static char **GetTextLines(const char *buffer, int length, int *linesCount);
+static void GetDataTypeAndName(const char *typeName, int typeNameLen, char *type, char *name);
+static void GetDescription(const char *source, char *description);
+static void MoveArraySize(char *name, char *type);          // Move array size from name to type
+static unsigned int TextLength(const char *text);           // Get text length in bytes, check for \0 character
+static bool IsTextEqual(const char *text1, const char *text2, unsigned int count);
+static int TextFindIndex(const char *text, const char *find); // Find first text occurrence within a string
+static void MemoryCopy(void *dest, const void *src, unsigned int count);
+static char *EscapeBackslashes(char *text);                 // Replace '\' by "\\" when exporting to JSON and XML
+static const char *StrDefineType(DefineType type);          // Get string of define type
+
+//----------------------------------------------------------------------------------
+// Additional functions for use as a library
+//----------------------------------------------------------------------------------
+
+static bool IsLineAPIFunction(char *line, int lineSize)
+{
+    int apiDefineSize = TextLength(RAYLIB_API_DEFINE);
+
+    // Read function line (starting with `define`, i.e. for raylib.h "RLAPI")
+    return lineSize >= apiDefineSize && IsTextEqual(line, RAYLIB_API_DEFINE, apiDefineSize);
+}
+
+// Assumes that `functionInfo` is zero initialized, `paramCount` is incremented in place
+static void ParseAPIFunctionInfo(char *linePtr, int lineSize, FunctionInfo *functionInfo)
+{
+    int funcParamsStart = 0;
+    int funcEnd = 0;
+
+    // Get return type and function name from func line
+    for (int c = 0; c < lineSize; c++)
+    {
+        if (linePtr[c] == '(')     // Starts function parameters
+        {
+            funcParamsStart = c + 1;
+
+            // At this point we have function return type and function name
+            char funcRetTypeName[128] = { 0 };
+            int dc = TextLength(RAYLIB_API_DEFINE) + 1;
+            int funcRetTypeNameLen = c - dc;     // Subtract `define` ("RLAPI " for raylib.h)
+            MemoryCopy(funcRetTypeName, &linePtr[dc], funcRetTypeNameLen);
+
+            GetDataTypeAndName(funcRetTypeName, funcRetTypeNameLen, functionInfo->retType, functionInfo->name);
+            break;
+        }
+    }
+
+    // Get parameters from func line
+    for (int c = funcParamsStart; c < lineSize; c++)
+    {
+        int paramIndex = functionInfo->paramCount;
+        if (paramIndex >= MAX_FUNCTION_PARAMETERS) break;   // Parameter arrays are full, ignore the rest
+        if (linePtr[c] == ',')     // Ends current function parameter
+        {
+            // Get parameter type + name, extract info
+            char funcParamTypeName[128] = { 0 };
+            int funcParamTypeNameLen = c - funcParamsStart;
+            MemoryCopy(funcParamTypeName, &linePtr[funcParamsStart], funcParamTypeNameLen);
+
+            GetDataTypeAndName(funcParamTypeName, funcParamTypeNameLen, functionInfo->paramType[paramIndex], functionInfo->paramName[paramIndex]);
+
+            funcParamsStart = c + 1;
+            if (linePtr[c + 1] == ' ') funcParamsStart += 1;
+            functionInfo->paramCount++;     // Move to next parameter
+        }
+        else if (linePtr[c] == ')')
+        {
+            funcEnd = c + 2;
+
+            // Check if previous word is void
+            if ((linePtr[c - 4] == 'v') && (linePtr[c - 3] == 'o') && (linePtr[c - 2] == 'i') && (linePtr[c - 1] == 'd')) break;
+
+            // Get parameter type + name, extract info
+            char funcParamTypeName[128] = { 0 };
+            int funcParamTypeNameLen = c - funcParamsStart;
+            MemoryCopy(funcParamTypeName, &linePtr[funcParamsStart], funcParamTypeNameLen);
+
+            GetDataTypeAndName(funcParamTypeName, funcParamTypeNameLen, functionInfo->paramType[paramIndex], functionInfo->paramName[paramIndex]);
+
+            functionInfo->paramCount++;     // Move to next parameter
+            break;
+        }
+    }
+
+    // Get function description
+    GetDescription(&linePtr[funcEnd], functionInfo->desc);
+
+    // Move array sizes from name to type
+    for (int j = 0; j < functionInfo->paramCount; j++)
+    {
+        MoveArraySize(functionInfo->paramName[j], functionInfo->paramType[j]);
+    }
+}
+
+//----------------------------------------------------------------------------------
+// Module Functions Definition
+//----------------------------------------------------------------------------------
+
+// Load text data from file, returns a '\0' terminated string (NULL if missing or empty)
+// NOTE: text chars array should be freed manually, `length` is only written on success
+static char *LoadFileText(const char *fileName, int *length)
+{
+    char *text = NULL;
+
+    if (fileName != NULL)
+    {
+        FILE *file = fopen(fileName, "rt");
+
+        if (file != NULL)
+        {
+            // WARNING: When reading a file as 'text' file,
+            // text mode causes carriage return-linefeed translation...
+            // ...but using fseek() should return correct byte-offset
+            fseek(file, 0, SEEK_END);
+            int size = ftell(file);
+            fseek(file, 0, SEEK_SET);
+
+            if (size > 0)
+            {
+                text = (char *)calloc((size + 1), sizeof(char));
+                unsigned int count = (unsigned int)fread(text, sizeof(char), size, file);
+
+                // WARNING: \r\n is converted to \n on reading, so,
+                // read bytes count gets reduced by the number of lines
+                if (count < (unsigned int)size)
+                {
+                    // Buffer is left oversized: shrinking with realloc() could fail and lose `text`
+                    *length = count;
+                }
+                else *length = size;
+
+                // Zero-terminate the string
+                text[count] = '\0';
+            }
+
+            fclose(file);
+        }
+    }
+
+    return text;
+}
+
+// Get all lines from a text buffer (expecting lines ending with '\n')
+static char **GetTextLines(const char *buffer, int length, int *linesCount)
+{
+    // Get the number of lines in the text
+    int count = 0;
+    for (int i = 0; i < length; i++) if (buffer[i] == '\n') count++;
+
+    printf("Number of text lines in buffer: %i\n", count);
+
+    // Allocate as many pointers as lines
+    char **lines = (char **)malloc(count*sizeof(char *));
+
+    char *bufferPtr = (char *)buffer;
+
+    for (int i = 0; (i < count) && (bufferPtr[0] != '\0'); i++)
+    {
+        lines[i] = (char *)calloc(MAX_LINE_LENGTH, sizeof(char));
+
+        // Remove line leading spaces
+        // Find last index of space/tab character
+        int index = 0;
+        while ((bufferPtr[index] == ' ') || (bufferPtr[index] == '\t')) index++;
+
+        int j = 0;
+        while (bufferPtr[index + j] != '\n')
+        {
+            lines[i][j] = bufferPtr[index + j];
+            j++;
+        }
+
+        bufferPtr += (index + j + 1);
+    }
+
+    *linesCount = count;
+    return lines;
+}
+
+// Get data type and name from a string containing both
+// NOTE: Useful to parse function parameters and struct fields
+static void GetDataTypeAndName(const char *typeName, int typeNameLen, char *type, char *name)
+{
+    for (int k = typeNameLen; k > 0; k--)
+    {
+        if ((typeName[k] == ' ') && (typeName[k - 1] != ','))
+        {
+            // Function name starts at this point (and ret type finishes at this point)
+            MemoryCopy(type, typeName, k);
+            MemoryCopy(name, typeName + k + 1, typeNameLen - k - 1);
+            break;
+        }
+        else if (typeName[k] == '*')
+        {
+            MemoryCopy(type, typeName, k + 1);
+            MemoryCopy(name, typeName + k + 1, typeNameLen - k - 1);
+            break;
+        }
+        else if ((typeName[k] == '.') && (typeNameLen == 3))  // Handle varargs ...);
+        {
+            MemoryCopy(type, "...", 3);
+            MemoryCopy(name, "args", 4);
+            break;
+        }
+    }
+}
+
+// Get comment from a line (at most 127 chars, every desc field is 128 bytes), do nothing if no comment in line
+static void GetDescription(const char *line, char *description)
+{
+    int c = 0;
+    int descStart = -1;
+    int lastSlash = -2;
+    bool isValid = false;
+    while (line[c] != '\0')
+    {
+        if (isValid && (descStart == -1) && (line[c] != ' ')) descStart = c;
+        else if (line[c] == '/')
+        {
+            if (lastSlash == c - 1) isValid = true;
+            lastSlash = c;
+        }
+        c++;
+    }
+    if (descStart != -1) MemoryCopy(description, &line[descStart], ((c - descStart) < 127)? (c - descStart) : 127);
+}
+
+// Move array size from name to type
+static void MoveArraySize(char *name, char *type)
+{
+    int nameLength = TextLength(name);
+    if ((nameLength > 0) && (name[nameLength - 1] == ']'))  // Empty names have no last character to check
+    {
+        for (int k = nameLength; k > 0; k--)
+        {
+            if (name[k] == '[')
+            {
+                int sizeLength = nameLength - k;
+                MemoryCopy(&type[TextLength(type)], &name[k], sizeLength);
+                name[k] = '\0';
+            }
+        }
+    }
+}
+
+// Get text length in bytes, check for \0 character
+static unsigned int TextLength(const char *text)
+{
+    unsigned int length = 0;
+
+    if (text != NULL) while (*text++) length++;
+
+    return length;
+}
+
+// Compare two text strings, requires number of characters to compare
+static bool IsTextEqual(const char *text1, const char *text2, unsigned int count)
+{
+    bool result = true;
+
+    for (unsigned int i = 0; i < count; i++)
+    {
+        if (text1[i] != text2[i])
+        {
+            result = false;
+            break;
+        }
+    }
+
+    return result;
+}
+
+// Find first text occurrence within a string
+static int TextFindIndex(const char *text, const char *find)
+{
+    int textLen = TextLength(text);
+    int findLen = TextLength(find);
+
+    for (int i = 0; i <= textLen - findLen; i++)
+    {
+        if (IsTextEqual(&text[i], find, findLen)) return i;
+    }
+
+    return -1;
+}
+
+// Custom memcpy() to avoid <string.h>
+static void MemoryCopy(void *dest, const void *src, unsigned int count)
+{
+    char *srcPtr = (char *)src;
+    char *destPtr = (char *)dest;
+
+    for (unsigned int i = 0; i < count; i++) destPtr[i] = srcPtr[i];
+}
+
+// Escape backslashes in a string, writing the escaped string into a static buffer (output is truncated to fit)
+static char *EscapeBackslashes(char *text)
+{
+    static char buffer[256] = { 0 };
+
+    int count = 0;
+
+    for (int i = 0; (text[i] != '\0') && (count < 254); i++, count++)
+    {
+        buffer[count] = text[i];
+
+        if (text[i] == '\\')
+        {
+            buffer[count + 1] = '\\';
+            count++;
+        }
+    }
+
+    buffer[count] = '\0';
+
+    return buffer;
+}
+
+// Get string of define type
+static const char *StrDefineType(DefineType type)
+{
+    switch (type)
+    {
+        case UNKNOWN:     return "UNKNOWN";
+        case GUARD:       return "GUARD";
+        case MACRO:       return "MACRO";
+        case INT:         return "INT";
+        case INT_MATH:    return "INT_MATH";
+        case LONG:        return "LONG";
+        case LONG_MATH:   return "LONG_MATH";
+        case FLOAT:       return "FLOAT";
+        case FLOAT_MATH:  return "FLOAT_MATH";
+        case DOUBLE:      return "DOUBLE";
+        case DOUBLE_MATH: return "DOUBLE_MATH";
+        case CHAR:        return "CHAR";
+        case STRING:      return "STRING";
+        case COLOR:       return "COLOR";
+    }
+    return "";
+}
diff --git a/tools/example-indexer/stb_c_lexer.h b/tools/example-indexer/stb_c_lexer.h
new file mode 100644
index 0000000..bf89dca
--- /dev/null
+++ b/tools/example-indexer/stb_c_lexer.h
@@ -0,0 +1,940 @@
+// stb_c_lexer.h - v0.12 - public domain Sean Barrett 2013
+// lexer for making little C-like languages with recursive-descent parsers
+//
+// This file provides both the interface and the implementation.
+// To instantiate the implementation,
+//      #define STB_C_LEXER_IMPLEMENTATION
+// in *ONE* source file, before #including this file.
+// +// The default configuration is fairly close to a C lexer, although +// suffixes on integer constants are not handled (you can override this). +// +// History: +// 0.12 fix compilation bug for NUL support; better support separate inclusion +// 0.11 fix clang static analysis warning +// 0.10 fix warnings +// 0.09 hex floats, no-stdlib fixes +// 0.08 fix bad pointer comparison +// 0.07 fix mishandling of hexadecimal constants parsed by strtol +// 0.06 fix missing next character after ending quote mark (Andreas Fredriksson) +// 0.05 refixed get_location because github version had lost the fix +// 0.04 fix octal parsing bug +// 0.03 added STB_C_LEX_DISCARD_PREPROCESSOR option +// refactor API to simplify (only one struct instead of two) +// change literal enum names to have 'lit' at the end +// 0.02 first public release +// +// Status: +// - haven't tested compiling as C++ +// - haven't tested the float parsing path +// - haven't tested the non-default-config paths (e.g. non-stdlib) +// - only tested default-config paths by eyeballing output of self-parse +// +// - haven't implemented multiline strings +// - haven't implemented octal/hex character constants +// - haven't implemented support for unicode CLEX_char +// - need to expand error reporting so you don't just get "CLEX_parse_error" +// +// Contributors: +// Arpad Goretity (bugfix) +// Alan Hickman (hex floats) +// +// LICENSE +// +// See end of file for license information. + +#ifdef STB_C_LEXER_IMPLEMENTATION +#ifndef STB_C_LEXER_DEFINITIONS +// to change the default parsing rules, copy the following lines +// into your C/C++ file *before* including this, and then replace +// the Y's with N's for the ones you don't want. This needs to be +// set to the same values for every place in your program where +// stb_c_lexer.h is included. 
// --BEGIN--

#if defined(Y) || defined(N)
#error "Can only use stb_c_lexer in contexts where the preprocessor symbols 'Y' and 'N' are not defined"
#endif

#define STB_C_LEX_C_DECIMAL_INTS    Y   //  "0|[1-9][0-9]*"                        CLEX_intlit
#define STB_C_LEX_C_HEX_INTS        Y   //  "0x[0-9a-fA-F]+"                       CLEX_intlit
#define STB_C_LEX_C_OCTAL_INTS      Y   //  "[0-7]+"                               CLEX_intlit
#define STB_C_LEX_C_DECIMAL_FLOATS  Y   //  "[0-9]*(.[0-9]*([eE][-+]?[0-9]+)?)     CLEX_floatlit
#define STB_C_LEX_C99_HEX_FLOATS    N   //  "0x{hex}+(.{hex}*)?[pP][-+]?{hex}+     CLEX_floatlit
#define STB_C_LEX_C_IDENTIFIERS     Y   //  "[_a-zA-Z][_a-zA-Z0-9]*"               CLEX_id
#define STB_C_LEX_C_DQ_STRINGS      Y   //  double-quote-delimited strings with escapes  CLEX_dqstring
#define STB_C_LEX_C_SQ_STRINGS      N   //  single-quote-delimited strings with escapes  CLEX_sqstring
#define STB_C_LEX_C_CHARS           Y   //  single-quote-delimited character with escape CLEX_charlit
#define STB_C_LEX_C_COMMENTS        Y   //  "/* comment */"
#define STB_C_LEX_CPP_COMMENTS      Y   //  "// comment to end of line\n"
#define STB_C_LEX_C_COMPARISONS     Y   //  "==" CLEX_eq  "!=" CLEX_noteq   "<=" CLEX_lesseq  ">=" CLEX_greatereq
#define STB_C_LEX_C_LOGICAL         Y   //  "&&"  CLEX_andand   "||"  CLEX_oror
#define STB_C_LEX_C_SHIFTS          Y   //  "<<"  CLEX_shl      ">>"  CLEX_shr
#define STB_C_LEX_C_INCREMENTS      Y   //  "++"  CLEX_plusplus "--"  CLEX_minusminus
#define STB_C_LEX_C_ARROW           Y   //  "->"  CLEX_arrow
#define STB_C_LEX_EQUAL_ARROW       N   //  "=>"  CLEX_eqarrow
#define STB_C_LEX_C_BITWISEEQ       Y   //  "&="  CLEX_andeq    "|="  CLEX_oreq     "^="  CLEX_xoreq
#define STB_C_LEX_C_ARITHEQ         Y   //  "+="  CLEX_pluseq   "-="  CLEX_minuseq
                                        //  "*="  CLEX_muleq    "/="  CLEX_diveq    "%=" CLEX_modeq
                                        //  if both STB_C_LEX_C_SHIFTS & STB_C_LEX_C_ARITHEQ:
                                        //                      "<<=" CLEX_shleq    ">>=" CLEX_shreq

#define STB_C_LEX_PARSE_SUFFIXES    N   // letters after numbers are parsed as part of those numbers, and must be in suffix list below
#define STB_C_LEX_DECIMAL_SUFFIXES  ""  // decimal integer suffixes e.g. "uUlL" -- these are returned as-is in string storage
#define STB_C_LEX_HEX_SUFFIXES      ""  // e.g. "uUlL"
#define STB_C_LEX_OCTAL_SUFFIXES    ""  // e.g. "uUlL"
#define STB_C_LEX_FLOAT_SUFFIXES    ""  //

#define STB_C_LEX_0_IS_EOF             N  // if Y, ends parsing at '\0'; if N, returns '\0' as token
#define STB_C_LEX_INTEGERS_AS_DOUBLES  N  // parses integers as doubles so they can be larger than 'int', but only if STB_C_LEX_STDLIB==N
#define STB_C_LEX_MULTILINE_DSTRINGS   N  // allow newlines in double-quoted strings
#define STB_C_LEX_MULTILINE_SSTRINGS   N  // allow newlines in single-quoted strings
#define STB_C_LEX_USE_STDLIB           Y  // use strtod,strtol for parsing #s; otherwise inaccurate hack
#define STB_C_LEX_DOLLAR_IDENTIFIER    Y  // allow $ as an identifier character
#define STB_C_LEX_FLOAT_NO_DECIMAL     Y  // allow floats that have no decimal point if they have an exponent

#define STB_C_LEX_DEFINE_ALL_TOKEN_NAMES  N   // if Y, all CLEX_ token names are defined, even if never returned
                                              // leaving it as N should help you catch config bugs

#define STB_C_LEX_DISCARD_PREPROCESSOR    Y   // discard C-preprocessor directives (e.g. after preprocessing
                                              // you still have #line, #pragma, etc)

//#define STB_C_LEX_ISWHITE(str)    ... // return length in bytes of whitespace characters if first char is whitespace

#define STB_C_LEXER_DEFINITIONS         // This line prevents the header file from replacing your definitions
// --END--
#endif
#endif

#ifndef INCLUDE_STB_C_LEXER_H
#define INCLUDE_STB_C_LEXER_H

// Lexer state. Filled in by stb_c_lexer_init() and updated by every call to
// stb_c_lexer_get_token().
typedef struct
{
   // lexer variables
   char *input_stream;        // start of the text being lexed
   char *eof;                 // one past the last input byte, or NULL when relying on 0-for-EOF
   char *parse_point;         // where the next call to stb_c_lexer_get_token() resumes
   char *string_storage;      // caller-provided scratch buffer for strings and identifiers
   int   string_storage_len;  // size of string_storage in bytes

   // lexer parse location for error messages
   char *where_firstchar;     // first character of the most recent token
   char *where_lastchar;      // last character of the most recent token

   // lexer token variables
   long token;                // token ID of the most recent token
   double real_number;        // value of a CLEX_floatlit
   long   int_number;         // value of a CLEX_intlit or CLEX_charlit
   char *string;              // 0-terminated text of a string or identifier token
   int string_len;            // byte length of 'string', not counting the terminator
} stb_lexer;

typedef struct
{
   int line_number;
   int line_offset;
} stb_lex_location;

#ifdef __cplusplus
extern "C" {
#endif

extern void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length);
// this function initializes the 'lexer' structure
//   Input:
//   - input_stream points to the file to parse, loaded into memory
//   - input_stream_end points to the end of the file, or NULL if you use 0-for-EOF
//   - string_store is storage the lexer can use for storing parsed strings and identifiers
//   - store_length is the length of that storage

extern int stb_c_lexer_get_token(stb_lexer *lexer);
// this function returns non-zero if a token is parsed, or 0 if at EOF
//   Output:
//   - lexer->token is the token ID, which is the character value for a single-char token,
//     or one of the CLEX_* values (all >= 256) for a multichar token, eof, or error
//   - lexer->real_number is a double constant value for CLEX_floatlit, or CLEX_intlit if STB_C_LEX_INTEGERS_AS_DOUBLES
//   - lexer->int_number is an integer constant for CLEX_intlit if !STB_C_LEX_INTEGERS_AS_DOUBLES, or character for CLEX_charlit
//   - lexer->string is a 0-terminated string for CLEX_dqstring or CLEX_sqstring or CLEX_id
//   - lexer->string_len is the byte length of lexer->string

extern void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc);
// this inefficient function returns the line number and character offset of a
// given location in the file, such as lexer->where_firstchar or
// lexer->where_lastchar. Because it's inefficient, you should only call it for
// errors, not for every token.
// For error messages of invalid tokens, you typically want the location of the start
// of the token (which caused the token to be invalid). For bugs involving legit
// tokens, you can report the first or the range.
//    Output:
//    - loc->line_number is the line number in the file, counting from 1, of the location
//    - loc->line_offset is the char-offset in the line, counting from 0, of the location


#ifdef __cplusplus
}
#endif

// Token IDs for everything that is not a single character. Single-character
// tokens use the character value itself, so these start at 256.
enum
{
   CLEX_eof = 256,
   CLEX_parse_error,
   CLEX_intlit        ,
   CLEX_floatlit      ,
   CLEX_id            ,
   CLEX_dqstring      ,
   CLEX_sqstring      ,
   CLEX_charlit       ,
   CLEX_eq            ,
   CLEX_noteq         ,
   CLEX_lesseq        ,
   CLEX_greatereq     ,
   CLEX_andand        ,
   CLEX_oror          ,
   CLEX_shl           ,
   CLEX_shr           ,
   CLEX_plusplus      ,
   CLEX_minusminus    ,
   CLEX_pluseq        ,
   CLEX_minuseq       ,
   CLEX_muleq         ,
   CLEX_diveq         ,
   CLEX_modeq         ,
   CLEX_andeq         ,
   CLEX_oreq          ,
   CLEX_xoreq         ,
   CLEX_arrow         ,
   CLEX_eqarrow       ,
   CLEX_shleq, CLEX_shreq,

   CLEX_first_unused_token

};
#endif // INCLUDE_STB_C_LEXER_H

#ifdef STB_C_LEXER_IMPLEMENTATION

// Hacky definitions so we can easily #if on them:
// a config symbol followed by "(x)" now evaluates to 1 (Y) or 0 (N).
#define Y(x) 1
#define N(x) 0

#if STB_C_LEX_INTEGERS_AS_DOUBLES(x)
typedef double stb__clex_int;
#define intfield real_number
#define STB__clex_int_as_double
#else
typedef long stb__clex_int;
#define intfield int_number
#endif

// Convert these config options to simple conditional #defines so we can more
// easily test them once we've changed the meaning of Y/N

#if STB_C_LEX_PARSE_SUFFIXES(x)
#define STB__clex_parse_suffixes
#endif

#if STB_C_LEX_C99_HEX_FLOATS(x)
#define STB__clex_hex_floats
#endif

#if STB_C_LEX_C_HEX_INTS(x)
#define STB__clex_hex_ints
#endif

#if STB_C_LEX_C_DECIMAL_INTS(x)
#define STB__clex_decimal_ints
#endif

#if STB_C_LEX_C_OCTAL_INTS(x)
#define STB__clex_octal_ints
#endif

#if STB_C_LEX_C_DECIMAL_FLOATS(x)
#define STB__clex_decimal_floats
#endif

#if STB_C_LEX_DISCARD_PREPROCESSOR(x)
#define STB__clex_discard_preprocessor
#endif

#if STB_C_LEX_USE_STDLIB(x) && (!defined(STB__clex_hex_floats) || __STDC_VERSION__ >= 199901L)
#define STB__CLEX_use_stdlib
#include <stdlib.h>
#endif

// Now for the rest of the file we'll use the basic definition
// where Y expands to its contents and N expands to nothing
#undef  Y
#define Y(a) a
#undef N
#define N(a)

// API function: point the lexer at an input buffer and a scratch buffer.
void stb_c_lexer_init(stb_lexer *lexer, const char *input_stream, const char *input_stream_end, char *string_store, int store_length)
{
   lexer->input_stream = (char *) input_stream;
   lexer->eof = (char *) input_stream_end;
   lexer->parse_point = (char *) input_stream;
   lexer->string_storage = string_store;
   lexer->string_storage_len = store_length;
}

// API function: convert a pointer into the input into a line/column pair by
// rescanning the input from the start. "\r\n" counts as a single newline.
void stb_c_lexer_get_location(const stb_lexer *lexer, const char *where, stb_lex_location *loc)
{
   char *p = lexer->input_stream;
   int line_number = 1;
   int char_offset = 0;
   while (*p && p < where) {
      if (*p == '\n' || *p == '\r') {
         p += (p[0]+p[1] == '\r'+'\n' ? 2 : 1); // skip newline
         line_number += 1;
         char_offset = 0;
      } else {
         ++p;
         ++char_offset;
      }
   }
   loc->line_number = line_number;
   loc->line_offset = char_offset;
}

// main helper function for returning a parsed token;
// 'end' is the LAST character of the token (inclusive), so parsing resumes at end+1
static int stb__clex_token(stb_lexer *lexer, int token, char *start, char *end)
{
   lexer->token = token;
   lexer->where_firstchar = start;
   lexer->where_lastchar = end;
   lexer->parse_point = end+1;
   return 1;
}

// helper function for returning eof
static int stb__clex_eof(stb_lexer *lexer)
{
   lexer->token = CLEX_eof;
   return 0;
}

static int stb__clex_iswhite(int x)
{
   return x == ' ' || x == '\t' || x == '\r' || x == '\n' || x == '\f';
}

// minimal strchr: returns a pointer to the first 'ch' in 'str', or 0 if absent
static const char *stb__strchr(const char *str, int ch)
{
   for (; *str; ++str)
      if (*str == ch)
         return str;
   return 0;
}

// parse suffixes at the end of a number;
// 'cur' points one past the last character of the number itself
static int stb__clex_parse_suffixes(stb_lexer *lexer, long tokenid, char *start, char *cur, const char *suffixes)
{
   #ifdef STB__clex_parse_suffixes
   lexer->string = lexer->string_storage;
   lexer->string_len = 0;

   while ((*cur >= 'a' && *cur <= 'z') || (*cur >= 'A' && *cur <= 'Z')) {
      if (stb__strchr(suffixes, *cur) == 0)
         return stb__clex_token(lexer, CLEX_parse_error, start, cur);
      if (lexer->string_len+1 >= lexer->string_storage_len)
         return stb__clex_token(lexer, CLEX_parse_error, start, cur);
      lexer->string[lexer->string_len++] = *cur++;
   }
   #else
   (void) suffixes; // unused when suffix parsing is disabled
   #endif
   return stb__clex_token(lexer, tokenid, start, cur-1);
}

#ifndef STB__CLEX_use_stdlib
// integer power by repeated squaring
static double stb__clex_pow(double base, unsigned int exponent)
{
   double value=1;
   for ( ; exponent; exponent >>= 1) {
      if (exponent & 1)
         value *= base;
      base *= base;
   }
   return value;
}

// approximate replacement for strtod(); on return *q points past the parsed
// text, or equals p if a hex float lacks its mandatory exponent
static double stb__clex_parse_float(char *p, char **q)
{
   char *s = p;
   double value=0;
   int base=10;
   int has_exponent=0;

#ifdef STB__clex_hex_floats
   if (*p == '0') {
      if (p[1] == 'x' || p[1] == 'X') {
         base=16;
         p += 2;
      }
   }
#endif

   // integer part
   for (;;) {
      if (*p >= '0' && *p <= '9')
         value = value*base + (*p++ - '0');
#ifdef STB__clex_hex_floats
      else if (base == 16 && *p >= 'a' && *p <= 'f')
         value = value*base + 10 + (*p++ - 'a');
      else if (base == 16 && *p >= 'A' && *p <= 'F')
         value = value*base + 10 + (*p++ - 'A');
#endif
      else
         break;
   }

   // fractional part: accumulate the digits as an integer, then scale down
   if (*p == '.') {
      double pow, addend = 0;
      ++p;
      for (pow=1; ; pow*=base) {
         if (*p >= '0' && *p <= '9')
            addend = addend*base + (*p++ - '0');
#ifdef STB__clex_hex_floats
         else if (base == 16 && *p >= 'a' && *p <= 'f')
            addend = addend*base + 10 + (*p++ - 'a');
         else if (base == 16 && *p >= 'A' && *p <= 'F')
            addend = addend*base + 10 + (*p++ - 'A');
#endif
         else
            break;
      }
      value += addend / pow;
   }
#ifdef STB__clex_hex_floats
   if (base == 16) {
      // exponent required for hex float literal
      if (*p != 'p' && *p != 'P') {
         *q = s;
         return 0;
      }
      has_exponent = 1;
   } else
#endif
      has_exponent = (*p == 'e' || *p == 'E');

   if (has_exponent) {
      int sign = p[1] == '-';
      unsigned int exp_value=0;
      double power=1;
      ++p;
      if (*p == '-' || *p == '+')
         ++p;
      while (*p >= '0' && *p <= '9')
         exp_value = exp_value*10 + (*p++ - '0');

#ifdef STB__clex_hex_floats
      if (base == 16)
         power = stb__clex_pow(2, exp_value);
      else
#endif
         power = stb__clex_pow(10, exp_value);
      if (sign)
         value /= power;
      else
         value *= power;
   }
   *q = p;
   return value;
}
#endif

// decode one (possibly backslash-escaped) character starting at p; *q is set
// to the character following it. Returns -1 for escapes not yet supported.
static int stb__clex_parse_char(char *p, char **q)
{
   if (*p == '\\') {
      *q = p+2; // tentatively guess we'll parse two characters
      switch(p[1]) {
         case '\\': return '\\';
         case '\'': return '\'';
         case '"': return '"';
         case 't': return '\t';
         case 'f': return '\f';
         case 'n': return '\n';
         case 'r': return '\r';
         case '0': return '\0'; // @TODO octal constants
         case 'x': case 'X': return -1; // @TODO hex constants
         case 'u': return -1; // @TODO unicode constants
      }
   }
   *q = p+1;
   return (unsigned char) *p;
}

// lex a quoted string starting at the opening delimiter 'p', copying the
// decoded contents into the string storage. Produces CLEX_parse_error if the
// string is unterminated, uses an unsupported escape, or does not fit
// (including its 0 terminator) in the storage.
static int stb__clex_parse_string(stb_lexer *lexer, char *p, int type)
{
   char *start = p;
   char delim = *p++; // grab the " or ' for later matching
   char *out = lexer->string_storage;
   char *outend = lexer->string_storage + lexer->string_storage_len;
   for (;;) {
      int n;
      // unterminated string: stop at end of input instead of reading past it
      if (p == lexer->eof)
         return stb__clex_token(lexer, CLEX_parse_error, start, p-1);
      if (*p == delim)
         break;
      if (*p == '\\') {
         char *q;
         // a lone backslash as the last input byte has no escape character to read
         if (p+1 == lexer->eof)
            return stb__clex_token(lexer, CLEX_parse_error, start, p);
         n = stb__clex_parse_char(p, &q);
         if (n < 0)
            return stb__clex_token(lexer, CLEX_parse_error, start, q);
         p = q;
      } else {
         // @OPTIMIZE: could speed this up by looping-while-not-backslash
         n = (unsigned char) *p++;
      }
      // always keep one byte free for the terminating 0
      if (out+1 >= outend)
         return stb__clex_token(lexer, CLEX_parse_error, start, p);
      // @TODO expand unicode escapes to UTF8
      *out++ = (char) n;
   }
   // empty string with zero-length storage: no room even for the terminator
   if (out >= outend)
      return stb__clex_token(lexer, CLEX_parse_error, start, p);
   *out = 0;
   lexer->string = lexer->string_storage;
   lexer->string_len = (int) (out - lexer->string_storage);
   return stb__clex_token(lexer, type, start, p);
}

// API function: lex the next token starting at lexer->parse_point.
// Returns 0 at end of input and non-zero otherwise; errors are reported as a
// token with ID CLEX_parse_error.
int stb_c_lexer_get_token(stb_lexer *lexer)
{
   char *p = lexer->parse_point;

   // skip whitespace and comments
   for (;;) {
      #ifdef STB_C_LEX_ISWHITE
      while (p != lexer->eof) {
         int n;
         n = STB_C_LEX_ISWHITE(p);
         if (n == 0) break;
         if (lexer->eof && lexer->eof - p < n)
            return stb__clex_token(lexer, CLEX_parse_error, p,lexer->eof-1);
         p += n;
      }
      #else
      while (p != lexer->eof && stb__clex_iswhite(*p))
         ++p;
      #endif

      STB_C_LEX_CPP_COMMENTS(
         if (p != lexer->eof && p+1 != lexer->eof && p[0] == '/' && p[1] == '/') {
            while (p != lexer->eof && *p != '\r' && *p != '\n')
               ++p;
            continue;
         }
      )

      STB_C_LEX_C_COMMENTS(
         if (p != lexer->eof && p+1 != lexer->eof && p[0] == '/' && p[1] == '*') {
            char *start = p;
            p += 2;
            while (p != lexer->eof && (p[0] != '*' || p+1 == lexer->eof || p[1] != '/'))
               ++p;
            if (p == lexer->eof)
               return stb__clex_token(lexer, CLEX_parse_error, start, p-1);
            p += 2;
            continue;
         }
      )

      #ifdef STB__clex_discard_preprocessor
         // @TODO this discards everything after a '#', regardless
         // of where in the line the # is, rather than requiring it
         // be at the start. (because this parser doesn't otherwise
         // check for line breaks!)
         if (p != lexer->eof && p[0] == '#') {
            while (p != lexer->eof && *p != '\r' && *p != '\n')
               ++p;
            continue;
         }
      #endif

      break;
   }

   if (p == lexer->eof)
      return stb__clex_eof(lexer);

   switch (*p) {
      default:
         if (   (*p >= 'a' && *p <= 'z')
             || (*p >= 'A' && *p <= 'Z')
             || *p == '_' || (unsigned char) *p >= 128    // >= 128 is UTF8 char
             STB_C_LEX_DOLLAR_IDENTIFIER( || *p == '$' ) )
         {
            int n = 0;
            lexer->string = lexer->string_storage;
            do {
               if (n+1 >= lexer->string_storage_len)
                  return stb__clex_token(lexer, CLEX_parse_error, p, p+n);
               lexer->string[n] = p[n];
               ++n;
            } while (p+n != lexer->eof && (
                  (p[n] >= 'a' && p[n] <= 'z')
               || (p[n] >= 'A' && p[n] <= 'Z')
               || (p[n] >= '0' && p[n] <= '9') // allow digits in middle of identifier
               || p[n] == '_' || (unsigned char) p[n] >= 128
                STB_C_LEX_DOLLAR_IDENTIFIER( || p[n] == '$' )
            ));
            lexer->string[n] = 0;
            lexer->string_len = n; // report the identifier's real length
            return stb__clex_token(lexer, CLEX_id, p, p+n-1);
         }

         // check for EOF
         STB_C_LEX_0_IS_EOF(
            if (*p == 0)
               return stb__clex_eof(lexer);
         )

      single_char:
         // not an identifier, return the character as itself
         return stb__clex_token(lexer, *p, p, p);

      case '+':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_INCREMENTS(if (p[1] == '+') return stb__clex_token(lexer, CLEX_plusplus, p,p+1);)
            STB_C_LEX_C_ARITHEQ(   if (p[1] == '=') return stb__clex_token(lexer, CLEX_pluseq  , p,p+1);)
         }
         goto single_char;
      case '-':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_INCREMENTS(if (p[1] == '-') return stb__clex_token(lexer, CLEX_minusminus, p,p+1);)
            STB_C_LEX_C_ARITHEQ(   if (p[1] == '=') return stb__clex_token(lexer, CLEX_minuseq   , p,p+1);)
            STB_C_LEX_C_ARROW(     if (p[1] == '>') return stb__clex_token(lexer, CLEX_arrow     , p,p+1);)
         }
         goto single_char;
      case '&':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_LOGICAL(  if (p[1] == '&') return stb__clex_token(lexer, CLEX_andand, p,p+1);)
            STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, CLEX_andeq , p,p+1);)
         }
         goto single_char;
      case '|':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_LOGICAL(  if (p[1] == '|') return stb__clex_token(lexer, CLEX_oror, p,p+1);)
            STB_C_LEX_C_BITWISEEQ(if (p[1] == '=') return stb__clex_token(lexer, CLEX_oreq, p,p+1);)
         }
         goto single_char;
      case '=':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_eq, p,p+1);)
            STB_C_LEX_EQUAL_ARROW(  if (p[1] == '>') return stb__clex_token(lexer, CLEX_eqarrow, p,p+1);)
         }
         goto single_char;
      case '!':
         STB_C_LEX_C_COMPARISONS(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_noteq, p,p+1);)
         goto single_char;
      case '^':
         STB_C_LEX_C_BITWISEEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_xoreq, p,p+1));
         goto single_char;
      case '%':
         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_modeq, p,p+1));
         goto single_char;
      case '*':
         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_muleq, p,p+1));
         goto single_char;
      case '/':
         STB_C_LEX_C_ARITHEQ(if (p+1 != lexer->eof && p[1] == '=') return stb__clex_token(lexer, CLEX_diveq, p,p+1));
         goto single_char;
      case '<':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_lesseq, p,p+1);)
            STB_C_LEX_C_SHIFTS(     if (p[1] == '<') {
                                       STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=')
                                                              return stb__clex_token(lexer, CLEX_shleq, p,p+2);)
                                       return stb__clex_token(lexer, CLEX_shl, p,p+1);
                                    }
                              )
         }
         goto single_char;
      case '>':
         if (p+1 != lexer->eof) {
            STB_C_LEX_C_COMPARISONS(if (p[1] == '=') return stb__clex_token(lexer, CLEX_greatereq, p,p+1);)
            STB_C_LEX_C_SHIFTS(     if (p[1] == '>') {
                                       STB_C_LEX_C_ARITHEQ(if (p+2 != lexer->eof && p[2] == '=')
                                                              return stb__clex_token(lexer, CLEX_shreq, p,p+2);)
                                       return stb__clex_token(lexer, CLEX_shr, p,p+1);
                                    }
                              )
         }
         goto single_char;

      case '"':
         STB_C_LEX_C_DQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_dqstring);)
         goto single_char;
      case '\'':
         STB_C_LEX_C_SQ_STRINGS(return stb__clex_parse_string(lexer, p, CLEX_sqstring);)
         STB_C_LEX_C_CHARS(
         {
            char *start = p;
            lexer->int_number = stb__clex_parse_char(p+1, &p);
            if (lexer->int_number < 0)
               return stb__clex_token(lexer, CLEX_parse_error, start,start);
            if (p == lexer->eof || *p != '\'')
               return stb__clex_token(lexer, CLEX_parse_error, start,p);
            // p is the closing quote, i.e. the last character of the token;
            // passing p+1 here would skip the character after the literal
            return stb__clex_token(lexer, CLEX_charlit, start, p);
         })
         goto single_char;

      case '0':
         #if defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats)
         if (p+1 != lexer->eof) {
            if (p[1] == 'x' || p[1] == 'X') {
               char *q;

               #ifdef STB__clex_hex_floats
               for (q=p+2;
                    q != lexer->eof && ((*q >= '0' && *q <= '9') || (*q >= 'a' && *q <= 'f') || (*q >= 'A' && *q <= 'F'));
                    ++q);
               if (q != lexer->eof) {
                  if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'p' || *q == 'P')) {
                     #ifdef STB__CLEX_use_stdlib
                     lexer->real_number = strtod((char *) p, (char**) &q);
                     #else
                     lexer->real_number = stb__clex_parse_float(p, &q);
                     #endif

                     if (p == q)
                        return stb__clex_token(lexer, CLEX_parse_error, p,q);
                     return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES);

                  }
               }
               #endif   // STB__CLEX_hex_floats

               #ifdef STB__clex_hex_ints
               #ifdef STB__CLEX_use_stdlib
               lexer->int_number = strtol((char *) p, (char **) &q, 16);
               #else
               {
                  stb__clex_int n=0;
                  for (q=p+2; q != lexer->eof; ++q) {
                     if (*q >= '0' && *q <= '9')
                        n = n*16 + (*q - '0');
                     else if (*q >= 'a' && *q <= 'f')
                        n = n*16 + (*q - 'a') + 10;
                     else if (*q >= 'A' && *q <= 'F')
                        n = n*16 + (*q - 'A') + 10;
                     else
                        break;
                  }
                  lexer->int_number = n;
               }
               #endif
               // "0x" with no hex digits: report the prefix itself as the bad token
               if (q == p+2)
                  return stb__clex_token(lexer, CLEX_parse_error, p,p+1);
               return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_HEX_SUFFIXES);
               #endif
            }
         }
         #endif // defined(STB__clex_hex_ints) || defined(STB__clex_hex_floats)
         // can't test for octal because we might parse '0.0' as float or as '0' '.' '0',
         // so have to do float first

         /* FALL THROUGH */
      case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':

         #ifdef STB__clex_decimal_floats
         {
            char *q = p;
            while (q != lexer->eof && (*q >= '0' && *q <= '9'))
               ++q;
            if (q != lexer->eof) {
               if (*q == '.' STB_C_LEX_FLOAT_NO_DECIMAL(|| *q == 'e' || *q == 'E')) {
                  #ifdef STB__CLEX_use_stdlib
                  lexer->real_number = strtod((char *) p, (char**) &q);
                  #else
                  lexer->real_number = stb__clex_parse_float(p, &q);
                  #endif

                  return stb__clex_parse_suffixes(lexer, CLEX_floatlit, p,q, STB_C_LEX_FLOAT_SUFFIXES);

               }
            }
         }
         #endif // STB__clex_decimal_floats

         #ifdef STB__clex_octal_ints
         if (p[0] == '0') {
            char *q = p;
            #ifdef STB__CLEX_use_stdlib
            lexer->int_number = strtol((char *) p, (char **) &q, 8);
            #else
            stb__clex_int n=0;
            while (q != lexer->eof) {
               if (*q >= '0' && *q <= '7')
                  n = n*8 + (*q - '0');
               else
                  break;
               ++q;
            }
            if (q != lexer->eof && (*q == '8' || *q=='9'))
               return stb__clex_token(lexer, CLEX_parse_error, p, q);
            lexer->int_number = n;
            #endif
            return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_OCTAL_SUFFIXES);
         }
         #endif // STB__clex_octal_ints

         #ifdef STB__clex_decimal_ints
         {
            char *q = p;
            #ifdef STB__CLEX_use_stdlib
            lexer->int_number = strtol((char *) p, (char **) &q, 10);
            #else
            stb__clex_int n=0;
            while (q != lexer->eof) {
               if (*q >= '0' && *q <= '9')
                  n = n*10 + (*q - '0');
               else
                  break;
               ++q;
            }
            lexer->int_number = n;
            #endif
            // decimal literals take the decimal suffix list, not the octal one
            return stb__clex_parse_suffixes(lexer, CLEX_intlit, p,q, STB_C_LEX_DECIMAL_SUFFIXES);
         }
         #endif // STB__clex_decimal_ints
         goto single_char;
   }
}
#endif // STB_C_LEXER_IMPLEMENTATION

#ifdef STB_C_LEXER_SELF_TEST
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>

// print the most recently lexed token in a form that makes its kind visible
static void print_token(stb_lexer *lexer)
{
   switch (lexer->token) {
      case CLEX_id        : printf("_%s", lexer->string); break;
      case CLEX_eq        : printf("=="); break;
      case CLEX_noteq     : printf("!="); break;
      case CLEX_lesseq    : printf("<="); break;
      case CLEX_greatereq : printf(">="); break;
      case CLEX_andand    : printf("&&"); break;
      case CLEX_oror      : printf("||"); break;
      case CLEX_shl       : printf("<<"); break;
      case CLEX_shr       : printf(">>"); break;
      case CLEX_plusplus  : printf("++"); break;
      case CLEX_minusminus: printf("--"); break;
      case CLEX_arrow     : printf("->"); break;
      case CLEX_andeq     : printf("&="); break;
      case CLEX_oreq      : printf("|="); break;
      case CLEX_xoreq     : printf("^="); break;
      case CLEX_pluseq    : printf("+="); break;
      case CLEX_minuseq   : printf("-="); break;
      case CLEX_muleq     : printf("*="); break;
      case CLEX_diveq     : printf("/="); break;
      case CLEX_modeq     : printf("%%="); break;
      case CLEX_shleq     : printf("<<="); break;
      case CLEX_shreq     : printf(">>="); break;
      case CLEX_eqarrow   : printf("=>"); break;
      case CLEX_dqstring  : printf("\"%s\"", lexer->string); break;
      case CLEX_sqstring  : printf("'\"%s\"'", lexer->string); break;
      // a character literal stores its value in int_number; 'string' is not set for it
      case CLEX_charlit   : printf("'%c'", (int) lexer->int_number); break;
      #if defined(STB__clex_int_as_double) && !defined(STB__CLEX_use_stdlib)
      case CLEX_intlit    : printf("#%g", lexer->real_number); break;
      #else
      case CLEX_intlit    : printf("#%ld", lexer->int_number); break;
      #endif
      case CLEX_floatlit  : printf("%g", lexer->real_number); break;
      default:
         if (lexer->token >= 0 && lexer->token < 256)
            printf("%c", (int) lexer->token);
         else {
            printf("<<<UNKNOWN TOKEN %ld >>>\n", lexer->token);
         }
         break;
   }
}

/* Force a test
of parsing
multiline comments */

/*/ comment /*/
/**/ extern /**/

void dummy(void)
{
   double some_floats[] = {
      1.0501, -10.4e12, 5E+10,
#if 0   // not supported in C++ or C-pre-99, so don't try to compile it, but let our parser test it
      0x1.0p+24, 0xff.FP-8, 0x1p-23,
#endif
      4.
   };
   (void) sizeof(some_floats);
   (void) some_floats[1];

   printf("test %d",1); // https://github.com/nothings/stb/issues/13
}

// self test: lex this header itself and print every token
int main(int argc, char **argv)
{
   FILE *f = fopen("stb_c_lexer.h","rb");
   char *text = (char *) malloc(1 << 20);
   char *store = (char *) malloc(0x10000);
   int len = (f && text) ? (int) fread(text, 1, 1<<20, f) : -1;
   stb_lexer lex;

   (void) argc;
   (void) argv;

   // fclose(NULL) is undefined, so only close a stream that was opened
   if (f)
      fclose(f);
   if (len < 0 || store == NULL) {
      fprintf(stderr, "Error opening file\n");
      free(text);
      free(store);
      return 1;
   }

   stb_c_lexer_init(&lex, text, text+len, store, 0x10000);
   while (stb_c_lexer_get_token(&lex)) {
      if (lex.token == CLEX_parse_error) {
         printf("\n<<<PARSE ERROR>>>\n");
         break;
      }
      print_token(&lex);
      printf("  ");
   }
   free(store);
   free(text);
   return 0;
}
#endif
/*
------------------------------------------------------------------------------
This software is available under 2 licenses -- choose whichever you prefer.
------------------------------------------------------------------------------
ALTERNATIVE A - MIT License
Copyright (c) 2017 Sean Barrett
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
------------------------------------------------------------------------------
ALTERNATIVE B - Public Domain (www.unlicense.org)
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
software, either in source code form or as a compiled binary, for any purpose,
commercial or non-commercial, and by any means.
In jurisdictions that recognize copyright laws, the author or authors of this
software dedicate any and all copyright interest in the software to the public
domain. We make this dedication for the benefit of the public at large and to
the detriment of our heirs and successors. We intend this dedication to be an
overt act of relinquishment in perpetuity of all present and future rights to
this software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
------------------------------------------------------------------------------
*/
