1 #ifndef ANTLR3COLLECTIONS_H
\r
2 #define ANTLR3COLLECTIONS_H
\r
4 // [The "BSD licence"]
\r
5 // Copyright (c) 2005-2009 Jim Idle, Temporal Wave LLC
\r
6 // http://www.temporal-wave.com
\r
7 // http://www.linkedin.com/in/jimidle
\r
9 // All rights reserved.
\r
11 // Redistribution and use in source and binary forms, with or without
\r
12 // modification, are permitted provided that the following conditions
\r
14 // 1. Redistributions of source code must retain the above copyright
\r
15 // notice, this list of conditions and the following disclaimer.
\r
16 // 2. Redistributions in binary form must reproduce the above copyright
\r
17 // notice, this list of conditions and the following disclaimer in the
\r
18 // documentation and/or other materials provided with the distribution.
\r
19 // 3. The name of the author may not be used to endorse or promote products
\r
20 // derived from this software without specific prior written permission.
\r
22 // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
\r
23 // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
\r
24 // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
\r
25 // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
\r
26 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
\r
27 // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
\r
28 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
\r
29 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
\r
30 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
\r
31 // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\r
33 #include <antlr3defs.h>
\r
34 #include <antlr3bitset.h>
\r
36 #define ANTLR3_HASH_TYPE_INT 0 /**< Indicates the hashed file has integer keys */
\r
37 #define ANTLR3_HASH_TYPE_STR 1 /**< Indicates the hashed file has numeric keys */
\r
43 typedef struct ANTLR3_HASH_KEY_struct
\r
45 ANTLR3_UINT8 type; /**< One of ##ANTLR3_HASH_TYPE_INT or ##ANTLR3_HASH_TYPE_STR */
\r
49 pANTLR3_UINT8 sKey; /**< Used if type is ANTLR3_HASH_TYPE_STR */
\r
50 ANTLR3_INTKEY iKey; /**< used if type is ANTLR3_HASH_TYPE_INT */
\r
54 } ANTLR3_HASH_KEY, *pANTLR3_HASH_KEY;
\r
56 /** Internal structure representing an element in a hash bucket.
\r
57 * Stores the original key so that duplicate keys can be rejected
\r
58 * if necessary, and contains function can be supported. If the hash key
\r
59 * could be unique I would have invented the perfect compression algorithm ;-)
\r
61 typedef struct ANTLR3_HASH_ENTRY_struct
\r
63 /** Key that created this particular entry
\r
65 ANTLR3_HASH_KEY keybase;
\r
67 /** Pointer to the data for this particular entry
\r
71 /** Pointer to routine that knows how to release the memory
\r
72 * structure pointed at by data. If this is NULL then we assume
\r
73 * that the data pointer does not need to be freed when the entry
\r
74 * is deleted from the table.
\r
76 void (ANTLR3_CDECL *free)(void * data);
\r
78 /** Pointer to the next entry in this bucket if there
\r
79 * is one. Sometimes different keys will hash to the same bucket (especially
\r
80 * if the number of buckets is small). We could implement dual hashing algorithms
\r
81 * to minimize this, but that seems over the top for what this is needed for.
\r
83 struct ANTLR3_HASH_ENTRY_struct * nextEntry;
\r
87 /** Internal structure of a hash table bucket, which tracks
\r
88 * all keys that hash to the same bucket.
\r
90 typedef struct ANTLR3_HASH_BUCKET_struct
\r
92 /** Pointer to the first entry in the bucket (if any, it
\r
93 * may be NULL). Duplicate entries are chained from
\r
96 pANTLR3_HASH_ENTRY entries;
\r
101 /** Structure that tracks a hash table
\r
103 typedef struct ANTLR3_HASH_TABLE_struct
\r
105 /** Indicates whether the table allows duplicate keys
\r
109 /** Number of buckets available in this table
\r
111 ANTLR3_UINT32 modulo;
\r
113 /** Points to the memory where the array of buckets
\r
116 pANTLR3_HASH_BUCKET buckets;
\r
118 /** How many elements currently exist in the table.
\r
120 ANTLR3_UINT32 count;
\r
122 /** Whether the hash table should strdup the keys it is given or not.
\r
124 ANTLR3_BOOLEAN doStrdup;
\r
126 /** Pointer to function to completely delete this table
\r
128 void (*free) (struct ANTLR3_HASH_TABLE_struct * table);
\r
130 /* String keyed hashtable functions */
\r
131 void (*del) (struct ANTLR3_HASH_TABLE_struct * table, void * key);
\r
132 pANTLR3_HASH_ENTRY (*remove) (struct ANTLR3_HASH_TABLE_struct * table, void * key);
\r
133 void * (*get) (struct ANTLR3_HASH_TABLE_struct * table, void * key);
\r
134 ANTLR3_INT32 (*put) (struct ANTLR3_HASH_TABLE_struct * table, void * key, void * element, void (ANTLR3_CDECL *freeptr)(void *));
\r
136 /* Integer based hash functions */
\r
137 void (*delI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key);
\r
138 pANTLR3_HASH_ENTRY (*removeI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key);
\r
139 void * (*getI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key);
\r
140 ANTLR3_INT32 (*putI) (struct ANTLR3_HASH_TABLE_struct * table, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *));
\r
142 ANTLR3_UINT32 (*size) (struct ANTLR3_HASH_TABLE_struct * table);
\r
147 /** Internal structure representing an enumeration of a table.
\r
148 * This is returned by antlr3Enumeration()
\r
149 * Allows the programmer to traverse the table in hash order without
\r
150 * knowing what is in the actual table.
\r
152 * Note that it is up to the caller to ensure that the table
\r
153 * structure does not change in the hash bucket that is currently being
\r
154 * enumerated as this structure just tracks the next pointers in the
\r
157 typedef struct ANTLR3_HASH_ENUM_struct
\r
159 /* Pointer to the table we are enumerating
\r
161 pANTLR3_HASH_TABLE table;
\r
163 /* Bucket we are currently enumerating (if NULL then we are done)
\r
165 ANTLR3_UINT32 bucket;
\r
167 /* Next entry to return, if NULL, then move to next bucket if any
\r
169 pANTLR3_HASH_ENTRY entry;
\r
173 int (*next) (struct ANTLR3_HASH_ENUM_struct * en, pANTLR3_HASH_KEY *key, void ** data);
\r
174 void (*free) (struct ANTLR3_HASH_ENUM_struct * table);
\r
178 /** Structure that represents a LIST collection
\r
180 typedef struct ANTLR3_LIST_struct
\r
182 /** Hash table that is storing the list elements
\r
184 pANTLR3_HASH_TABLE table;
\r
186 void (*free) (struct ANTLR3_LIST_struct * list);
\r
187 void (*del) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key);
\r
188 void * (*get) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key);
\r
189 void * (*remove) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key);
\r
190 ANTLR3_INT32 (*add) (struct ANTLR3_LIST_struct * list, void * element, void (ANTLR3_CDECL *freeptr)(void *));
\r
191 ANTLR3_INT32 (*put) (struct ANTLR3_LIST_struct * list, ANTLR3_INTKEY key, void * element, void (ANTLR3_CDECL *freeptr)(void *));
\r
192 ANTLR3_UINT32 (*size) (struct ANTLR3_LIST_struct * list);
\r
197 /** Structure that represents a Stack collection
\r
199 typedef struct ANTLR3_STACK_struct
\r
201 /** List that supports the stack structure
\r
203 pANTLR3_VECTOR vector;
\r
205 /** Used for quick access to the top of the stack
\r
208 void (*free) (struct ANTLR3_STACK_struct * stack);
\r
209 void * (*pop) (struct ANTLR3_STACK_struct * stack);
\r
210 void * (*get) (struct ANTLR3_STACK_struct * stack, ANTLR3_INTKEY key);
\r
211 ANTLR3_BOOLEAN (*push) (struct ANTLR3_STACK_struct * stack, void * element, void (ANTLR3_CDECL *freeptr)(void *));
\r
212 ANTLR3_UINT32 (*size) (struct ANTLR3_STACK_struct * stack);
\r
213 void * (*peek) (struct ANTLR3_STACK_struct * stack);
\r
218 /* Structure that represents a vector element
\r
220 typedef struct ANTLR3_VECTOR_ELEMENT_struct
\r
223 void (ANTLR3_CDECL *freeptr)(void *);
\r
225 ANTLR3_VECTOR_ELEMENT, *pANTLR3_VECTOR_ELEMENT;
\r
227 #define ANTLR3_VECTOR_INTERNAL_SIZE 16
\r
228 /* Structure that represents a vector collection. A vector is a simple list
\r
229 * that contains a pointer to the element and a pointer to a function that
\r
230 * that can free the element if it is removed. It auto resizes but does not
\r
231 * use hash techniques as it is referenced by a simple numeric index. It is not a
\r
232 * sparse list, so if any element is deleted, then the ones following are moved
\r
233 * down in memory and the count is adjusted.
\r
235 typedef struct ANTLR3_VECTOR_struct
\r
237 /** Array of pointers to vector elements
\r
239 pANTLR3_VECTOR_ELEMENT elements;
\r
241 /** Number of entries currently in the list;
\r
243 ANTLR3_UINT32 count;
\r
245 /** Many times, a vector holds just a few nodes in an AST and it
\r
246 * is too much overhead to malloc the space for elements so
\r
247 * at the expense of a few bytes of memory, we hold the first
\r
248 * few elements internally. It means we must copy them when
\r
249 * we grow beyond this initial size, but that is less overhead than
\r
250 * the malloc/free callas we would otherwise require.
\r
252 ANTLR3_VECTOR_ELEMENT internal[ANTLR3_VECTOR_INTERNAL_SIZE];
\r
254 /** Indicates if the structure was made by a factory, in which
\r
255 * case only the factory can free the memory for the actual vector,
\r
256 * though the vector free function is called and will recurse through its
\r
257 * entries calling any free pointers for each entry.
\r
259 ANTLR3_BOOLEAN factoryMade;
\r
261 /** Total number of entries in elements at any point in time
\r
263 ANTLR3_UINT32 elementsSize;
\r
265 void (ANTLR3_CDECL *free) (struct ANTLR3_VECTOR_struct * vector);
\r
266 void (*del) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry);
\r
267 void * (*get) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry);
\r
268 void * (*remove) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry);
\r
269 void (*clear) (struct ANTLR3_VECTOR_struct * vector);
\r
270 ANTLR3_BOOLEAN (*swap) (struct ANTLR3_VECTOR_struct *, ANTLR3_UINT32 entry1, ANTLR3_UINT32 entry2);
\r
271 ANTLR3_UINT32 (*add) (struct ANTLR3_VECTOR_struct * vector, void * element, void (ANTLR3_CDECL *freeptr)(void *));
\r
272 ANTLR3_UINT32 (*set) (struct ANTLR3_VECTOR_struct * vector, ANTLR3_UINT32 entry, void * element, void (ANTLR3_CDECL *freeptr)(void *), ANTLR3_BOOLEAN freeExisting);
\r
273 ANTLR3_UINT32 (*size) (struct ANTLR3_VECTOR_struct * vector);
\r
277 /** Default vector pool size if otherwise unspecified
\r
279 #define ANTLR3_FACTORY_VPOOL_SIZE 256
\r
281 /** Structure that tracks vectors in a vector and auto deletes the vectors
\r
282 * in the vector factory when closed.
\r
284 typedef struct ANTLR3_VECTOR_FACTORY_struct
\r
287 /** List of all vector pools allocated so far
\r
289 pANTLR3_VECTOR *pools;
\r
291 /** Count of the vector pools allocated so far (current active pool)
\r
293 ANTLR3_INT32 thisPool;
\r
295 /** The next vector available in the pool
\r
297 ANTLR3_UINT32 nextVector;
\r
299 /** Trick to quickly initialize a new vector via memcpy and not a function call
\r
301 ANTLR3_VECTOR unTruc;
\r
303 /** Consumers from the factory can release a factory produced vector
\r
304 * back to the factory so that it may be reused (and thus conserve memory)
\r
305 * by another caller. The available vectors are stored here. Note that
\r
306 * the only vectors avaible in the free chain are produced by this factory, so they
\r
307 * need not be explicitly freed when the factory is closed.
\r
309 pANTLR3_STACK freeStack;
\r
311 /** Function to close the vector factory
\r
313 void (*close) (struct ANTLR3_VECTOR_FACTORY_struct * factory);
\r
315 /** Function to supply a new vector
\r
317 pANTLR3_VECTOR (*newVector) (struct ANTLR3_VECTOR_FACTORY_struct * factory);
\r
319 /// Function to return a vector to the factory for reuse
\r
321 void (*returnVector) (struct ANTLR3_VECTOR_FACTORY_struct * factory, pANTLR3_VECTOR vector);
\r
324 ANTLR3_VECTOR_FACTORY;
\r
327 /* -------------- TRIE Interfaces ---------------- */
\r
330 /** Structure that holds the payload entry in an ANTLR3_INT_TRIE or ANTLR3_STRING_TRIE
\r
332 typedef struct ANTLR3_TRIE_ENTRY_struct
\r
334 ANTLR3_UINT32 type;
\r
335 void (ANTLR3_CDECL *freeptr)(void *);
\r
338 ANTLR3_INTKEY intVal;
\r
342 struct ANTLR3_TRIE_ENTRY_struct * next; /* Allows duplicate entries for same key in insertion order */
\r
344 ANTLR3_TRIE_ENTRY, * pANTLR3_TRIE_ENTRY;
\r
347 /** Structure that defines an element/node in an ANTLR3_INT_TRIE
\r
349 typedef struct ANTLR3_INT_TRIE_NODE_struct
\r
351 ANTLR3_UINT32 bitNum; /**< This is the left/right bit index for traversal along the nodes */
\r
352 ANTLR3_INTKEY key; /**< This is the actual key that the entry represents if it is a terminal node */
\r
353 pANTLR3_TRIE_ENTRY buckets; /**< This is the data bucket(s) that the key indexes, which may be NULL */
\r
354 struct ANTLR3_INT_TRIE_NODE_struct * leftN; /**< Pointer to the left node from here when sKey & bitNum = 0 */
\r
355 struct ANTLR3_INT_TRIE_NODE_struct * rightN; /**< Pointer to the right node from here when sKey & bitNum, = 1 */
\r
357 ANTLR3_INT_TRIE_NODE, * pANTLR3_INT_TRIE_NODE;
\r
359 /** Structure that defines an ANTLR3_INT_TRIE. For this particular implementation,
\r
360 * as you might expect, the key is turned into a "string" by looking at bit(key, depth)
\r
361 * of the integer key. Using 64 bit keys gives us a depth limit of 64 (or bit 0..63)
\r
362 * and potentially a huge trie. This is the algorithm for a Patricia Trie.
\r
363 * Note also that this trie [can] accept multiple entries for the same key and is
\r
364 * therefore a kind of elastic bucket patricia trie.
\r
366 * If you find this code useful, please feel free to 'steal' it for any purpose
\r
367 * as covered by the BSD license under which ANTLR is issued. You can cut the code
\r
368 * but as the ANTLR library is only about 50K (Windows Vista), you might find it
\r
369 * easier to just link the library. Please keep all comments and licenses and so on
\r
370 * in any version of this you create of course.
\r
375 typedef struct ANTLR3_INT_TRIE_struct
\r
377 pANTLR3_INT_TRIE_NODE root; /* Root node of this integer trie */
\r
378 pANTLR3_INT_TRIE_NODE current; /* Used to traverse the TRIE with the next() method */
\r
379 ANTLR3_UINT32 count; /* Current entry count */
\r
380 ANTLR3_BOOLEAN allowDups; /* Whether this trie accepts duplicate keys */
\r
383 pANTLR3_TRIE_ENTRY (*get) (struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key);
\r
384 ANTLR3_BOOLEAN (*del) (struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key);
\r
385 ANTLR3_BOOLEAN (*add) (struct ANTLR3_INT_TRIE_struct * trie, ANTLR3_INTKEY key, ANTLR3_UINT32 type, ANTLR3_INTKEY intVal, void * data, void (ANTLR3_CDECL *freeptr)(void *));
\r
386 void (*free) (struct ANTLR3_INT_TRIE_struct * trie);
\r
392 * A topological sort system that given a set of dependencies of a node m on node n,
\r
393 * can sort them in dependency order. This is a generally useful utility object
\r
394 * that does not care what the things are it is sorting. Generally the set
\r
395 * to be sorted will be numeric indexes into some other structure such as an ANTLR3_VECTOR.
\r
396 * I have provided a sort method that given ANTLR3_VECTOR as an input will sort
\r
397 * the vector entries in place, as well as a sort method that just returns an
\r
398 * array of the sorted noded indexes, in case you are not sorting ANTLR3_VECTORS but
\r
399 * some set of your own device.
\r
401 * Of the two main algorithms that could be used, I chose to use the depth first
\r
402 * search for unvisited nodes as a) This runs in linear time, and b) it is what
\r
403 * we used in the ANTLR Tool to perform a topological sort of the input grammar files
\r
404 * based on their dependencies.
\r
406 typedef struct ANTLR3_TOPO_struct
\r
409 * A vector of vectors of edges, built by calling the addEdge method()
\r
410 * to indicate that node number n depends on node number m. Each entry in the vector
\r
411 * contains a bitset, which has a bit index set for each node upon which the
\r
412 * entry node depends.
\r
414 pANTLR3_BITSET * edges;
\r
417 * A vector used to build up the sorted output order. Note that
\r
418 * as the vector contains UINT32 then the maximum node index is
\r
419 * 'limited' to 2^32, as nodes should be zero based.
\r
421 pANTLR3_UINT32 sorted;
\r
424 * A vector used to detect cycles in the edge dependecies. It is used
\r
425 * as a stack and each time we descend a node to one of its edges we
\r
426 * add the node into this stack. If we find a node that we have already
\r
427 * visited in the stack, then it means there wasa cycle such as 9->8->1->9
\r
428 * as the only way a node can be on the stack is if we are currently
\r
429 * descnding from it as we remove it from the stack as we exit from
\r
430 * descending its dependencies
\r
432 pANTLR3_UINT32 cycle;
\r
435 * A flag that indicates the algorithm found a cycle in the edges
\r
436 * such as 9->8->1->9
\r
437 * If this flag is set after you have called one of the sort routines
\r
438 * then the detected cycle will be contained in the cycle array and
\r
439 * cycleLimit will point to the one after the last entry in the cycle.
\r
441 ANTLR3_BOOLEAN hasCycle;
\r
444 * A watermark used to accumulate potential cycles in the cycle array.
\r
445 * This should be zero when we are done. Check hasCycle after calling one
\r
446 * of the sort methods and if it is ANTLR3_TRUE then you can find the cycle
\r
447 * in cycle[0]...cycle[cycleMark-1]
\r
449 ANTLR3_UINT32 cycleMark;
\r
452 * One more than the largest node index that is contained in edges/sorted.
\r
454 ANTLR3_UINT32 limit;
\r
457 * The set of visited nodes as determined by a set entry in
\r
460 pANTLR3_BITSET visited;
\r
463 * A method that adds an edge from one node to another. An edge
\r
464 * of n -> m indicates that node n is dependent on node m. Note that
\r
465 * while building these edges, it is perfectly OK to add nodes out of
\r
466 * sequence. So, if you have edges:
\r
472 * The you can add them in that order and so add node 3 before nodes 2 and 1
\r
475 void (*addEdge) (struct ANTLR3_TOPO_struct * topo, ANTLR3_UINT32 edge, ANTLR3_UINT32 dependency);
\r
479 * A method that returns a pointer to an array of sorted node indexes.
\r
480 * The array is sorted in topological sorted order. Note that the array
\r
481 * is only as large as the largest node index you created an edge for. This means
\r
482 * that if you had an input of 32 nodes, but that largest node with an edge
\r
483 * was 16, then the returned array will be the sorted order of the first 16
\r
484 * nodes and the last 16 nodes of your array are basically fine as they are
\r
485 * as they had no dependencies and do not need any particular sort order.
\r
487 * NB: If the structure that contains the array is freed, then the sorted
\r
488 * array will be freed too so you should use the value of limit to
\r
489 * make a long term copy of this array if you do not want to keep the topo
\r
490 * structure around as well.
\r
492 pANTLR3_UINT32 (*sortToArray) (struct ANTLR3_TOPO_struct * topo);
\r
495 * A method that sorts the supplied ANTLR3_VECTOR in place based
\r
496 * on the previously supplied edge data.
\r
498 void (*sortVector) (struct ANTLR3_TOPO_struct * topo, pANTLR3_VECTOR v);
\r
501 * A method to free this structure and any associated memory.
\r
503 void (*free) (struct ANTLR3_TOPO_struct * topo);
\r