00001 /******************************************************************************* 00002 00003 Copyright (C) 2007 by Brendon Costa 00004 00005 This library is free software; you can redistribute it and/or modify 00006 it under the terms of the "LGPL Like" License defined in the file COPYING 00007 that should have been distributed along with this source. 00008 00009 This library is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 00012 00013 You should have received a copy of the "LGPL Like" License 00014 along with this library; see the file COPYING. if not, it can be 00015 obtained from the EDoc++ website: 00016 http://edoc.sourceforge.net/license.html 00017 00018 *******************************************************************************/ 00019 #ifndef EDOC_DICTIONARY_H 00020 #define EDOC_DICTIONARY_H 00021 00022 #include "EDoc/PropogatingException.h" 00023 #include "EDoc/exceptions.h" 00024 #include "EDoc/ProgressIFace.h" 00025 #include "EDoc/StringIdentifiedObject.h" 00026 #include "EDoc/TranslationUnit.h" 00027 #include "EDoc/SpecificManagedObject.h" 00028 00029 #include <string> 00030 #include <map> 00031 #include <ostream> 00032 #include <vector> 00033 #include <list> 00034 #include <sstream> 00035 00036 00037 00038 namespace EDoc 00039 { 00040 class PersistenceIFace; 00041 class Type; 00042 class Function; 00043 class File; 00044 class CodeBlock; 00045 class CatchBlock; 00046 class TryBlock; 00047 class FunctionType; 00048 00049 //=========================================================================== 00050 /** \brief Represents one or more merged translation units. 00051 * 00052 * The Dictionary class is the main class of the system. Dictionary objects 00053 * have 1:1 mappings with .edc files. You will read a .edc file into a 00054 * dictionary instance or write one from a Dictionary instance. 00055 * 00056 * Most objects belong to a specific dictionary. This is important for 00057 * correct memory management. It is not possible to assign pointers to 00058 * DictionarySpecific objects between different dictionary instances. 00059 * 00060 * The main object types directly stored in a Dictionary include: 00061 * 00062 * - File : Represents a source file used in compiling some 00063 * code (Headers too). 00064 * - Type : Represents a C/C++ type like integers, floats, classes etc. 00065 * - FunctionType : This is used to represent C/C++ function types. They 00066 * are stored seperate from normal types as they are "conglomerate" 00067 * types made up from a number of Type objects. 00068 * - Function : These represent individual functions in a compiled binary. 00069 * . 00070 * 00071 * These above object types are specially managed. In order to simplify the 00072 * memory allocation and handling of these objects, we have made them all 00073 * derive from the StringIdentifiedObject class. Each of these objects is 00074 * identified by a unique string. For example a Function is identified by the 00075 * string for the name used as its link name which you would see using nm on 00076 * a binary file. 00077 * 00078 * It is helpful to note that while these objects are all identified by 00079 * unique string keys, in the .edc files they are identified by integer 00080 * indexes. This makes the loading procedure become a two stage procedure 00081 * and the IndexedDictionary is used to perform the initial load using 00082 * indexes. Using integer indexes in the .edc files was done to decrease 00083 * loading/saving times and to decrease file size. See Read() and Write() for 00084 * more information. 00085 * 00086 * Most objects in EDoc are DictionarySpecific objects. This means that 00087 * they are associated with a particular dictionary. These object can NOT be 00088 * referenced from objects within a different dictionary but must be 00089 * duplicated to the other dictionary first before they can be referenced by 00090 * any objects in the other dictionary. This can often be achieved using code 00091 * like follows: 00092 * 00093 * \code 00094 * void Func(Type* other_dict_type) 00095 * { 00096 * // Will retrieve or create the type from this dictionary with the 00097 * // same string identifier as the type from the other dictionary. 00098 * // However the type will be "empty" if it did not exist in this 00099 * // dictionary to start with. 00100 * Type* this_dict_type = this_dict.types.AlwaysGet(*other_dict_type); 00101 * } 00102 * \endcode 00103 * 00104 * If the type is not complete then its IsPopulated() will return 00105 * false. In which case you will need to populate it using the assignment 00106 * operator or some other means. Also note however that when populating it 00107 * the type may also referr to other "un-populated" objects that you will 00108 * need to populate. 00109 * 00110 * As you can see it is not easy to move or copy objects from one dictionary 00111 * to another. Most usages of the EDoc however will not require the user to 00112 * do anything like this anyhow as all the assigning/merging etc is done by 00113 * the procedure of: 00114 * 00115 * dict1.Read(...); 00116 * dict2.Read(...); 00117 * dict1.Merge(dict2); 00118 * 00119 * This will read two .edc files into two seperate dictionaries, and then 00120 * "merge" the two dictionaries. A merge is kind of like linking 00121 * objects/libraries together in the development of an application. 00122 * 00123 * Temporary Dictionary Staging Area 00124 * 00125 * When using the EDoc python module, you will probably make use of the 00126 * temporary dictionary staging area. This is a static pointer to a "current" 00127 * working dictionary. This will generally be used for communication between 00128 * the python and C++ code. In particular the python wrappers require the use 00129 * of a default constructor for most data types due to a limitation (OR I 00130 * JUST DONT KNOW HOW TO USE IT PROPERLY YET) of Swig. Since most objects 00131 * require that the dictionary that they will belong to be passed upon 00132 * construction time, this means that there is no real possibility of having 00133 * a default constructor. Instead now, if the default constructor is used, 00134 * all DictionarySpecific objects will use the dictionary that can be 00135 * currently retrieved using GetCurrentDictionaryAssert() 00136 * 00137 * See GetCurrentDictionary() for more details. 00138 */ 00139 class Dictionary 00140 { 00141 public: 00142 00143 //------------------------------------------------------------------------ 00144 /** \brief Constructor. 00145 * 00146 * Note: Will create a single File object called UNKNOWN_FILE and also 00147 * a single location object called UNKNOWN_LOCATION. These will exist in 00148 * all dictionaries. 00149 */ 00150 Dictionary(); 00151 00152 //------------------------------------------------------------------------ 00153 /** \brief Erases all memory for all objects that are specific to this 00154 * dictionary. 00155 * 00156 * This will erase all DictionarySpecific objects owned 00157 * by this dictionary. 00158 */ 00159 ~Dictionary(); 00160 00161 //------------------------------------------------------------------------ 00162 /** \brief Reads the contents of a .edc file into this dictionary 00163 * instance. 00164 * 00165 * This will replace the contents of this dictionary with the contents of 00166 * the .edc file. All old state of this dictionary will be lost. 00167 * 00168 * The process of reading .edc files is quite involved. It is mostly made 00169 * so by certain limitations placed on it by the GCC modification and also 00170 * due to the design decision of storing references to objects in the .edc 00171 * file using integer indexes instead of string keys as are using in the 00172 * runtime objects. 00173 * 00174 * When a read operation occurs the following sequence is followed: 00175 * -# Erase all current data from the dictionary. 00176 * -# Construct an Indexed dictionary used to associate integer index 00177 * references with object instances. 00178 * -# Read the contents of the .edc file header. This includes info 00179 * like translation units referenced in .edc file, version 00180 * information etc. 00181 * -# Read all data records. 00182 * -# Validate that all read data objects have been inserted into the 00183 * dictionary. 00184 * . 00185 * 00186 * The IndexedDictionary is used to associated particular instances with 00187 * indexes as they are read from the file. 00188 * 00189 * reading of data objects occurs in a a set number of stages. These 00190 * stages correspond to the indexes of items in the managed objects 00191 * vector. I.e. 00192 * 00193 * -# Stage 0: File objects : GetManagedObjectsVector() index 0 00194 * -# Stage 1: Type objects : GetManagedObjectsVector() index 1 00195 * -# Stage 2: FunctionType objects : GetManagedObjectsVector() index 2 00196 * -# Stage 3: Function objects : GetManagedObjectsVector() index 3 00197 * . 00198 * 00199 * Doing the stages this way GREATLY simplifies the reading code as it is 00200 * the same for each data object type except the index to the vector 00201 * changes as you move from one stage to the next. 00202 * 00203 * This places the requirement on the .edc files that all items must be 00204 * saved to the file in the given order: File, Type, FunctionType, 00205 * Function 00206 */ 00207 void Read(PersistenceIFace& file); 00208 00209 //------------------------------------------------------------------------ 00210 /** \brief Writes the contents of this dictionary to a .edc file. 00211 * 00212 * Writing is a simple task in comparison to reading. It simply assigns 00213 * an integer index to each object before writing. Writes all objects and 00214 * then resets the integer indexes to -1. 00215 */ 00216 void Write(PersistenceIFace& file) const; 00217 00218 //------------------------------------------------------------------------ 00219 /** \brief Merges the contents of another dictionary into this dictionary. 00220 * 00221 * This is similar to linking objects together. It will resolve calls to 00222 * un-implemented functions with the correct implementations from other 00223 * translation units. It will produce errors if multiple implementations 00224 * are found of a single function (If they are different so we dont emit 00225 * errors for "vaguely" linked functions). 00226 * 00227 * The result is a modified Dictionary that is the union of the two. 00228 */ 00229 void Merge(const Dictionary& right); 00230 00231 //------------------------------------------------------------------------ 00232 /** \brief Replaces all references of pointer remove with pointer replace. 00233 * 00234 * This is used during the loading process to remove duplicate objects 00235 * from all dictionary specific objects and replace them with a single 00236 * "reference" object. After calling this function the user can be 00237 * guarenteed that this object will no longer contain any references to 00238 * "remove". 00239 * 00240 * When used on a dictionary this will attempt to replace all references 00241 * to the pointer remove over the entire Dictionary instance andall its 00242 * children. 00243 * 00244 * \param stack Is a processing stack used to ensure that recursive 00245 * associations do not form endless loops of replacing. 00246 * 00247 * \param remove Is a pointer value that we wish to replace. 00248 * 00249 * \param replace Is a pointer value to replace all instances of remove 00250 * with. 00251 * 00252 * \return Returns the number of occurences of remove that were replaced 00253 * with replace. 00254 */ 00255 size_t ReplaceReferences(PStack& stack, void* remove, void* replace); 00256 00257 //------------------------------------------------------------------------ 00258 /** \brief Prints the contents of this object to the given stream in a 00259 * user readable manner. 00260 * 00261 * \param out The output stream to print to. 00262 * 00263 * \param prefix A prefix to print before every line displayed. Usually 00264 * used to provide indenting when printing a heirarchy of objects. 00265 */ 00266 std::ostream& Print(std::ostream& out, std::string prefix="") const; 00267 00268 //------------------------------------------------------------------------ 00269 /** \brief Returns a string with the same contents as would be displayed 00270 * by the Print() function. 00271 */ 00272 std::string ToString(std::string prefix="") const; 00273 00274 //------------------------------------------------------------------------ 00275 /** \brief Performs internal validation of the Dictionary object and all 00276 * its data. 00277 * 00278 * This will raise a BugException if internal validation fails andshould 00279 * only be used for debugging purposes. It is quite slow. 00280 */ 00281 void Validate() const; 00282 00283 00284 //------------------------------------------------------------------------ 00285 // Calculation of propogating exceptions. 00286 //------------------------------------------------------------------------ 00287 00288 //------------------------------------------------------------------------ 00289 /** \brief Expands FunctionPointer calls and Virtual function calls into 00290 * a list of "possible" function calls. 00291 * 00292 * This results in a "more than complete" call-graph. 00293 * 00294 * \see See CalculatePropogatingExceptions() for information about 00295 * this group of functions. 00296 */ 00297 void ExpandCallGraph(); 00298 00299 //------------------------------------------------------------------------ 00300 /** \brief Erases all propogating exception data from all functions. 00301 * 00302 * This must be called before calculating the propogating exception 00303 * information to ensure it starts with a clean slate. 00304 * 00305 * \see See CalculatePropogatingExceptions() for information about 00306 * this group of functions. 00307 */ 00308 void ErasePropogatingExceptions(); 00309 00310 //------------------------------------------------------------------------ 00311 /** \brief Analyses the callgraph of all functions ordering them 00312 * by complexity and marking circular callgraphs. 00313 * 00314 * This will mark the complexity of a function by setting: 00315 * Function::total_calls to the number of unique functions called. 00316 * It will also identify circular callgraphs between a group of functions 00317 * and if such exists will create the member: Function::circular 00318 * with a set of the functions in the circular graph. 00319 * 00320 * Finally it will return a vector of all the functions sorted in 00321 * order of their Function::total_calls member. This is used to process 00322 * the functions in order when calculating the propogating exceptions 00323 * In particular it is dynamic programming in action. By calculating the 00324 * least complex functions first, they are then available to be used 00325 * in the more complex functions later. I.e. The processing the functions 00326 * in this order means that we dont need to "re-calculate" information. 00327 * 00328 * 00329 * \see See CalculatePropogatingExceptions() for information about 00330 * this group of functions. 00331 */ 00332 void CalculateCallComplexity(std::vector <Function*>& funcs); 00333 00334 //------------------------------------------------------------------------ 00335 /** \brief Calculates the propogating exceptions for all functions 00336 * provided in the vector sorted which must be in order of the call 00337 * complexity. 00338 * 00339 * This function asserts that the vector is provided in the correct order. 00340 * 00341 * To calculate the propogating exceptions for a dictionary 00342 * the following functions must be called in order: 00343 * 00344 * -# ExpandCallGraph() 00345 * -# ErasePropogatingExceptions() 00346 * -# CalculateCallComplexity() 00347 * -# CalculatePropogatingExceptions() 00348 * . 00349 * 00350 * This function makes use of: 00351 * - GenerateExceptions() 00352 * - HandleCircularCallgraph( 00353 * - PostProcessExceptions() 00354 * - ProcessFunction() 00355 * - ProcessCodeBlock() 00356 * - ProcessTryBlock() 00357 * . 00358 * 00359 * \todo Document the algorithm used for this operation. 00360 */ 00361 void CalculatePropogatingExceptions(std::vector <Function*>& sorted, 00362 FunctionProgressIFace* progress = NULL); 00363 00364 00365 //------------------------------------------------------------------------ 00366 /** \brief Returns the translation unit with the given index. 00367 */ 00368 TranslationUnit* GetIndexedTranslationUnit(int32_t index); 00369 00370 //------------------------------------------------------------------------ 00371 /** \brief Returns the index of the provided translation unit. 00372 * 00373 * NOTE: Will throw a BugException if the translation unit is not in this 00374 * dictionaries list of translation units. 00375 */ 00376 int32_t GetTranslationUnitIndex(TranslationUnit* tu); 00377 00378 //------------------------------------------------------------------------ 00379 /** \brief Returns a list containing a reference to the ManagedObject 00380 * types being managed by this dictionary. 00381 * 00382 * This includes all File, Type, FunctionType and Function instances. 00383 */ 00384 inline std::list<ManagedObjectPtr> GetManagedObjects() 00385 { 00386 // NOTE: The order of this list is important and MUST be the same order 00387 // in which we expect to load the various record types from the file. 00388 std::list<ManagedObjectPtr> ret; 00389 ret.push_back(&files); 00390 ret.push_back(&types); 00391 ret.push_back(&function_types); 00392 ret.push_back(&functions); 00393 return ret; 00394 } 00395 00396 //------------------------------------------------------------------------ 00397 /** \brief See GetManagedObjects() 00398 */ 00399 inline std::vector<ManagedObjectPtr> GetManagedObjectsVector() 00400 { 00401 // NOTE: The order of this list is important and MUST be the same order 00402 // in which we expect to load the various record types from the file. 00403 std::vector<ManagedObjectPtr> ret; 00404 ret.push_back(&files); 00405 ret.push_back(&types); 00406 ret.push_back(&function_types); 00407 ret.push_back(&functions); 00408 return ret; 00409 } 00410 00411 //------------------------------------------------------------------------ 00412 /** \brief See GetManagedObjects() 00413 */ 00414 inline std::vector<const ManagedObject*> GetManagedObjectsVector() const 00415 { 00416 // NOTE: The order of this list is important and MUST be the same order 00417 // in which we expect to load the various record types from the file. 00418 std::vector<const ManagedObject*> ret; 00419 ret.push_back(&files); 00420 ret.push_back(&types); 00421 ret.push_back(&function_types); 00422 ret.push_back(&functions); 00423 return ret; 00424 } 00425 00426 00427 00428 00429 //------------------------------------------------------------------------ 00430 // Public Data 00431 //------------------------------------------------------------------------ 00432 00433 //------------------------------------------------------------------------ 00434 /** \brief Every dictionary contains an "Unknown File" pointer that is 00435 * used for source files we could not determine. 00436 */ 00437 const File* UNKNOWN_FILE; 00438 00439 //------------------------------------------------------------------------ 00440 /** \brief Every dictionary contains an "Unknown File" pointer that is 00441 * used for source locations we could not determine in the GCC MOD. 00442 */ 00443 const Location* UNKNOWN_LOCATION; 00444 00445 //------------------------------------------------------------------------ 00446 /** \brief Substitution exceptions type. 00447 */ 00448 const Type* SUBSTITUTION_EXCEPTION_TYPE; 00449 00450 SpecificManagedObject<File> files; 00451 SpecificManagedObject<Type> types; 00452 SpecificManagedObject<FunctionType> function_types; 00453 SpecificManagedObject<Function> functions; 00454 00455 //------------------------------------------------------------------------ 00456 /** \brief A vector of all translation units that have been merged into 00457 * this dictionary. 00458 */ 00459 std::vector<TranslationUnit*> translation_units; 00460 00461 //------------------------------------------------------------------------ 00462 00463 protected: 00464 00465 00466 //------------------------------------------------------------------------ 00467 /** \brief Erases all data associated with this dictionary instance. 00468 */ 00469 void EraseAll(); 00470 00471 //------------------------------------------------------------------------ 00472 /** \brief Generates default set of data required by all dictionaries. 00473 */ 00474 void GeneratePredefined(); 00475 00476 //------------------------------------------------------------------------ 00477 /** \brief Process the given function and functions it calls to generate 00478 * the list of exceptions that may be emitted by it. 00479 * 00480 * This does not return a completed list of propogating exceptions 00481 * in the case where the function participates in a circular callgraph. 00482 * In this case the list of exceptions will include one or more exceptions 00483 * with type: SUBSTITUTION_EXCEPTION_TYPE and the function 00484 * will return a boolean value of true. 00485 * 00486 * In the case of the function participating in a circular 00487 * callgraph, only the first function in that callgraph should 00488 * return a value of true. 00489 * 00490 * \return Returns a value of true if this function was part 00491 * of a circular callgraph AND additional processing needs to be 00492 * performed on this function (and the others in the callgraph) by 00493 * the HandleCircularCallgraph) function. I.e. If this function 00494 * returns true then you must also call HandleCircularCallgraph(). 00495 */ 00496 bool GenerateExceptions( 00497 FunctionProgressIFace* progress, Function& function); 00498 00499 //------------------------------------------------------------------------ 00500 /** \brief Handles the special case of a circular callgraph 00501 * when calculating propogating exceptions for a function. 00502 * 00503 * This will ensure that GenerateExceptions() has been called 00504 * for all functions in the circular callgraph. Following this 00505 * it will then find the union of all exceptions thrown by those 00506 * functions and then will substitute that unioned list of 00507 * exceptions for all exceptions of that have the type: 00508 * SUBSTITUTION_EXCEPTION_TYPE. 00509 */ 00510 void HandleCircularCallgraph( 00511 FunctionProgressIFace* progress, Function& function); 00512 00513 //------------------------------------------------------------------------ 00514 /** \brief After the complete list of propogating exceptions 00515 * has been calculated, this will filter out those exceptions 00516 * that will not be emitted as a result of the functions throw() 00517 * specifier while emitting any warnings for such cases. 00518 */ 00519 void PostProcessExceptions(Function& function); 00520 00521 //------------------------------------------------------------------------ 00522 /** \brief For internal use in calculating propogating exceptions. 00523 */ 00524 void ProcessFunction( 00525 Function& top, 00526 FunctionLoc& function, 00527 std::list<PropogatingException>& poss_curr_catch_types); 00528 00529 //------------------------------------------------------------------------ 00530 /** \brief For internal use in calculating propogating exceptions. 00531 */ 00532 void ProcessCodeBlock( 00533 Function& top, 00534 CodeBlock& block, 00535 FunctionLoc& function, 00536 std::list<PropogatingException>& poss_curr_catch_types, 00537 std::list<PropogatingException>& exceptions); 00538 00539 //------------------------------------------------------------------------ 00540 /** \brief For internal use in calculating propogating exceptions. 00541 */ 00542 void ProcessTryBlock( 00543 Function& top, 00544 TryBlock& block, 00545 FunctionLoc& function, 00546 std::list<PropogatingException>& poss_curr_catch_types, 00547 std::list<PropogatingException>& exceptions); 00548 00549 //------------------------------------------------------------------------ 00550 /** \brief For internal use in ExpandCallgraph() 00551 */ 00552 void UpdateFunctionAddressLists(); 00553 00554 //------------------------------------------------------------------------ 00555 00556 }; 00557 //=========================================================================== 00558 00559 00560 00561 00562 //=========================================================================== 00563 // Temporary staging area for dictionaries. 00564 //=========================================================================== 00565 /** \brief Returns the currently active dictionary instance that is in the 00566 * staging area. 00567 * 00568 * \return May return NULL if no dictionaryis currently in the staging area. 00569 */ 00570 Dictionary* GetCurrentDictionary(); 00571 00572 //=========================================================================== 00573 /** \brief Set the currently active dictionary. 00574 * 00575 * May set to NULL to clear the staging area. 00576 */ 00577 void SetCurrentDictionary(Dictionary* current_in); 00578 00579 //=========================================================================== 00580 /** \brief Returns the currently active dictionary instance that is in the 00581 * staging area. 00582 * 00583 * This is used by many of the python wrappers, so before making use of the 00584 * python wrappers it is necessary to place a Dictionary instance in this 00585 * staging area. 00586 * 00587 * \return Will never return NULL, instead will throw a BugException if the 00588 * staging area is empty. 00589 */ 00590 Dictionary* GetCurrentDictionaryAssert(); 00591 00592 //=========================================================================== 00593 } 00594 00595 namespace std 00596 { 00597 //=========================================================================== 00598 /** \brief Print a dictionary to a std ostream. 00599 */ 00600 static inline std::ostream& operator<<(std::ostream& out, 00601 const EDoc::Dictionary& value) 00602 { 00603 return value.Print(out); 00604 } 00605 //=========================================================================== 00606 } 00607 00608 #endif // EDOC_DICTIONARY_H