include/EDoc/Dictionary.h

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003    Copyright (C) 2007 by Brendon Costa
00004 
00005    This library is free software; you can redistribute it and/or modify 
00006    it under the terms of the "LGPL Like" License defined in the file COPYING 
00007    that should have been distributed along with this source.
00008 
00009    This library is distributed in the hope that it will be useful, 
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of 
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  
00012 
00013    You should have received a copy of the "LGPL Like" License 
00014    along with this library; see the file COPYING. if not, it can be 
00015    obtained from the EDoc++ website: 
00016    http://edoc.sourceforge.net/license.html
00017 
00018 *******************************************************************************/
00019 #ifndef EDOC_DICTIONARY_H
00020 #define EDOC_DICTIONARY_H
00021 
00022 #include "EDoc/PropogatingException.h"
00023 #include "EDoc/exceptions.h"
00024 #include "EDoc/ProgressIFace.h"
00025 #include "EDoc/StringIdentifiedObject.h"
00026 #include "EDoc/TranslationUnit.h"
00027 #include "EDoc/SpecificManagedObject.h"
00028 
00029 #include <string>
00030 #include <map>
00031 #include <ostream>
00032 #include <vector>
00033 #include <list>
00034 #include <sstream>
00035 
00036 
00037 
00038 namespace EDoc
00039 {
00040    class PersistenceIFace;
00041    class Type;
00042    class Function;
00043    class File;
00044    class CodeBlock;
00045    class CatchBlock;
00046    class TryBlock;
00047    class FunctionType;
00048    
00049    //===========================================================================
00050    /** \brief Represents one or more merged translation units.
00051     *
00052     * The Dictionary class is the main class of the system. Dictionary objects
00053     * have 1:1 mappings with .edc files. You will read a .edc file into a
00054     * dictionary instance or write one from a Dictionary instance.
00055     *
00056     * Most objects belong to a specific dictionary. This is important for
00057     * correct memory management. It is not possible to assign pointers to
00058     * DictionarySpecific objects between different dictionary instances.
00059     *
00060     * The main object types directly stored in a Dictionary include:
00061     *
00062     *    - File : Represents a source file used in compiling some 
00063     *       code (Headers too).
00064     *    - Type : Represents a C/C++ type like integers, floats, classes etc.
00065     *    - FunctionType : This is used to represent C/C++ function types. They
00066     *       are stored seperate from normal types as they are "conglomerate"
00067     *       types made up from a number of Type objects. 
00068     *    - Function : These represent individual functions in a compiled binary.
00069     *    .
00070     *
00071     * These above object types are specially managed. In order to simplify the
00072     * memory allocation and handling of these objects, we have made them all
00073     * derive from the StringIdentifiedObject class. Each of these objects is
00074     * identified by a unique string. For example a Function is identified by the
00075     * string for the name used as its link name which you would see using nm on
00076     * a binary file.
00077     *
00078     * It is helpful to note that while these objects are all identified by
00079     * unique string keys, in the .edc files they are identified by integer
00080     * indexes. This makes the loading procedure become a two stage procedure
00081     * and the IndexedDictionary is used to perform the initial load using
00082     * indexes. Using integer indexes in the .edc files was done to decrease
00083     * loading/saving times and to decrease file size. See Read() and Write() for
00084     * more information.
00085     *
00086     * Most objects in EDoc are DictionarySpecific objects. This means that
00087     * they are associated with a particular dictionary. These object can NOT be
00088     * referenced from objects within a different dictionary but must be
00089     * duplicated to the other dictionary first before they can be referenced by
00090     * any objects in the other dictionary. This can often be achieved using code
00091     * like follows:
00092     *
00093     * \code
00094     *    void Func(Type* other_dict_type)
00095     *    {
00096     *       // Will retrieve or create the type from this dictionary with the
00097     *       // same string identifier as the type from the other dictionary.
00098     *       // However the type will be "empty" if it did not exist in this
00099     *       // dictionary to start with.
00100     *       Type* this_dict_type = this_dict.types.AlwaysGet(*other_dict_type);
00101     *    }
00102     * \endcode
00103     *
00104     * If the type is not complete then its IsPopulated() will return
00105     * false. In which case you will need to populate it using the assignment
00106     * operator or some other means. Also note however that when populating it
00107     * the type may also referr to other "un-populated" objects that you will
00108     * need to populate. 
00109     *
00110     * As you can see it is not easy to move or copy objects from one dictionary
00111     * to another. Most usages of the EDoc however will not require the user to
00112     * do anything like this anyhow as all the assigning/merging etc is done by
00113     * the procedure of:
00114     *
00115     * dict1.Read(...);
00116     * dict2.Read(...);
00117     * dict1.Merge(dict2);
00118     *
00119     * This will read two .edc files into two seperate dictionaries, and then
00120     * "merge" the two dictionaries. A merge is kind of like linking
00121     * objects/libraries together in the development of an application.
00122     *
00123     * Temporary Dictionary Staging Area
00124     *
00125     * When using the EDoc python module, you will probably make use of the
00126     * temporary dictionary staging area. This is a static pointer to a "current"
00127     * working dictionary. This will generally be used for communication between
00128     * the python and C++ code. In particular the python wrappers require the use
00129     * of a default constructor for most data types due to a limitation (OR I
00130     * JUST DONT KNOW HOW TO USE IT PROPERLY YET) of Swig. Since most objects
00131     * require that the dictionary that they will belong to be passed upon
00132     * construction time, this means that there is no real possibility of having
00133     * a default constructor. Instead now, if the default constructor is used,
00134     * all DictionarySpecific objects will use the dictionary that can be
00135     * currently retrieved using GetCurrentDictionaryAssert()
00136     *
00137     * See GetCurrentDictionary() for more details.
00138     */
00139    class Dictionary
00140    {
00141    public:
00142 
00143       //------------------------------------------------------------------------
00144       /** \brief Constructor.
00145        *
00146        * Note: Will create a single File object called UNKNOWN_FILE and also
00147        * a single location object called UNKNOWN_LOCATION. These will exist in
00148        * all dictionaries.
00149        */
00150       Dictionary();
00151 
00152       //------------------------------------------------------------------------
00153       /** \brief Erases all memory for all objects that are specific to this
00154        * dictionary.
00155        *
00156        * This will erase all DictionarySpecific objects owned
00157        * by this dictionary.
00158        */
00159       ~Dictionary();
00160 
00161       //------------------------------------------------------------------------
00162       /** \brief Reads the contents of a .edc file into this dictionary
00163        * instance.
00164        *
00165        * This will replace the contents of this dictionary with the contents of
00166        * the .edc file. All old state of this dictionary will be lost.
00167        *
00168        * The process of reading .edc files is quite involved. It is mostly made
00169        * so by certain limitations placed on it by the GCC modification and also
00170        * due to the design decision of storing references to objects in the .edc
00171        * file using integer indexes instead of string keys as are using in the
00172        * runtime objects.
00173        *
00174        * When a read operation occurs the following sequence is followed:
00175        *    -# Erase all current data from the dictionary.
00176        *    -# Construct an Indexed dictionary used to associate integer index
00177        *       references with object instances.
00178        *    -# Read the contents of the .edc file header. This includes info
00179        *       like translation units referenced in .edc file, version
00180        *       information etc.
00181        *    -# Read all data records.
00182        *    -# Validate that all read data objects have been inserted into the
00183        *       dictionary.
00184        *    .
00185        *
00186        * The IndexedDictionary is used to associated particular instances with
00187        * indexes as they are read from the file. 
00188        *
00189        * reading of data objects occurs in a a set number of stages. These
00190        * stages correspond to the indexes of items in the managed objects
00191        * vector. I.e.
00192        *
00193        *    -# Stage 0: File objects         : GetManagedObjectsVector() index 0
00194        *    -# Stage 1: Type objects         : GetManagedObjectsVector() index 1
00195        *    -# Stage 2: FunctionType objects : GetManagedObjectsVector() index 2
00196        *    -# Stage 3: Function objects     : GetManagedObjectsVector() index 3
00197        *    .
00198        *
00199        * Doing the stages this way GREATLY simplifies the reading code as it is
00200        * the same for each data object type except the index to the vector
00201        * changes as you move from one stage to the next.
00202        *
00203        * This places the requirement on the .edc files that all items must be
00204        * saved to the file in the given order: File, Type, FunctionType,
00205        * Function
00206        */
00207       void Read(PersistenceIFace& file);
00208 
00209       //------------------------------------------------------------------------
00210       /** \brief Writes the contents of this dictionary to a .edc file.
00211        *
00212        * Writing is a simple task in comparison to reading. It simply assigns 
00213        * an integer index to each object before writing. Writes all objects and
00214        * then resets the integer indexes to -1.
00215        */
00216       void Write(PersistenceIFace& file) const;
00217 
00218       //------------------------------------------------------------------------
00219       /** \brief Merges the contents of another dictionary into this dictionary.
00220        *
00221        * This is similar to linking objects together. It will resolve calls to
00222        * un-implemented functions with the correct implementations from other
00223        * translation units. It will produce errors if multiple implementations
00224        * are found of a single function (If they are different so we dont emit
00225        * errors for "vaguely" linked functions). 
00226        *
00227        * The result is a modified Dictionary that is the union of the two.
00228        */
00229       void Merge(const Dictionary& right);
00230 
00231       //------------------------------------------------------------------------
00232       /** \brief Replaces all references of pointer remove with pointer replace.
00233        *
00234        * This is used during the loading process to remove duplicate objects
00235        * from all dictionary specific objects and replace them with a single
00236        * "reference" object. After calling this function the user can be
00237        * guarenteed that this object will no longer contain any references to
00238        * "remove".
00239        *
00240        * When used on a dictionary this will attempt to replace all references
00241        * to the pointer remove over the entire Dictionary instance andall its
00242        * children.
00243        *
00244        * \param stack Is a processing stack used to ensure that recursive
00245        * associations do not form endless loops of replacing.
00246        *
00247        * \param remove Is a pointer value that we wish to replace.
00248        *
00249        * \param replace Is a pointer value to replace all instances of remove
00250        * with.
00251        *
00252        * \return Returns the number of occurences of remove that were replaced
00253        * with replace.
00254        */
00255       size_t ReplaceReferences(PStack& stack, void* remove, void* replace);
00256 
00257       //------------------------------------------------------------------------
00258       /** \brief Prints the contents of this object to the given stream in a
00259        * user readable manner.
00260        *
00261        * \param out The output stream to print to.
00262        *
00263        * \param prefix A prefix to print before every line displayed. Usually
00264        * used to provide indenting when printing a heirarchy of objects.
00265        */
00266       std::ostream& Print(std::ostream& out, std::string prefix="") const;
00267 
00268       //------------------------------------------------------------------------
00269       /** \brief Returns a string with the same contents as would be displayed
00270        * by the Print() function.
00271        */
00272       std::string ToString(std::string prefix="") const;
00273 
00274       //------------------------------------------------------------------------
00275       /** \brief Performs internal validation of the Dictionary object and all
00276        * its data.
00277        *
00278        * This will raise a BugException if internal validation fails andshould
00279        * only be used for debugging purposes. It is quite slow.
00280        */
00281       void Validate() const;
00282 
00283 
00284       //------------------------------------------------------------------------
00285       // Calculation of propogating exceptions.
00286       //------------------------------------------------------------------------
00287 
00288       //------------------------------------------------------------------------
00289       /** \brief Expands FunctionPointer calls and Virtual function calls into
00290        * a list of "possible" function calls.
00291        *
00292        * This results in a "more than complete" call-graph.
00293        *
00294        * \see See CalculatePropogatingExceptions() for information about 
00295        * this group of functions.
00296        */
00297       void ExpandCallGraph();
00298 
00299       //------------------------------------------------------------------------
00300       /** \brief Erases all propogating exception data from all functions.
00301        *
00302        * This must be called before calculating the propogating exception 
00303        * information to ensure it starts with a clean slate.
00304        *
00305        * \see See CalculatePropogatingExceptions() for information about 
00306        * this group of functions.
00307        */
00308       void ErasePropogatingExceptions();
00309 
00310       //------------------------------------------------------------------------
00311       /** \brief Analyses the callgraph of all functions ordering them 
00312        * by complexity and marking circular callgraphs.
00313        *
00314        * This will mark the complexity of a function by setting: 
00315        * Function::total_calls to the number of unique functions called.
00316        * It will also identify circular callgraphs between a group of functions
00317        * and if such exists will create the member: Function::circular
00318        * with a set of the functions in the circular graph.
00319        *
00320        * Finally it will return a vector of all the functions sorted in
00321        * order of their Function::total_calls member. This is used to process
00322        * the functions in order when calculating the propogating exceptions
00323        * In particular it is dynamic programming in action. By calculating the
00324        * least complex functions first, they are then available to be used
00325        * in the more complex functions later. I.e. The processing the functions
00326        * in this order means that we dont need to "re-calculate" information.
00327        * 
00328        *
00329        * \see See CalculatePropogatingExceptions() for information about 
00330        * this group of functions.
00331        */
00332       void CalculateCallComplexity(std::vector <Function*>& funcs);
00333 
00334       //------------------------------------------------------------------------
00335       /** \brief Calculates the propogating exceptions for all functions
00336        * provided in the vector sorted which must be in order of the call 
00337        * complexity.
00338        *
00339        * This function asserts that the vector is provided in the correct order.
00340        *
00341        * To calculate the propogating exceptions for a dictionary
00342        * the following functions must be called in order:
00343        *
00344        *    -# ExpandCallGraph()
00345        *    -# ErasePropogatingExceptions()
00346        *    -# CalculateCallComplexity()
00347        *    -# CalculatePropogatingExceptions()
00348        *    .
00349        *
00350        * This function makes use of:
00351        *    - GenerateExceptions()
00352        *    - HandleCircularCallgraph(
00353        *    - PostProcessExceptions()
00354        *    - ProcessFunction()
00355        *    - ProcessCodeBlock()
00356        *    - ProcessTryBlock()
00357        *    .
00358        *
00359        * \todo Document the algorithm used for this operation.
00360        */
00361       void CalculatePropogatingExceptions(std::vector <Function*>& sorted,
00362          FunctionProgressIFace* progress = NULL);
00363 
00364 
00365       //------------------------------------------------------------------------
00366       /** \brief Returns the translation unit with the given index.
00367        */
00368       TranslationUnit* GetIndexedTranslationUnit(int32_t index);
00369 
00370       //------------------------------------------------------------------------
00371       /** \brief Returns the index of the provided translation unit.
00372        *
00373        * NOTE: Will throw a BugException if the translation unit is not in this
00374        * dictionaries list of translation units.
00375        */
00376       int32_t GetTranslationUnitIndex(TranslationUnit* tu);
00377 
00378       //------------------------------------------------------------------------
00379       /** \brief Returns a list containing a reference to the ManagedObject
00380        * types being managed by this dictionary.
00381        *
00382        * This includes all File, Type, FunctionType and Function instances.
00383        */
00384       inline std::list<ManagedObjectPtr> GetManagedObjects()
00385       {
00386          // NOTE: The order of this list is important and MUST be the same order
00387          // in which we expect to load the various record types from the file.
00388          std::list<ManagedObjectPtr> ret;
00389          ret.push_back(&files);
00390          ret.push_back(&types);
00391          ret.push_back(&function_types);
00392          ret.push_back(&functions);
00393          return ret;
00394       }
00395 
00396       //------------------------------------------------------------------------
00397       /** \brief See GetManagedObjects()
00398        */
00399       inline std::vector<ManagedObjectPtr> GetManagedObjectsVector()
00400       {
00401          // NOTE: The order of this list is important and MUST be the same order
00402          // in which we expect to load the various record types from the file.
00403          std::vector<ManagedObjectPtr> ret;
00404          ret.push_back(&files);
00405          ret.push_back(&types);
00406          ret.push_back(&function_types);
00407          ret.push_back(&functions);
00408          return ret;
00409       }
00410 
00411       //------------------------------------------------------------------------
00412       /** \brief See GetManagedObjects()
00413        */
00414       inline std::vector<const ManagedObject*> GetManagedObjectsVector() const
00415       {
00416          // NOTE: The order of this list is important and MUST be the same order
00417          // in which we expect to load the various record types from the file.
00418          std::vector<const ManagedObject*> ret;
00419          ret.push_back(&files);
00420          ret.push_back(&types);
00421          ret.push_back(&function_types);
00422          ret.push_back(&functions);
00423          return ret;
00424       }
00425 
00426 
00427 
00428 
00429       //------------------------------------------------------------------------
00430       // Public Data
00431       //------------------------------------------------------------------------
00432 
00433       //------------------------------------------------------------------------
00434       /** \brief Every dictionary contains an "Unknown File" pointer that is
00435        * used for source files we could not determine.
00436        */
00437       const File* UNKNOWN_FILE;
00438 
00439       //------------------------------------------------------------------------
00440       /** \brief Every dictionary contains an "Unknown File" pointer that is
00441        * used for source locations we could not determine in the GCC MOD.
00442        */
00443       const Location* UNKNOWN_LOCATION;
00444 
00445       //------------------------------------------------------------------------
00446       /** \brief Substitution exceptions type.
00447        */
00448       const Type* SUBSTITUTION_EXCEPTION_TYPE;
00449 
00450       SpecificManagedObject<File> files;
00451       SpecificManagedObject<Type> types;
00452       SpecificManagedObject<FunctionType> function_types;
00453       SpecificManagedObject<Function> functions;
00454 
00455       //------------------------------------------------------------------------
00456       /** \brief A vector of all translation units that have been merged into
00457        * this dictionary.
00458        */
00459       std::vector<TranslationUnit*> translation_units;
00460 
00461       //------------------------------------------------------------------------
00462 
00463    protected:
00464 
00465 
00466       //------------------------------------------------------------------------
00467       /** \brief Erases all data associated with this dictionary instance.
00468        */
00469       void EraseAll();
00470 
00471       //------------------------------------------------------------------------
00472       /** \brief Generates default set of data required by all dictionaries.
00473        */
00474       void GeneratePredefined();
00475 
00476       //------------------------------------------------------------------------
00477       /** \brief Process the given function and functions it calls to generate
00478        * the list of exceptions that may be emitted by it.
00479        *
00480        * This does not return a completed list of propogating exceptions
00481        * in the case where the function participates in a circular callgraph.
00482        * In this case the list of exceptions will include one or more exceptions
00483        * with type: SUBSTITUTION_EXCEPTION_TYPE and the function
00484        * will return a boolean value of true.
00485        *
00486        * In the case of the function participating in a circular
00487        * callgraph, only the first function in that callgraph should
00488        * return a value of true. 
00489        *
00490        * \return Returns a value of true if this function was part
00491        * of a circular callgraph AND additional processing needs to be
00492        * performed on this function (and the others in the callgraph) by
00493        * the HandleCircularCallgraph) function. I.e. If this function
00494        * returns true then you must also call HandleCircularCallgraph().
00495        */
00496       bool GenerateExceptions(
00497          FunctionProgressIFace* progress, Function& function);
00498    
00499       //------------------------------------------------------------------------
00500       /** \brief Handles the special case of a circular callgraph
00501        * when calculating propogating exceptions for a function.
00502        *
00503        * This will ensure that GenerateExceptions() has been called
00504        * for all functions in the circular callgraph. Following this
00505        * it will then find the union of all exceptions thrown by those
00506        * functions and then will substitute that unioned list of 
00507        * exceptions for all exceptions of that have the type:
00508        * SUBSTITUTION_EXCEPTION_TYPE.
00509        */
00510       void HandleCircularCallgraph(
00511          FunctionProgressIFace* progress, Function& function);
00512    
00513       //------------------------------------------------------------------------
00514       /** \brief After the complete list of propogating exceptions
00515        * has been calculated, this will filter out those exceptions
00516        * that will not be emitted as a result of the functions throw() 
00517        * specifier while emitting any warnings for such cases.
00518        */
00519       void PostProcessExceptions(Function& function);
00520 
00521       //------------------------------------------------------------------------
00522       /** \brief For internal use in calculating propogating exceptions.
00523        */
00524       void ProcessFunction(
00525          Function& top,
00526          FunctionLoc& function,
00527          std::list<PropogatingException>& poss_curr_catch_types);
00528 
00529       //------------------------------------------------------------------------
00530       /** \brief For internal use in calculating propogating exceptions.
00531        */
00532       void ProcessCodeBlock(
00533          Function& top,
00534          CodeBlock& block,
00535          FunctionLoc& function,
00536          std::list<PropogatingException>& poss_curr_catch_types,
00537          std::list<PropogatingException>& exceptions);
00538 
00539       //------------------------------------------------------------------------
00540       /** \brief For internal use in calculating propogating exceptions.
00541        */
00542       void ProcessTryBlock(
00543          Function& top,
00544          TryBlock& block,
00545          FunctionLoc& function,
00546          std::list<PropogatingException>& poss_curr_catch_types,
00547          std::list<PropogatingException>& exceptions);
00548 
00549       //------------------------------------------------------------------------
00550       /** \brief For internal use in ExpandCallgraph()
00551        */
00552       void UpdateFunctionAddressLists();
00553 
00554       //------------------------------------------------------------------------
00555 
00556    };
00557    //===========================================================================
00558 
00559 
00560 
00561 
00562    //===========================================================================
00563    // Temporary staging area for dictionaries.
00564    //===========================================================================
00565    /** \brief Returns the currently active dictionary instance that is in the
00566     * staging area.
00567     *
00568     * \return May return NULL if no dictionaryis currently in the staging area.
00569     */
00570    Dictionary* GetCurrentDictionary();
00571 
00572    //===========================================================================
00573    /** \brief Set the currently active dictionary.
00574     *
00575     * May set to NULL to clear the staging area.
00576     */
00577    void SetCurrentDictionary(Dictionary* current_in);
00578 
00579    //===========================================================================
00580    /** \brief Returns the currently active dictionary instance that is in the
00581     * staging area.
00582     *
00583     * This is used by many of the python wrappers, so before making use of the
00584     * python wrappers it is necessary to place a Dictionary instance in this
00585     * staging area.
00586     *
00587     * \return Will never return NULL, instead will throw a BugException if the
00588     * staging area is empty. 
00589     */
00590    Dictionary* GetCurrentDictionaryAssert();
00591 
00592    //===========================================================================
00593 }
00594 
00595 namespace std
00596 {
00597    //===========================================================================
00598    /** \brief Print a dictionary to a std ostream.
00599     */
00600    static inline std::ostream& operator<<(std::ostream& out, 
00601       const EDoc::Dictionary& value)
00602    {
00603       return value.Print(out);
00604    }
00605    //===========================================================================
00606 }
00607 
00608 #endif // EDOC_DICTIONARY_H

Generated on Tue Jan 20 18:26:07 2009 for EDoc-0.2.1 by  doxygen 1.5.1