/*
 * (c) Copyright IBM Corp. 2005 All Rights Reserved
 *
 * Physical Memory Information Module
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * at your option any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
 * the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/**
 * @file td.h
 *
 * @brief Definitions for the physical memory analyzer.
 *
 * This part provides the source for the literal values, global variables,
 * and structures for the physical memory analyzer.
 *
 * @author International Business Machines
 * @author Paul Movall <movall@us.ibm.com>
 *
 * @version Current Version: 1.0
 *
 * @date Current Date: 01/2005
 *
 * @version 0.1, 11/2003: File created by Paul Movall <movall@us.ibm.com>
 * @version 1.0, 01/2005: Miscellaneous cleanup for publish
 *
 */

/**
 * @mainpage
 *
 * This application provides for analysis of physical memory usage of
 * Linux operating system and its user space tasks. It takes the input
 * from the physmem_info module and formats it into usable data. This
 * data can then either be used directly in a human readable form or
 * it can also be written to a comma separated value (CSV) file.
 * The CSV file format is suitable for import into a spreadsheet for
 * further analysis and / or graphing.
 *
 * Use the "-h" input parameter for help on the application input
 * parameters.
 *
 * @section Rationale
 * In most Linux environments, the standard memory metric is virtual memory.
 * This metric can be used to determine the swap space needed as well
 * as swap rates and other metrics related to swapping. This can be seen
 * by the prevalence of various virtual memory tools, such as mpatrol or
 * even the /proc/@<pid@>/maps node.
 *
 * However in an environment without a swap device, the virtual
 * memory metric is not as important as physical memory. In such a system,
 * physical memory usage is a critical metric. Virtual memory tools as
 * noted above are still needed, but they are of limited use when
 * the system has exhausted all of its physical memory. When this
 * happens, the kernel will arbitrarily start killing processes or
 * just panic. This is not a desirable situation.
 *
 * Therefore the understanding of the assignment and usage patterns of
 * physical pages is vital. During a review of existing open source projects,
 * no viable tool to perform such analysis was found. This tool set,
 * consisting of a dynamically loadable kernel module and
 * a post-processing analyzer, was designed and implemented to perform
 * such analysis.
 *
 * This is the post-processing analyzer portion of the toolset.
 *
 */

/**
 * @page lexpg Input Data Parser
 *
 * This section provides some documentation for the input data parser.
 * This documentation is not part of the parser source part due to
 * formatting problems.
 *
 * @section lexformat Format of Parser
 * The input data parser is based on flex, the open source implementation
 * of the standard UNIX lex. It uses the same format for state definition,
 * and pattern matching as lex so the code should look familiar to those
 * users.
 *
 * Flex, or lex, does not support comments outside of a state definition,
 * and doxygen does not handle descriptions within a function, so this
 * must be documented elsewhere. This documentation is currently located
 * in the @c td.h file.
 *
 * The lexical analyzer is used to parse the data and build the global
 * data structures that are used by the various analyzers. Only the
 * lexical analysis part is needed, so there is no corresponding yacc
 * portion in this program.
 *
 * The analyzer is built very specifically to the output format of
 * the task description information from the physmem_info kernel module.
 * By convention, the extension of the task output of the kernel
 * module has been ".td" for task data, so in this program that name
 * has been also used.
 *
 * @section lexfuncs Additional Functions in Parser
 *
 * The input parser does provide some functionality other than reading
 * the formatted data into the global data structures. These functions
 * are described in the sections below.
 *
 * @subsection lexfuncthread Process vs. Thread Determination
 *
 * The parser is responsible for filtering out the various threads. This
 * is done when the mm value of the task line is read. The task structure
 * is being built on the fly by the parser, and when the mm field is
 * filled in, it will check against the already parsed processes for
 * the occurrence of the same mm value. When two tasks have the same mm
 * value, it means that they are using the same memory map and both do
 * not need to be kept in the list. This is done using the @c find_task()
 * function.
 *
 * When the function returns TRUE, that indicates that the
 * mm has been found and the @c task_valid flag is set to FALSE. All
 * remaining data for that task will be ignored. Note that it will be
 * matched by the parser but no changes will be made in the global
 * task list.
 *
 * When the function returns FALSE, that indicates that the mm has
 * not been found and that this is a unique process. The @c task_valid
 * flag is set to TRUE, and the rest of the input data will be saved
 * off of this task_t structure.
 *
 * @subsection lexfuncbss Setting a VMA as BSS
 *
 * When the VMA entry is complete, the parser will check if the VMA
 * matches the criteria for BSS. This is following a convention by
 * the loader in which the BSS (global uninitialized data) will
 * be adjacent to the data (global initialized data). The data
 * is signified by having a file association and the bss does not.
 * In addition, both VMAs are writable. So the combination of
 * this information allows the parser to set the BSS flag on
 * that VMA. 
 *
 * @subsection lexfuncassocnm Setting the Associated Name
 *
 * In addition to the BSS function, the parser will also set the
 * associated name for a VMA if it matches the bss criteria or
 * is a stack VMA. This allows the analyzers additional data on
 * the origin of this data area.
 *
 * @subsection lexfuncwrexec Clearing the Write Flag on Executables
 *
 * An executable (i.e., the code where main resides) may have
 * the write flag turned on for its text segment. This can happen
 * when the -fpic compile flag is turned on during its build.
 * This variation in the VMA causes confusion to the parsers
 * that are trying to analyze exactly for what each area is used.
 * The parser will automatically clear the write flag if this
 * condition is detected.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/param.h>
#include <stdint.h>

/*
 * <stdint.h> is included above and defines UINT32_MAX whenever it provides
 * uint32_t, so that standard macro is tested first. __uint32_t_defined is a
 * private glibc guard that other C libraries (and current glibc releases) do
 * not define; testing it alone made the fallback below collide with the real
 * uint32_t.
 */
#if !defined(UINT32_MAX) && !defined(__uint32_t_defined)
/**
 * @typedef uint32_t
 * @brief Fallback typedef to ensure a 32 bit unsigned entity.
 *
 * Only used when <stdint.h> did not supply uint32_t. The fallback is
 * unsigned int rather than unsigned long because unsigned long is
 * 64 bits wide on LP64 platforms.
 */
typedef unsigned int uint32_t;
#endif

/**
 * @struct map_struct
 *
 * @brief Parsed format of the virtual to physical mapping structure.
 *
 * The data in this structure is filled in from one of the entries in the
 * M: stanza from the input. Each mapping area is preceded by either
 * "M:" for the first one or a "|" for each additional mapping entry.
 *
 * Entries are chained through @c next; the owning vma_struct holds the
 * head and tail of the chain in its @c map_head and @c map_tail members.
 *
 * NOTE(review): both addresses are stored in 32 bit fields, so an address
 * wider than 32 bits cannot be represented here.
 */
/**
 * @typedef map_t
 * @brief Type definition for the map_struct.
 */
typedef struct map_struct {
    uint32_t virt;		/**< Virtual address of this page.	*/
    uint32_t phys;		/**< Physical address of this page.	*/
    uint32_t flags;		/**< Kernel page structure flags.	*/
    uint32_t count;		/**< Usage count of this page.		*/
    struct map_struct *next;	/**< Next mapping in the list.		*/
} map_t;

/**
 * @struct pte_struct
 *
 * @brief Parsed format of the page table entries summary structure.
 *
 * The data in this structure is filled in from the P: line from the
 * input. This data is based on the PTE data for a particular VMA.
 * There is one PTE structure for each VMA structure; the vma_struct
 * refers to it through its @c pte_info pointer.
 */
/**
 * @typedef pte_t
 * @brief Type definition for the pte_struct.
 */
typedef struct pte_struct {
    uint32_t user_space_count;	/**< How many pages in this VMA are
				 * user space pages.			*/
    uint32_t exec_count;	/**< Count of executable pages in VMA.	*/
    uint32_t rd_only_count;	/**< Count of read-only pages in VMA.	*/
    uint32_t single_use_count;	/**< Count of single use pages in VMA.  */
    uint32_t shared_count;	/**< Count of shared pages in VMA.
				 * NOTE(review): presumably the pages
				 * that are not single use; confirm
				 * against the parser.			*/
} pte_t;

/**
 * @struct vma_struct
 *
 * @brief Parsed format of the Virtual Memory Area (VMA) structure.
 *
 * The data in this structure is filled in from the V: line from the
 * input. This data is based on the VMA data.
 *
 * The VMAs of one task are linked in both directions through @c next
 * and @c prev; the owning task_struct holds the head and tail of that
 * list. Both name buffers are stored inline, so every VMA entry carries
 * two PATH_MAX sized character arrays.
 */
/**
 * @typedef vma_t
 * @brief Type definition for the vma_struct.
 */
typedef struct vma_struct {
    uint32_t start;		/**< Starting virtual address of area.	*/
    uint32_t end;		/**< Ending virtual address of area.	*/
    uint32_t vlen;		/**< Length in bytes of virtual area.	*/
    uint32_t rlen;		/**< Number of physical bytes assigned
				 * to this VMA.				*/
    uint32_t pte_flags;		/**< Physical flags for this VMA.	*/
    uint32_t vma_flags;		/**< VMA Flags for this area. Holds the
				 * kernel flags plus the internal
				 * VMA_FLAGS_* bits defined in this
				 * file.				*/
    pte_t *pte_info;		/**< Pointer to summary information
				 * from the PTEs.			*/
    map_t *map_head;		/**< Head of the mapping list.		*/
    map_t *map_tail;		/**< Tail of the mapping list. This is
				 * used to add at end to keep this list
				 * in order.
				 */
    char name[PATH_MAX];	/**< Name of the file associated to this
				 * VMA. If this is anonymous memory,
				 * this field will be string "NULL".	*/
    char assoc_name[PATH_MAX];	/**< For anonymous memory, it can
				 * sometimes be associated with a file.
				 * If it can, the associated file is
				 * saved here. Otherwise this field is
				 * set to "None".
				 */
    struct vma_struct *next;	/**< Next VMA area in this task.	*/
    struct vma_struct *prev;	/**< Previous VMA area in this task.	*/
} vma_t;

/**
 * @struct task_struct
 *
 * @brief Parsed format of the task, or process, structure.
 *
 * The data in this structure is filled in from the T: line from the
 * input. This data is based on the task information.
 *
 * Each task owns a list of vma_struct entries (@c vma_head / @c vma_tail)
 * and is itself chained into a task list through @c next.
 */
/**
 * @typedef task_t
 * @brief Type definition for the task_struct.
 */
typedef struct task_struct {
    char name[16];		/**< Command used for this task.
				 * NOTE(review): 16 presumably matches
				 * the kernel's command name length;
				 * confirm.				*/
    unsigned long mm;		/**< Kernel's pointer to the memory
				 * manager's structure. This is used
				 * to determine the difference between
				 * a thread (same mm) versus a
				 * process (different mm) for each
				 * task structure. Note that only the
				 * processes are added to the list.
				 */
    struct task_struct *next;	/**< Next task in the list.		*/
    int page_count;		/**< Page count for this task.
				 * NOTE(review): the original comment
				 * was only a "fix" placeholder; what
				 * is counted is not established in
				 * this header, confirm against the
				 * parser and analyzers.		*/
    vma_t *vma_head;		/**< Head of the list of VMAs for this
				 * task.				*/
    vma_t *vma_tail;		/**< Tail of the list of VMAs for this
				 * task. This is used when adding to
				 * keep the list in order.		*/
    unsigned long minor_flts;	/**< The number of "minor" faults taken.*/
    unsigned long major_flts;	/**< The number of "major" faults taken.*/
    unsigned long chld_min_flts;/**< The number of "minor" faults taken
				 * by children tasks. If in the same mm,
				 * these should be counted against the
				 * process!				*/
    unsigned long chld_maj_flts;/**< The number of "major" faults taken
				 * by children tasks. If in the same mm,
				 * these should be counted against the
				 * process!				*/
    unsigned long user_time;	/**< The amount of time used by this
				 * process while in user space. Note that
				 * any pthreads must be summed with the
				 * main thread for a total view of the
				 * process.				*/
    unsigned long sys_time;	/**< The amount of time used by this
				 * process while in kernel space. Note that
				 * any pthreads must be summed with the
				 * main thread for a total view of the
				 * process.				*/
} task_t;

/**
 * @struct task_list_head_struct
 *
 * @brief Global task list structure.
 *
 * This structure is only used by the task list head. It keeps both the
 * head and tail pointer to keep the tasks in order of the input data.
 * The tasks themselves are chained through the @c next member of
 * task_struct.
 */
/**
 * @typedef task_list_head_t
 * @brief Type definition for the task_list_head_struct.
 */
typedef struct task_list_head_struct {
	task_t *head;	/**< First task in the list.			*/
	task_t *tail;	/**< Last task in the list; kept so that new
			 * tasks can be appended in input order.	*/
} task_list_head_t;

/*
 * Boolean literals. Each is only defined when no earlier header has
 * already supplied it, so an existing definition takes precedence.
 */
#ifndef FALSE
/** @def FALSE
 * Integer value for a logical false condition.
 */
#define FALSE 0
#endif

#ifndef TRUE
/** @def TRUE
 * Integer value for a logical true condition.
 */
#define TRUE 1
#endif

/** @def VMA_FLAGS_BSS Internal flag that this VMA is likely a .BSS
 * segment (global uninitialized data). This flag must not overlap any
 * of the kernel VMA flags.
 */
#define VMA_FLAGS_BSS	0x20000000
/** @def VMA_FLAGS_RPT Internal flag that this VMA has already been
 * included in calculations (i.e., reported). This flag must not overlap any
 * of the kernel VMA flags.
 */
#define VMA_FLAGS_RPT	0x40000000

/*
 * The flags below mirror kernel VMA flag values. Each one is wrapped in
 * #ifndef so that a definition supplied before this point is kept.
 */
#ifndef VMA_FLAGS_RD
/** @def VMA_FLAGS_RD Internal flag that this VMA is readable.
 * This flag must match the kernel VMA read flag.
 */
#define VMA_FLAGS_RD	0x1
#endif

#ifndef VMA_FLAGS_WR
/** @def VMA_FLAGS_WR Internal flag that this VMA is writable.
 * This flag must match the kernel VMA write flag.
 */
#define VMA_FLAGS_WR	0x2
#endif

#ifndef VMA_FLAGS_EXEC
/** @def VMA_FLAGS_EXEC Internal flag that this VMA is executable.
 * This flag must match the kernel VMA execute flag.
 */
#define VMA_FLAGS_EXEC	0x4
#endif

/** @def VMA_FLAGS_RWX Internal flag that this VMA is
 * readable, writable, and executable. With the default values above
 * this evaluates to 0x7.
 */
#define VMA_FLAGS_RWX	(VMA_FLAGS_RD | \
                         VMA_FLAGS_WR | \
                         VMA_FLAGS_EXEC)

/** @def VMA_FLAGS_MASK_RWX Mask for the RWX flags.
 * NOTE(review): the mask is 0xF, one bit (0x8) wider than
 * VMA_FLAGS_RWX (0x7). Presumably the extra bit is meant to cover a
 * fourth kernel permission flag; confirm before relying on it.
 */
#define VMA_FLAGS_MASK_RWX	0xF

/** @def VMA_FLAGS_SE_MASK Mask for the stack and executable flags.
 * NOTE(review): the mask covers bits 8 through 15, while only
 * VMA_FLAGS_STACK (0x100) and VMA_FLAGS_EXECUTABLE (0x1000) are defined
 * in this file; confirm that the other bits are meant to be included.
 */
#define VMA_FLAGS_SE_MASK	0xFF00

#ifndef VMA_FLAGS_STACK
/** @def VMA_FLAGS_STACK Internal flag that this VMA is stack space.
 * This flag must match the kernel VMA growsdown flag.
 */
#define VMA_FLAGS_STACK	0x100
#endif

#ifndef VMA_FLAGS_EXECUTABLE
/** @def VMA_FLAGS_EXECUTABLE Internal flag that this VMA is part of
 * the main executable from which this task is derived.
 * This flag must match the kernel VMA executable flag.
 */
#define VMA_FLAGS_EXECUTABLE	0x1000
#endif

/*
 * printf-style format strings for the report output. Every conversion
 * in them is either %s or %d, so each corresponding argument must be a
 * char pointer or an int respectively.
 */

/** @def FORMAT_VMA_DETAIL_TERSE Format of the VMA detail
 * within a given process in a terse, or CSV, format.
 * Takes three integer arguments.
 * NOTE(review): presumably the same three values, in the same order,
 * as FORMAT_VMA_DETAIL_VERBOSE; confirm against the caller. */
#define FORMAT_VMA_DETAIL_TERSE	   \
" , , %d, , , , , , , , , %d, %d, "
/** @def FORMAT_VMA_DETAIL_VERBOSE Format of the VMA detail
 * within a given process in a verbose, or human readable, format.
 * Takes three integer arguments: the virtual size, the single use
 * count, and the shared count. The string ends with "flags: " and has
 * no newline, so the flags text is expected to be written after it. */
#define FORMAT_VMA_DETAIL_VERBOSE  \
"  vsize: %4d, single: %4d, shared: %4d, flags: "

/** @def FORMAT_TASK_DETAIL_TERSE_1 Format of the first portion
 * of the task detail information in terse, or CSV, format.
 * This first portion is the name, read/write count, read/exec
 * count, and total count of pages.
 */
#define FORMAT_TASK_DETAIL_TERSE_1 "%s, %d, %d, %d, "
/** @def FORMAT_TASK_DETAIL_TERSE_2 Format of the second portion
 * of the task detail information in terse, or CSV, format.
 * This second portion is the name, stack count, .data
 * count, .text count, and .bss/heap count of pages.
 */
#define FORMAT_TASK_DETAIL_TERSE_2 "%s, %d, %d, %d, %d, "
/** @def FORMAT_TASK_DETAIL_TERSE_3 Format of the third portion
 * of the task detail information in terse, or CSV, format.
 * This third portion is the shared library .bss count,
 * shared library .data count, and shared library
 * executable count of pages.
 */
#define FORMAT_TASK_DETAIL_TERSE_3 "%d, %d, %d, "
/** @def FORMAT_TASK_DETAIL_TERSE_4 Format of the fourth portion
 * of the task detail information in terse, or CSV, format.
 * This fourth portion is the shared library single use
 * executable count and shared library multiple use count of pages.
 * This portion ends the CSV record with a newline.
 */
#define FORMAT_TASK_DETAIL_TERSE_4 "%d, %d\n"

/** @def FORMAT_TASK_DETAIL_VERBOSE_1 Format of the first portion
 * of the task detail information in verbose, or human readable, format.
 * This first portion is the name, read/write count, read/exec
 * count, and total count of pages.
 */
#define FORMAT_TASK_DETAIL_VERBOSE_1 \
"Process \"%s\" has %d rw and %d rx pages, (%d pages total)\n"
/** @def FORMAT_TASK_DETAIL_VERBOSE_2 Format of the second portion
 * of the task detail information in verbose, or human readable, format.
 * This second portion is the name, stack count, .data
 * count, .text count, and .bss/heap count of pages.
 */
#define FORMAT_TASK_DETAIL_VERBOSE_2 \
"\tFor [%s]: %d stack, %d data, %d .text, %d heap\n"
/** @def FORMAT_TASK_DETAIL_VERBOSE_3 Format of the third portion
 * of the task detail information in verbose, or human readable, format.
 * This third portion is the shared library .bss count,
 * shared library .data count, and shared library
 * executable count of pages.
 */
#define FORMAT_TASK_DETAIL_VERBOSE_3 \
"\tFor libraries: %d .bss, %d .data, %d .text\n"
/** @def FORMAT_TASK_DETAIL_VERBOSE_4 Format of the fourth portion
 * of the task detail information in verbose, or human readable, format.
 * This fourth portion is the shared library single use
 * executable count and shared library multiple use count of pages.
 */
#define FORMAT_TASK_DETAIL_VERBOSE_4 \
"\t  %d are used only in this process, %d are shared\n"

#ifndef PAGE_SIZE
/** @def PAGE_SIZE Ensure that we have a page size set.
 * Both Intel and PPC use 4k, so use that if not set.
 * The value is in bytes and is only used when no earlier header
 * has already defined PAGE_SIZE. */
#define PAGE_SIZE 4096
#endif

/*
 * NOTE(review): the objects below are defined, not merely declared, in
 * this header: none of them carries `extern`. Every translation unit that
 * includes this file therefore emits its own tentative definition, which
 * only links when the toolchain merges common symbols (-fcommon). GCC 10
 * and later default to -fno-common and report multiple definitions.
 * Consider `extern` declarations here plus one definition in a single .c
 * file.
 *
 * None of the objects has an initializer, so each starts out zeroed
 * (NULL, 0 / FALSE, or an empty string). A documented default that
 * differs from that (task_valid TRUE, pstack_size 16) must be assigned
 * by code outside this header.
 */

/**
 * @brief The global head of the task list to which all input data is placed.
 *
 * The default value of both the head and tail is NULL.
 */
task_list_head_t head_task;

/**
 * @brief The head of the diff task list to which diff input data is placed.
 *
 * The default value of both the head and tail is NULL.
 */
task_list_head_t diff_head;

/**
 * @brief Flag used in parsing to signify to keep task's data.
 *
 * This flag is set to TRUE when the task is a process (i.e., a unique mm
 * value). All of the input data is saved. This flag is set to FALSE when
 * the task is a thread (i.e., its mm value is shared with another task
 * that has already been read). All of the input data for that task is
 * then ignored as it is the same as the task with which the mm is
 * the same. The intended default value is TRUE; it is not set by this
 * declaration.
 */
int task_valid;

/**
 * @brief Flag used to signify to write PostScript output.
 *
 * This flag is set to TRUE when the user has requested to output
 * PostScript data for the graph of the base memory utilization and
 * the overall library utilization.
 * The default value is FALSE.
 */
int post_parm;

/**
 * @brief Flag used to filter the virtual size of the pthread stack size.
 *
 * This flag is set to TRUE when the user has requested to filter
 * the virtual size of the pthread stack. By default, the pthread
 * stack has a 511 page virtual stack. This allows the user to reset
 * the virtual stack size to something reasonable.
 * The default value is FALSE.
 */
int pstack_filter;

/**
 * @brief Variable to hold the filtered size of the pthread stack.
 *
 * This variable is used to set the virtual stack size of the
 * pthread stack when the @c pstack_filter is set to TRUE.
 * The intended default value is 16; it is not set by this declaration.
 * NOTE(review): presumably expressed in pages, like the 511 page
 * figure quoted for @c pstack_filter; confirm.
 */
int pstack_size;

/**
 * @brief Global variable for terse (CSV) output.
 *
 * The default value is FALSE for human readable output. This can be
 * set by using the "-t" input parameter.
 */
int terse;

/**
 * @brief Global variable for analysis of virtual usage.
 *
 * The default value of this flag is FALSE for physical memory analysis.
 * This can be set to true by using the "-v" input parameter.
 */
int virtual;

/**
 * @brief Global variable to output summary or details on file diff.
 *
 * The default value of this flag is FALSE for detailed output.
 * This can be set to true by using the "-s" input parameter.
 */
int summary;

/**
 * @brief Input file name.
 *
 * Buffer of PATH_MAX bytes.
 */
char infile[PATH_MAX];

/**
 * @brief Output file name.
 *
 * Buffer of PATH_MAX bytes.
 */
char outfile[PATH_MAX];

/**
 * @brief PostScript output file name.
 *
 * Buffer of PATH_MAX bytes.
 */
char post_file[PATH_MAX];

/**
 * @brief Convert a character string to an integer for hex strings.
 *
 * This is similar to atoi() standard library function but works for
 * hexadecimal strings instead of decimal strings. It will stop
 * parsing the string when a non-hex character is encountered,
 * including the terminating NUL character.
 *
 * @param str Character string holding the hexadecimal digits.
 *            NOTE(review): not const-qualified here; presumably the
 *            string is only read. Confirm against the definition
 *            before changing the prototype.
 *
 * @return Unsigned integer value of the hexadecimal string.
 */
unsigned long xtoi(char *str);


/**
 * @brief Determine if a task with the same mm exists within the task list.
 *
 * Tasks are compared by their mm value, which is how a thread (same mm
 * as an already listed task) is told apart from a new process.
 *
 * @param this_task Task to look for in the global task list.
 *
 * @retval TRUE The task @c this_task was found in the list.
 * @retval FALSE The task @c this_task was not found in the list.
 */
int find_task(task_t *this_task);

/**
 * @brief Add a task to the global task list.
 *
 * @param this_task The task to add to the list.
 *
 * @return None.
 */
void add_task(task_t *this_task);

/**
 * @brief Adds a VMA structure to the task's vma list.
 *
 * @param cur_task The task to which to add the vma.
 * @param cur_vma The vma to add to the list.
 *
 * @return None.
 */
void add_vma(task_t *cur_task, vma_t *cur_vma);

/**
 * @brief Print out help text on the input parameters.
 *
 * @return None.
 */
void print_usage(void);

/**
 * @brief Print out the task basic summary information.
 *
 * @param cur_task The task for which to print the summary information.
 * @param outf FILE pointer for the output file.
 * @param vma_detail Flag to indicate whether to print out details on VMA.
 * @param postfp FILE pointer to PostScript file. NULL when no PostScript.
 *
 * @return None.
 */
void task_summary(task_t *cur_task, FILE *outf, int vma_detail,
		  FILE *postfp);

/**
 * @brief Print the VMA flags in character form rather than hex values.
 *
 * @param cur_vma The VMA for which to print its flags.
 * @param outf FILE pointer for the output file.
 *
 * @return None.
 */
void print_vma_flags(vma_t *cur_vma, FILE *outf);
