Source code for chc.app.CApplication

# ------------------------------------------------------------------------------
# CodeHawk C Analyzer
# Author: Henny Sipma
# ------------------------------------------------------------------------------
# The MIT License (MIT)
#
# Copyright (c) 2017-2020 Kestrel Technology LLC
# Copyright (c) 2020-2022 Henny B. Sipma
# Copyright (c) 2023-2024 Aarno Labs LLC
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# ------------------------------------------------------------------------------
"""Main access point to the analysis of a C application.

"""

from typing import (
    Any, Callable, Dict, Iterable, List, Optional, Tuple, TYPE_CHECKING)
import os
import multiprocessing
import sys

from chc.api.CGlobalContract import CGlobalContract

from chc.app.CCompInfo import CCompInfo
from chc.app.CFile import CFile
from chc.app.CVarInfo import CVarInfo
from chc.app.IndexManager import IndexManager, FileVarReference, FileKeyReference
from chc.app.CGlobalDeclarations import CGlobalDeclarations
from chc.app.CGlobalDictionary import CGlobalDictionary

from chc.source.CSrcFile import CSrcFile

import chc.util.fileutil as UF
from chc.util.loggingutil import chklogger


if TYPE_CHECKING:
    from chc.app.CFunction import CFunction
    from chc.app.CInstr import CCallInstr
    from chc.proof.CFunctionCallsiteSPOs import CFunctionCallsiteSPOs
    from chc.proof.CFunctionPO import CFunctionPO


[docs]class CApplication(object): """Primary access point for source code and analysis results. An application can consist of a single file, or of multiple files managed by a Makefile. In case of a single file the following call on CApplication initializes the single file: - capp.initialize_single_file(cfilename) The filepath in this case is assumed to be empty. The file index is set to 0. In case of multiple files the following file is assumed to be present in the top analysis-results directory: - <projectpath>/<projectname>.cch/a/target_files.xml This file, normally created by the CodeHawk-C parser, is expected to contain a list of c-file entries that provide the attributes: - id: a unique file index, a number greater than zero; - name: a string denoting the relative path of the file w.r.t. the project directory (e.g., src/cgi/buffer.c) """ def __init__( self, projectpath: str, projectname: str, targetpath: str, contractpath: str, singlefile: bool = False, excludefiles: List[str] = []) -> None: self._projectpath = projectpath self._projectname = projectname self._targetpath = targetpath self._contractpath = contractpath self._singlefile = singlefile self._excludefiles = excludefiles self._indexmanager = IndexManager(singlefile) self._globalcontract: Optional[CGlobalContract] = None self._dictionary: Optional[CGlobalDictionary] = None self._declarations: Optional[CGlobalDeclarations] = None self._files: Optional[Dict[int, CFile]] = None # file-index -> CFile self._callgraph: Optional[ Dict[Tuple[int, int], List[Tuple[Tuple[int, int], "CFunctionCallsiteSPOs"]]]] = None self._revcallgraph: Optional[ Dict[Tuple[int, int], List[Tuple[Tuple[int, int], "CFunctionCallsiteSPOs"]]]] = None @property def projectpath(self) -> str: return self._projectpath @property def projectname(self) -> str: return self._projectname @property def targetpath(self) -> str: return self._targetpath @property def contractpath(self) -> str: return self._contractpath @property def globalcontract(self) -> CGlobalContract: if self._globalcontract is None: self._globalcontract = CGlobalContract(self) return self._globalcontract @property def is_singlefile(self) -> bool: return self._singlefile @property def excludefiles(self) -> List[str]: return self._excludefiles @property def files(self) -> Dict[int, CFile]: """Returns a map from file-indexes to CFile objects.""" if self._files is None: self._files = {} if self.is_singlefile: chklogger.logger.error( "Single-file application should be initialized via " + " the call initialize_single_file(cfilename)") raise UF.CHCError("Single file not yet initialized") else: self._initialize_from_target_files() return self._files @property def cfiles(self) -> Iterable[CFile]: return self.files.values() @property def filenames(self) -> List[str]: """Returns full filenames relative to the project path (without .c).""" return [cfile.name for cfile in self.files.values()]
[docs] def is_application_header(self, name: str) -> bool: """Returns true if the name corresponds to an application file.""" for n in self.filenames: if name == os.path.basename(n): return True else: return False
@property def filexref(self) -> Dict[str, int]: """Returns a map from c filenames to file-indexes. Note: filenames include extension and paths relative to project directory. """ return {cfile.name: index for (index, cfile) in self.files.items()} @property def indexmanager(self) -> IndexManager: return self._indexmanager @property def dictionary(self) -> CGlobalDictionary: if self._dictionary is None: xnode = UF.get_global_dictionary_xnode( self.targetpath, self.projectname) self._dictionary = CGlobalDictionary(self, xnode) return self._dictionary @property def declarations(self) -> CGlobalDeclarations: if self._declarations is None: xnode = UF.get_global_declarations_xnode( self.targetpath, self.projectname) self._declarations = CGlobalDeclarations(self, xnode) return self._declarations
[docs] def get_max_filename_length(self) -> int: return max([len(x) for x in self.filenames])
[docs] def get_cfile(self) -> CFile: """Returns the CFile object in a single-file project.""" if self.is_singlefile: if 0 in self.files: return self.files[0] else: raise UF.CHCError("Single file has not been initialized") else: raise UF.CHCError( "This application is not a single-file application")
[docs] def get_file(self, fname: str) -> CFile: """Access to file with full relative path and no extension.""" if fname in self.filexref: return self.files[self.filexref[fname]] else: chklogger.logger.error("File not found: %s", fname) raise UF.CHCError(f"File with name {fname} not found")
[docs] def has_file(self, fname: str) -> bool: return fname in self.filexref
[docs] def get_file_by_index(self, index: int) -> CFile: if index in self.files: return self.files[index] else: raise UF.CHCError(f"File with index {index} not found")
[docs] def has_file_index(self, index: int) -> bool: return index in self.files
[docs] def get_function(self, filevar: FileVarReference) -> "CFunction": if self.has_file_index(filevar.fid): cfile = self.get_file_by_index(filevar.fid) if cfile.has_function_by_index(filevar.vid): return cfile.get_function_by_index(filevar.vid) else: raise UF.CHCError( f"Function with index {filevar.vid} not found in file {cfile.name}") else: raise UF.CHCError(f"File with index {filevar.fid} not found")
[docs] def has_function(self, filevar: FileVarReference) -> bool: if self.has_file_index(filevar.fid): cfile = self.get_file_by_index(filevar.fid) return cfile.has_function_by_index(filevar.vid) else: return False
[docs] def get_callsites( self, fid: int, vid: int) -> List[Tuple[Tuple[int, int], "CFunctionCallsiteSPOs"]]: """Return a list of ((fid, vid), callsitespos).""" if (fid, vid) in self.revcallgraph: return self.revcallgraph[(fid, vid)] return []
[docs] def iter_files(self, f: Callable[[CFile], None]) -> None: chklogger.logger.info( "Iter files over %d cfiles", len(list(self.cfiles))) for file in list(self.cfiles): f(file)
[docs] def iter_files_parallel( self, f: Callable[[CFile], None], processes: int) -> None: for cfile in self.cfiles: while len(multiprocessing.active_children()) >= processes: pass p = multiprocessing.Process(target=f, args=(cfile,)) p.start() while len(multiprocessing.active_children()) > 0: pass
[docs] def iter_functions(self, f: Callable[["CFunction"], None]) -> None: def g(fi: CFile) -> None: fi.iter_functions(f) self.iter_files(g)
[docs] def iter_functions_parallel( self, f: Callable[["CFunction"], None], maxprocesses: int) -> None: def g(fi: CFile) -> None: fi.iter_functions(f) self.iter_files_parallel(g, maxprocesses)
[docs] def resolve_vid_function( self, filevar: FileVarReference) -> Optional["CFunction"]: """Returns the function def for the local file-index fid and vid. Note: the function definition may or may not reside in the same file, that is, the c-file with index fid may have a function declaration for the function with index vid, but not a a definition. The location of the definition is obtained from the index manager via the corresponding global index of the (local) vid (as initialized by the linker). """ defvar = self.indexmanager.resolve_vid(filevar) if defvar is not None: if defvar.fid in self.files: deffile = self.files[defvar.fid] if deffile.has_function_by_index(defvar.vid): return deffile.get_function_by_index(defvar.vid) else: chklogger.logger.warning( ("Function definition %d not found in file %d for " + "(fid:%d, vid:%d)"), defvar.vid, defvar.fid, filevar.fid, filevar.vid) return None else: chklogger.logger.warning( "File with index %d not found for (fid: %d, vid: %d)", defvar.fid, filevar.fid, filevar.vid) return None else: chklogger.logger.warning( "No function definition found for (fid: %d, vid: %d)", filevar.fid, filevar.vid) return None
[docs] def convert_vid(self, filevar: FileVarReference, tgtfid: int) -> int: cvid = self.indexmanager.convert_vid(filevar, tgtfid) if cvid is None: raise UF.CHCError("Error in convert_vid: " + str(filevar.vid)) return cvid
[docs] def get_gckey(self, filekey: FileKeyReference) -> Optional[int]: return self.indexmanager.get_gckey(filekey)
[docs] def get_function_by_index(self, index: int) -> "CFunction": for f in self.files: if self.files[f].has_function_by_index(index): return self.files[f].get_function_by_index(index) else: raise UF.CHCError(f"Function with index {index} not found")
[docs] def get_callinstrs(self) -> List["CCallInstr"]: result: List["CCallInstr"] = [] def f(fi: CFile) -> None: result.extend(fi.get_callinstrs()) self.iter_files(f) return result
# ------------------- Application statistics -----------------------------
[docs] def get_line_counts(self) -> str: counts: Dict[str, Tuple[int, int, int]] = {} def f(cfile: CFile) -> None: counts[cfile.name] = ( cfile.declarations.get_max_line(), cfile.declarations.get_code_line_count(), cfile.functioncount, ) self.iter_files(f) flen = self.get_max_filename_length() lines: List[str] = [] lines.append( "file".ljust(flen) + "LOC".rjust(12) + "CLOC".rjust(12) + "functions".rjust(12) ) lines.append("-" * (flen + 36)) for (c, (ml, mc, fc)) in sorted(counts.items()): lines.append( c.ljust(flen) + str(ml).rjust(12) + str(mc).rjust(12) + str(fc).rjust(12) ) lines.append("-" * (flen + 36)) mltotal = sum(x[0] for x in counts.values()) mctotal = sum(x[1] for x in counts.values()) fctotal = sum(x[2] for x in counts.values()) lines.append( "total".ljust(flen) + str(mltotal).rjust(12) + str(mctotal).rjust(12) + str(fctotal).rjust(12) ) return "\n".join(lines)
[docs] def get_project_counts( self, filefilter: Callable[[str], bool] = lambda f: True ) -> Tuple[int, int, int]: linecounts: List[int] = [] clinecounts: List[int] = [] cfuncounts: List[int] = [] def f(cfile: CFile) -> None: if filefilter(cfile.name): decls = cfile.declarations linecounts.append(decls.get_max_line()) clinecounts.append(decls.get_code_line_count()) cfuncounts.append(cfile.functioncount) self.iter_files(f) return (sum(linecounts), sum(clinecounts), sum(cfuncounts))
[docs] def update_spos(self) -> None: """Create supporting proof obligations for all call sites.""" def f(fn: "CFunction") -> None: fn.update_spos() fn.save_spos() fn.save_pod() def h(cfile: CFile) -> None: cfile.iter_functions(f) cfile.save_predicate_dictionary() cfile.save_interface_dictionary() cfile.save_declarations() self.iter_files(h)
[docs] def collect_post_assumes(self) -> None: """Collect postconditions from callee's contracts and add as assume.""" for fi in self.cfiles: fi.collect_post_assumes()
[docs] def distribute_post_guarantees(self) -> None: """add callee postcondition guarantees to call sites as assumptions""" if self.contractpath is None: return # no contracts provided def f(fn: "CFunction") -> None: fn.distribute_post_guarantees() fn.save_spos() fn.save_pod() def h(cfile: CFile) -> None: cfile.iter_functions(f) cfile.save_predicate_dictionary() cfile.save_interface_dictionary() cfile.save_declarations() self.iter_files(h)
[docs] def reinitialize_tables(self) -> None: def f(fi: CFile) -> None: fi.reinitialize_tables() self.iter_files(f)
[docs] def reload_ppos(self) -> None: """Reload primary proof obligations after analyzer has run.""" def f(fn: "CFunction") -> None: fn.reload_ppos() self.iter_functions(f)
[docs] def reload_spos(self) -> None: """Reload supporting proof obligations after analyzer has run.""" def f(fn: "CFunction") -> None: fn.reload_spos() self.iter_functions(f)
[docs] def get_contract_condition_violations( self) -> List[Tuple[str, List[Tuple[str, str]]]]: result: List[Tuple[str, List[Tuple[str, str]]]] = [] def f(fn: "CFunction") -> None: if fn.violates_contract_conditions(): result.append((fn.name, fn.get_contract_condition_violations())) self.iter_functions(f) return result
[docs] def get_ppos(self) -> List["CFunctionPO"]: result: List["CFunctionPO"] = [] def f(fn: "CFunction") -> None: result.extend(fn.get_ppos()) self.iter_functions(f) return result
[docs] def get_spos(self) -> List["CFunctionPO"]: result: List["CFunctionPO"] = [] def f(fn: "CFunction") -> None: result.extend(fn.get_spos()) self.iter_functions(f) return result
[docs] def get_open_ppos(self) -> List["CFunctionPO"]: result: List["CFunctionPO"] = [] def f(fn: "CFunction") -> None: result.extend(fn.get_open_ppos()) self.iter_functions(f) return result
[docs] def get_ppos_violated(self) -> List["CFunctionPO"]: result: List["CFunctionPO"] = [] def f(fn: "CFunction") -> None: result.extend(fn.get_ppos_violated()) self.iter_functions(f) return result
[docs] def get_ppos_delegated(self) -> List["CFunctionPO"]: result: List["CFunctionPO"] = [] def f(fn: "CFunction") -> None: result.extend(fn.get_ppos_delegated()) self.iter_functions(f) return result
[docs] def initialize_files(self, filenames: Dict[int, str]) -> None: self._files = {} for (index, fname) in filenames.items(): self._files[index] = self._initialize_file(index, fname)
def _initialize_from_target_files(self) -> None: chklogger.logger.info("Initialize from target files") tgtxnode = UF.get_targetfiles_xnode(self.targetpath, self.projectname) if tgtxnode is None: raise UF.CHCXmlParseError(self.targetpath, 0, (0, 0)) self._files = {} for c in tgtxnode.findall("c-file"): cfilename_c = c.get("name") if cfilename_c is None: raise UF.CHCXmlParseError(self.targetpath, 0, (0, 0)) if cfilename_c in self.excludefiles: continue id = c.get("id") if id is None: chklogger.logger.error( "No id found for target file %s", cfilename_c) else: fid = int(id) self._files[fid] = self._initialize_file(fid, cfilename_c)
[docs] def initialize_single_file(self, fname: str) -> None: xcfile = UF.get_cfile_xnode( self.targetpath, self.projectname, None, fname) if xcfile is not None: cfile = CFile(self, 0, fname, None) self._files = {} self._files[0] = cfile self.indexmanager.add_file(cfile) chklogger.logger.info("Single c file was initialized %s", fname) else: chklogger.logger.error("c_file could not be extracted %s", fname)
def _initialize_file(self, index: int, fname: str) -> CFile: xcfilepath = os.path.dirname(fname) cfilename = os.path.basename(fname) if xcfilepath == "": cfilepath: Optional[str] = None else: cfilepath = xcfilepath chklogger.logger.info( "Initialize file name: %s, file path: %s", cfilename, str(cfilepath)) cfile = CFile(self, index, cfilename[:-2], cfilepath) self.indexmanager.add_file(cfile) chklogger.logger.info("initialized cfile %s", fname) return cfile @property def callgraph(self) -> Dict[ Tuple[int, int], List[Tuple[Tuple[int, int], "CFunctionCallsiteSPOs"]]]: if self._callgraph is None: self._callgraph = {} for (fid, cfile) in self.files.items(): for (vid, cfun) in cfile.functions.items(): for cs in cfun.proofs.spos.callsite_spos.values(): if cs.has_callee() and cs.callee is not None: fncallee = FileVarReference(fid, cs.callee.vid) fundef = self.indexmanager.resolve_vid(fncallee) if fundef is not None: self._callgraph.setdefault((fid, vid), []) self._callgraph[(fid, vid)].append( (fundef.tuple, cs)) return self._callgraph @property def revcallgraph(self) -> Dict[ Tuple[int, int], List[Tuple[Tuple[int, int], "CFunctionCallsiteSPOs"]]]: if self._revcallgraph is None: self._revcallgraph = {} for s in self.callgraph: for (t, cs) in self.callgraph[s]: self._revcallgraph.setdefault(t, []) self._revcallgraph[t].append((s, cs)) return self._revcallgraph