diff --git a/tools/agentic_import/metrics/pvmap_generator_metrics.py b/tools/agentic_import/metrics/pvmap_generator_metrics.py new file mode 100644 index 0000000000..73ceeabf5a --- /dev/null +++ b/tools/agentic_import/metrics/pvmap_generator_metrics.py @@ -0,0 +1,245 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utilities to generate metrics for PV map generation for statvar imports. +""" + +import os +import sys +import subprocess +import tempfile +import time + +from absl import app +from absl import flags +from absl import logging + +_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(_SCRIPT_DIR) +sys.path.append(os.path.dirname(_SCRIPT_DIR)) +sys.path.append(os.path.dirname(os.path.dirname(_SCRIPT_DIR))) +sys.path.append( + os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(_SCRIPT_DIR)), 'util'))) +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(_SCRIPT_DIR)), + 'statvar_importer')) + +import file_util +from counters import Counters +from mcf_diff import diff_mcf_files +from mcf_file_util import get_value_list +from stat_var_test_runner import StatVarProcessorTestRunner, run_script + + +class PVMapGeneratorMetricsRunner(StatVarProcessorTestRunner): + """Class to generate metrics for PV map generation for statvar imports. + + The metrics are generated by running the statvar processor with the test + config and comparing the output with the expected output. + The files for pvmap egenration are specified in the test config as follows: + "test_name": "", + ... + "pvmap_generator_test": { + "pvmap_generator_args": { + "input_data": "", + "output_data": "", + }, + "expected_output_files": [ + "", + "", + ... + ], + } + + """ + + def __init__(self, test_config_file: str = None, test_config: dict = None): + super().__init__(test_config_file, test_config) + + def run_pvmap_generator(self, test_config: dict) -> dict: + """Runs a single instance of pvmap generator.""" + test_name = test_config.get('test_name', 'pvmap_generator_test') + test_dir = os.path.join(self._temp_dir.name, test_name) + output_dir = os.path.join(test_dir, 'generated') + logging.info( + f'Running pvmap generator for test {test_name}, output in {output_dir}, config: {test_config}' + ) + env_dict = dict(self.get_env_dict()) + env_dict.update(get_env_dict(test_config.get("env_file"))) + # Build the pvmap generator commandline + cmd_args = dict() + # Add default arguments for output + + cmd_args = merged_args([ + { + '--output_path': output_dir + }, + test_config.get('pvmap_generator_args', {}), + ]) + output_arg = get_arg(cmd_args, '--output_path') + if output_arg: + output_dir = os.path.realpath(output_arg) + if not output_dir.endswith('/'): + output_dir = os.path.dirname(output_dir) + + cwd = self.get_local_dir() + script_status = run_script( + interpreter=sys.executable, + script=os.path.join(os.path.dirname(_SCRIPT_DIR), + 'pvmap_generator.py'), + args=cmd_args, + cwd=cwd, + env=env_dict, + output_dir=output_dir, + log_dir=os.path.join(output_dir, 'debug'), + ) + + # Check for generated files + generated_config = { + 'pv_map': os.path.join(output_dir, 'generated_pvmap.csv'), + 'config_file': os.path.join(output_dir, 'generated_metadata.csv'), + } + script_status['generated_config'] = generated_config + for arg, file in generated_config.items(): + if not os.path.exists(file): + logging.error( + f'Failed to generate {file} for test: {test_name}') + script_status['status'] = 'FAIL' + + return script_status + + def run_sample_statvar_processor(self, test_config: dict, + generated_config: dict) -> dict: + """Run statvar processor on the sample input using the generated configs.""" + test_name = test_config.get('test_name', 'sample_statvar_processor') + test_dir = os.path.join(self._temp_dir.name, test_name) + output_dir = os.path.join(test_dir, 'sample') + env_dict = dict(self.get_env_dict()) + env_dict.update(get_env_dict(test_config.get("env_file"))) + + # Get commandline args for stavtar processor + # to use generated pvmap files. + cmd_args = merged_args([ + generated_config, + { + '--output_path': + output_dir, + '--output_data_pvs': + os.path.join(output_dir, 'output_data_pvs.csv'), + }, + ]) + script_status = self.run_statvar_processor(test_config, cmd_args) + + return script_status + + def generate_metrics(self, test_config: dict, test_status: dict) -> dict: + """Generates metrics for a test config. + + Args: + test_config: Dictionary with the test config. + + Returns: + Dictionary with the metrics. + """ + test_name = test_config.get('test_name', 'sample_statvar_processor') + test_dir = os.path.join(self._temp_dir.name, test_name) + outputs = test_config.get('pvmap_generator_outputs') + output_stats = self.verify_outputs(outputs, test_config, + test_status.get('output_dir')) + stats = {} + for output_name, stats in output_stats: + output_stats = output.get('counters', {}) + output_stats = self.get_stats_from_diff_counters(output_stats) + file_util.file_write_csv_dict( + output_stats, + os.path.join(test_dir, output_name + '-diff-counters.csv')) + stats[output_name] = output_stats + logging.info(f'Diff stats for {output_name}: {output_stats}') + + return stats + + def get_stats_from_diff_counters(self, + diff_stats: dict, + stats: dict = None) -> dict: + """Returns precision and recall stats from diff counters. + + Args: + diff_stats: diff counters for diff between actual and expected output files. + stats: output stats dictionary into which stats are added. + + """ + if stats is None: + stats = dict({ + 'false_positive': 0, + 'false_negative': 0, + 'true_positive': 0, + }) + counters = diff_stats.get('counters', {}) + matched = counters.get('pvs-matched', 0) + false_negative = counters.get('pvs-added', 0) + false_positive = counters.get('pvs-deleted', 0) + # Treat modfied as deleted+added + modified = counters.get('pvs-modified', 0) + false_negative += modified + false_positive += modified + + stats['true_positive'] += matched + stats['false_positive'] += false_positive + stats['false_negative'] += false_negative + + # Compute precision and recall + true_positive = stats['true_positive'] + precision = true_positive / (max( + true_positive + stats['false_positive'], 1)) + recall = true_positive / (max(true_positive + stats['false_negative'], + 1)) + + stats['precision'] = precision + stats['recall'] = recall + stats['f1'] = 2 * (precision * recall) / max(precision + recall, 1) + + return stats + + def run_test(self, test_output: str = None) -> dict: + """Runs the pvmap generator and returns the comparison metrics. + + Args: + test_output: JSON file with the test output status + + Returns: + JSON dict with the test summary + """ + cwd = self.get_local_dir() + os.chdir(cwd) + return_status = {'status': 'PASS'} + for index, test_config in enumerate(self._test_config): + test_status = {} + test_name = test_config.get('test_name') + if not test_name: + test_name = f'pvmap-generator-test-{index}' + basedir = os.path.basename(self.get_local_dir()) + if basedir: + test_name += '-' + basedir + test_config['test_name'] = test_name + logging.info(f'Running pvmap generator test: {index}:{test_name}') + pvmap_generator_status = self.run_pvmap_generator(test_config) + test_status['pvmap_generator_status'] = pvmap_generator_status + + logging.info( + f'Running statvar processor for test: {index}:{test_name}') + statvar_status = self.run_sample_statvar_processor( + test_config, pvmap_generator_status.get('generated_config', {})) + + test_status['statvar_processor_status'] = statvar_status + diff --git a/tools/statvar_importer/mcf_diff.py b/tools/statvar_importer/mcf_diff.py index e7eaa825f8..eae35b90cd 100644 --- a/tools/statvar_importer/mcf_diff.py +++ b/tools/statvar_importer/mcf_diff.py @@ -190,10 +190,11 @@ def diff_mcf_node_pvs(node_1: dict, pvs_deleted = set() pvs_added = set() pvs_modified = set() + pvs_matched = set() for d in diff: diff_str.append(d) - if d[0] == ' ': - counters and counters.add_counter(f'PVs-matched', 1) + if d[0] == ' ' and len(d) > 1: + counters and counters.add_counter(f'pvs-matched', 1) else: prop = '' value = '' @@ -204,10 +205,16 @@ def diff_mcf_node_pvs(node_1: dict, if prop: pvs_deleted.add(prop) has_diff = True - if d[0] == '+': + elif d[0] == '+': if prop: pvs_added.add(prop) has_diff = True + elif d[0] == '?': + # Ignore modifications that already show as delete/add + has_diff = True + else: + if prop: + pvs_matched.add(prop) if has_diff: pvs_modified = pvs_deleted.intersection(pvs_added) pvs_added = pvs_added.difference(pvs_modified) @@ -217,6 +224,7 @@ def diff_mcf_node_pvs(node_1: dict, (pvs_modified, 'modified'), (pvs_added, 'added'), (pvs_deleted, 'deleted'), + (pvs_matched, 'matched'), ]: for prop in props: counters.add_counter(f'pvs-{diff_type}', 1) @@ -230,7 +238,9 @@ def diff_mcf_node_pvs(node_1: dict, else: counters and counters.add_counter(f'nodes-missing-in-mcf1', 1) else: - counters and counters.add_counter(f'nodes-matched', 1) + if counters: + counters.add_counter(f'nodes-matched', 1) + counters.add_counter(f'pvs-matched', len(pvs_matched)) return has_diff, '\n'.join(diff_str), pvs_added, pvs_deleted, pvs_modified diff --git a/tools/statvar_importer/stat_var_processor.py b/tools/statvar_importer/stat_var_processor.py index 487cce556f..d0184e022c 100644 --- a/tools/statvar_importer/stat_var_processor.py +++ b/tools/statvar_importer/stat_var_processor.py @@ -83,7 +83,7 @@ from mcf_filter import drop_existing_mcf_nodes from mcf_diff import fingerprint_node, fingerprint_mcf_nodes, diff_mcf_node_pvs from place_resolver import PlaceResolver -from property_value_mapper import PropertyValueMapper +from property_value_mapper import PropertyValueMapper, write_pv_map from schema_resolver import SchemaResolver from json_to_csv import file_json_to_csv from schema_generator import generate_schema_nodes, generate_statvar_name @@ -1429,6 +1429,8 @@ def __init__( self._config.get('numeric_data_key', 'Number'), self._config.get('pv_lookup_key', 'Key'), ] + # Save per input cell PVs if required. + self._data_pvs = {} if self._config.get('output_data_pvs') else None def generate_pvmap(self): """Generate a PV Map from the input data.""" @@ -2272,6 +2274,8 @@ def process_row(self, row: list, row_index: int): row_index, col_index + 1) and self.process_stat_var_obs_pvs( merged_col_pvs, row_index, col_index): row_svobs += 1 + if self._data_pvs is not None: + self._data_pvs[self._file_context] = merged_col_pvs self.process_row_header_pvs(row, row_index, row_col_pvs, row_svobs, cols_with_pvs) # If row has no SVObs but has PVs, it must be a header. @@ -2818,6 +2822,11 @@ def write_outputs(self, output_path: str): columns=self._config.get('output_columns', []), output_tmcf_file=output_tmcf_file, ) + output_data_pvs = self._config.get('output_data_pvs') + if output_data_pvs and self._data_pvs: + write_pv_map(self._data_pvs, output_data_pvs) + self._counters.add_counter('output-data-pvs', len(self._data_pvs)) + self._counters.print_counters() counters_filename = self._config.get('output_counters', output_path + '_counters.txt') diff --git a/tools/statvar_importer/stat_var_test_runner.py b/tools/statvar_importer/stat_var_test_runner.py new file mode 100644 index 0000000000..ed9a6fe99b --- /dev/null +++ b/tools/statvar_importer/stat_var_test_runner.py @@ -0,0 +1,524 @@ +# Copyright 2026 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utilities to test statvar imports. + +StatVar imports use a JSON test config for regression tests that invoke +statvar processor with a specified set of configs on a test input and +verifies the expected output files. +The test config is a list of statvar processor command line invocation +settings with each invocation as a dictionary with the following sections:a + test_name: name of the test for logs + env_file: a file with environment variables such as API keys + statvar_processor_args: dictionary with the command line arguments + for statvar processor configs. + expected_outputs: + A dictionary of expected output files mapped to output files. + + All file references are releative to the import folder with the + test config. + +Example: +[ + # Parameters for a single invocation of statvar processor + { + "test_name": "Test_Import", + + # Enviroment file loaded with dictionary of env settings + # such as API keys + "env_file": "gs://datcom-prod-imports/config/test_env.csv" + + # Statvar processor command line arguments + "statvar_processor_args": { + "config_file": "metadata.csv", + "pv_map": "import_pvmap.csv", + "places_resolved_csv": "places.csv", + "input_data": "test_data/sample_input.csv", + }, + + # Statvar processor output files with expected outputs to compare with + "statvar_processor_outputs": [ + { + "output_name": "Test_Import_Observations", + "output_file": "", + "expected_output_file": "test_data/sample_output.csv", + }, + ] + } +] + +""" + +import os +import sys +import subprocess +import tempfile +import time + +from absl import app +from absl import flags +from absl import logging + +_SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(_SCRIPT_DIR) +sys.path.append(os.path.dirname(_SCRIPT_DIR)) +sys.path.append(os.path.dirname(os.path.dirname(_SCRIPT_DIR))) +sys.path.append( + os.path.join(os.path.dirname(os.path.dirname(_SCRIPT_DIR)), 'util')) + +import file_util +from mcf_diff import diff_mcf_files +from mcf_file_util import get_value_list + +from counters import Counters + +flags.DEFINE_string('test_config', '', 'JSON test config file') +flags.DEFINE_string('test_output', '', 'Output JSON file with test summary') + +_FLAGS = flags.FLAGS + +_BASH_PATH = os.environ.get('BASH_PATH', 'bash') + + +class StatVarProcessorTestRunner: + """Class to run statvar processor for a test config + """ + + def __init__(self, test_config_file: str = None, test_config: dict = None): + self._temp_dir = tempfile.TemporaryDirectory() + self._test_config = [] + if test_config: + self._test_config.extend(test_config) + self._env_dict = {} + self.load_config(test_config_file) + self._counters = Counters() + + def __del__(self): + """Cleanup local files.""" + self._temp_dir.cleanup() + + def load_config(self, test_config_file: str): + """Loads a test config from a JSON file.""" + if test_config_file: + file_config = file_util.file_load_py_dict(test_config_file) + logging.info(f'Loading test config: {file_config}') + self._test_config.extend(file_config) + self._test_config_file = os.path.realpath(test_config_file) + self._env_dict.update( + get_env_dict(self._test_config[0].get('env_file', ''))) + + def get_config(self) -> dict: + """Returns the test config.""" + return self._test_config + + def get_local_dir(self) -> str: + """Returns the local directory for the test.""" + if self._test_config_file: + return os.path.dirname(self._test_config_file) + return os.getcwd() + + def get_env_dict(self) -> dict: + """Returns the environment variables for the test.""" + return self._env_dict + + def run_tests(self, test_output: str = None) -> dict: + """Runs all the statvar processor test and verified each output. + + Args: + test_output: JSON file with the test output status + + Returns: + JSON dict with the test summary + """ + cwd = self.get_local_dir() + os.chdir(cwd) + logging.info( + f'Running statvar processor for {len(self._test_config)} tests from {cwd}' + ) + return_status = {'status': 'PASS'} + for index, test_config in enumerate(self._test_config): + test_name = test_config.get('test_name') + if not test_name: + test_name = f'statvar-processor-test-{index}' + basedir = os.path.basename(self.get_local_dir()) + if basedir: + test_name += '-' + basedir + test_config['test_name'] = test_name + logging.info(f'Running statvar processor test: {index}:{test_name}') + test_status = self.run_statvar_processor(test_config) + if test_status.get('returncode', 1) != 0: + logging.error( + f'statvar processor test failed for {test_name}: {test_status}' + ) + return_status['status'] = 'FAIL' + else: + outputs = test_config.get('statvar_processor_outputs') + if outputs: + output_status = self.verify_outputs( + outputs, test_config, test_status.get('output_dir')) + if output_status.get('status', '') != 'PASS': + logging.error( + f'Failed to verify outputs for test: {index}:{test_name}:{output_status}' + ) + return_status['status'] = 'FAIL' + test_status['output_status'] = output_status + return_status[test_name] = test_status + logging.info( + f'statvar processor test: {index}:{test_name}: {return_status["status"]}' + ) + if test_output: + file_util.file_write_py_dict(return_status, test_output) + return return_status + + def run_statvar_processor(self, + config: dict, + override_args: dict = {}) -> dict: + """Runs a single instance of statvar processor.""" + test_name = config.get('test_name', 'statvar_processor_test') + test_dir = os.path.join(self._temp_dir.name, test_name) + output_dir = os.path.join(test_dir, 'output') + env_dict = dict(self.get_env_dict()) + env_dict.update(get_env_dict(config.get("env_file"))) + # Build the statvar processor commandline + cmd_args = merge_args([ + { + '--output_path': '"{output_dir}"', + '--output_counters': '"{output_dir}/statvar_counters.json"', + }, + config.get('statvar_processor_args', {}), + override_args, + ]) + output_arg = get_arg(cmd_args, '--output_path') + if output_arg: + output_dir = os.path.realpath(output_arg) + if not output_dir.endswith('/'): + output_dir = os.path.dirname(output_dir) + cwd = self.get_local_dir() + script_status = run_script( + interpreter=sys.executable, + script=os.path.join(_SCRIPT_DIR, 'stat_var_processor.py'), + args=cmd_args, + cwd=cwd, + env=env_dict, + output_dir=output_dir, + log_dir=os.path.join(output_dir, 'debug'), + ) + script_status['output_dir'] = output_dir + return script_status + + def verify_outputs(self, outputs: list[dict], config: dict, + output_path: dir) -> dict: + """Compare actual and expected outputs. + + Args: + outputs: list of expected and actual outputs to be compared. + each item in the list is a dictionary: + { + output_name: (optional) name of the output for debug and logs + output_file: actual output file to be compared. + this is a local file or path relative to output_path + expected_output_file: path to expected file to be compared with + diff_config: (optional) dictionary with diff configs such as: + ignore_property: 'name' + } + + Returns: + dictionary with the overall status as well as a list + of status per output. + { + 'status': 'PASS' if there are no diffs else 'FAIL' + + # diff summary per output + '': { + + 'diff_status': 'MATCHED' or 'DELETED' or 'MODIFIED' + 'deleted': count of deleted nodes + 'modified': count of modified nodes + }, + } + """ + logging.info(f'Comparing outputs: {outputs}') + return_status = {'status': 'PASS'} + for index, output in enumerate(outputs): + expected_files = file_util.file_get_matching( + output.get('expected_output_file')) + if not expected_files: + logging.warning( + f'Ignoring config without expected file: {output}') + continue + output_name = output.get('name') + if not output_name: + output_name = f'output_{index}_{os.path.basename(expected_files[0])}' + if output_name in return_status: + output_name = output_name + '-' + str(index) + actual_files = output.get('output_file') + actuals = [] + for file in get_value_list(actual_files): + matching_files = file_util.file_get_matching( + os.path.join(output_path, file)) + if not matching_files: + matching_files = file_util.file_get_matching(file) + if matching_files: + actuals.extend(matching_files) + if not actuals: + logging.error(f'Missing outputs for {output}') + return_status['status'] = 'ERROR' + diff_status = diff_files( + actuals, expected_files, output.get('diff_config', {}), + os.path.join(output_path, f'{output_name}.diff')) + if diff_status.get('diff_status') != 'MATCHED': + return_status['status'] = 'FAIL' + logging.error( + f'Failed to match {index}:{output}, diff: {diff_status}') + return_status[output_name] = diff_status + return_status[output_name]['name'] = output_name + logging.info( + f'Verify outputs: {return_status.get("status")} for {outputs}') + return return_status + + +def merge_args(self, args: list[dict]) -> dict: + """Returns a dictionary of command line args from a list of args. + + Args: + args: list of dictionary of command line argument and valies: + [ + { 'arg1': 'value1', ...}, + { ...}, + ... + } + + Returns: + Dictionary of command line args with the value from the last arg. + """ + + if not isinstance(args, list): + args = list(args) + + # Merge arguments keeping the last value + merged_args = dict() + for args_dict in args: + for arg, value in args_dict.items(): + if value is not None and not arg.startswith('--'): + arg = '--' + arg + merged_args[arg] = value + + return merged_args + + +def get_args_list(self, args: dict) -> list: + """Returns a list of command line args.""" + cmd_args = [] + if isinstance(args, dict): + for arg, val in args.items(): + if val: + cmd_args.append(f'{arg}={val}') + else: + cmd_args.append(f'{arg}') + elif isinstance(args, list): + cmd_args.extend(args) + else: + cmd_args.append(args) + return cmd_args + + +def get_arg(self, args_dict: dict, arg: str, default=None) -> str: + """Returns the value of a specific arg if present.""" + return args_dict.get(arg, default) + + +def get_env_dict(filename: str) -> dict: + """Returns a dictionary of env variable settings from a file. + + Args: + filename: file with the environemnt settings. + It can be a csv, json, txt or shell file. + + Returns: + dictionary of env variable to values mappings. + """ + env_dict = {} + if isinstance(filename, dict): + # arguent is a dictionary of env variables. Use it as is. + return filename + + env_files = file_util.file_get_matching(filename) + for env_file in env_files: + _, file_ext = os.path.splitext(env_file) + if file_ext == '.sh' or file_ext == ".txt" or file_ext == '.env': + # File has one variable per line + with file_util.FileIO(env_file) as fp: + for line in fp.readlines(): + line = line.strip() + if line.startswith('#'): + # Ignore commented lines + continue + if '=' in line: + env_var, value = line.split('=', 1) + env_var = env_var.strip().strip('"') + value = value.strip().strip('"') + env_dict[env_var] = value + else: + # Assume the file is a dictionary + env_dict.update(file_util.file_load_py_dict(env_file)) + + logging.info(f'Loaded {len(env_dict)} env vars from {env_files}') + return env_dict + + +def run_script(interpreter: str, script: str, args: list | dict, cwd: str, + env: dict, output_dir: str, log_dir: str) -> dict: + """Run a commandline script as a child process + + Args: + interpreter: interpreter for the script such as python, bash + script: local or full path to the script file. + args: command line arguments for the script as a list + cwd: current directory for the script + env: dictionary of environment variables. + output_dir: fodler for output files generated by the script, if any. + log_dir: directory to store logs. + + Returns: + dictionary of command and status + """ + cmd_args = [] + if interpreter: + cmd_args.append(interpreter) + else: + if script and script.endswith('.py'): + # For python scripts use the current script's python binary + interpreter = sys.executable + if script and script.endswith('.sh'): + # For shell scripts use bash + interpreter = _BASH_PATH + if script: + cmd_args.append(script) + if args: + cmd_args.extend(get_args_list(args)) + + logging.info(f'Running command: {cmd_args} in {cwd}, env: {env}') + env_dict = dict(os.environ) + if env: + env_dict.update(env) + start_time = time.perf_counter() + process = subprocess.Popen( + cmd_args, + cwd=cwd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env_dict, + ) + + # Log output continuously until the command completes. + stderr_file = os.path.join(log_dir, 'stderr.log') + stdout_file = os.path.join(log_dir, 'stdout.log') + with file_util.FileIO(stderr_file, 'wb') as f_err: + for line in process.stderr: + f_err.write(line) + logging.info(f'stderr: {line}') + with file_util.FileIO(stdout_file, 'wb') as f_out: + for line in process.stdout: + f_out.write(line) + logging.info(f'stdout: {line}') + + # Wait for process to complete + process.wait() + end_time = time.perf_counter() + return_code = process.returncode + duration = end_time - start_time + + logging.info( + f'Completed command: {cmd_args}, return code: {return_code}, time: {duration} secs, logs in: {log_dir}' + ) + + return { + 'command': ' '.join(cmd_args), + 'args': args, + 'cwd': cwd, + 'returncode': return_code, + 'output_path': output_dir, + 'debug': log_dir, + 'duration': duration, + 'stdout': stdout_file, + 'stderr': stderr_file, + } + + +def diff_files(actual: str, + expected: str, + diff_config: dict = {}, + diff_output_file: str = None) -> dict: + """Compares actual and expected files and returns dictionary with results. + + Args: + actual: list of actual files as csv or mcf. + expected: list of expected output files as csv or mcf + diff_config: dictionary of diff settings (refer to mcf_diff:diff_mcf_files) + compare_dcids: list of dcids to be compared + compare_nodes_with_pv: only compare nodes that have a listed propeorty:value + ignore_nodes_with_pv: ignore nodes with any of the property:value listed + compare_property: only compare listed propeorties in a node + + Returns: + { + status: MATCH or DELETED or MODIFIED + diff_log: log file with all the text style diffs with +|- prefixes. + missing: count of expected nodes missing in actual + modified: count of nodes expected nodes with modified pvs in actual + sample: a sample of 100 lines of diff output + } + """ + + counters = Counters() + diff_str = diff_mcf_files(actual, expected, diff_config, counters) + matched = counters.get_counter('nodes-matched') + deleted = counters.get_counter('dcid-missing-in-nodes1') + added = counters.get_counter('dcid-missing-in-nodes2') + modified = counters.get_counter('nodes-with-diff') + status = 'MATCHED' + if modified: + status = 'MODIFIED' + if deleted: + status = 'DELETED' + return_status = { + 'actual': actual, + 'expected': expected, + 'diff_status': status, + 'missing': deleted, + 'modified': modified, + 'added': added, + 'matched': matched, + 'counters': counters.get_counters(), + } + logging.info(f'Diff summary: {return_status}') + + if diff_str: + if diff_output_file: + with file_util.FileIO(diff_output_file, 'w') as diff_file: + diff_file.write(diff_str) + return_status['diff_output'] = diff_output_file + return_status['diff_sample'] = diff_str[:1000] + + return return_status + + +def main(_): + statvar_processor_tester = StatVarProcessorTestRunner( + test_config_file=_FLAGS.test_config) + test_result = statvar_processor_tester.run_tests(_FLAGS.test_output) + logging.info(f'Test result: {test_result}') + + +if __name__ == '__main__': + app.run(main)