Allow target function to output columns already present in the data set #3214

Merged · 5 commits · May 13, 2024
174 changes: 118 additions & 56 deletions src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
@@ -2,9 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
import inspect
import os
import re
import tempfile
from typing import Any, Callable, Dict, Optional, Set, Tuple

import pandas as pd
@@ -139,19 +137,29 @@ def _apply_target_to_data(
# Remove input and output prefix
prefix = "outputs."
rename_dict = {col: col[len(prefix):] for col in target_output.columns if col.startswith(prefix)}
# Sometimes user data may contain a column with the same name as one generated by the target.
# In this case we will not rename the column.
generated_columns = set(rename_dict.values())
tgt_out_and_input = set()
for col in initial_data.columns:
if col in generated_columns:
tgt_out = f'{prefix}{col}'
del rename_dict[tgt_out]
tgt_out_and_input.add(tgt_out)
tgt_out_and_input.add(col)
# Sort output by line numbers
target_output.set_index(f"inputs.{LINE_NUMBER}", inplace=True)
target_output.sort_index(inplace=True)
target_output.reset_index(inplace=True, drop=False)
# target_output contains only the input columns taken by the function,
# so we need to concatenate it to the input data frame.
drop_columns = set(target_output.columns) - set(rename_dict.keys())
drop_columns = set(target_output.columns) - set(rename_dict.keys()) - tgt_out_and_input
target_output.drop(drop_columns, inplace=True, axis=1)
# Remove outputs. prefix
target_output.rename(columns=rename_dict, inplace=True)
# Concatenate output to input
target_output = pd.concat([target_output, initial_data], axis=1)
return target_output, set(rename_dict.values()), run
return target_output, generated_columns, run
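
To illustrate the collision handling above, here is a minimal, self-contained sketch (the column names and sample values are hypothetical, not taken from the PR): a target-produced column keeps its `outputs.` prefix when the input data already contains a column of the same name, while non-colliding columns have the prefix stripped.

```python
import pandas as pd

# Hypothetical input data that already contains a "question" column.
initial_data = pd.DataFrame({"question": ["Q1"], "ground_truth": ["GT1"]})
# Hypothetical target output: the target rewrote "question" and produced "answer".
target_output = pd.DataFrame({"outputs.question": ["rewritten Q1"], "outputs.answer": ["A1"]})

prefix = "outputs."
rename_dict = {col: col[len(prefix):] for col in target_output.columns if col.startswith(prefix)}
generated_columns = set(rename_dict.values())

# Columns that collide with the input keep their "outputs." prefix.
for col in initial_data.columns:
    if col in generated_columns:
        del rename_dict[f"{prefix}{col}"]

merged = pd.concat([target_output.rename(columns=rename_dict), initial_data], axis=1)
print(list(merged.columns))  # ['outputs.question', 'answer', 'question', 'ground_truth']
```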


def _apply_column_mapping(source_df: pd.DataFrame, mapping_config: dict, inplace: bool = False):
@@ -163,17 +171,34 @@ def _apply_column_mapping(source_df: pd.DataFrame, mapping_config: dict, inplace

if mapping_config:
column_mapping = {}
columns_to_drop = set()
pattern_prefix = "data."
run_outputs_prefix = "run.outputs."

for map_to_key, map_value in mapping_config.items():
match = re.search(r"^\${([^{}]+)}$", map_value)
if match is not None:
pattern = match.group(1)
if pattern.startswith(pattern_prefix):
map_from_key = pattern.split(pattern_prefix)[1]
column_mapping[map_from_key] = map_to_key

result_df = source_df.rename(columns=column_mapping, inplace=inplace)
column_mapping[pattern[len(pattern_prefix):]] = map_to_key
elif pattern.startswith(run_outputs_prefix):
map_from_key = pattern[len(run_outputs_prefix):]
col_outputs = f'outputs.{map_from_key}'
# If the data set already had a target-generated column before the target was
# applied, that column will have the "outputs." prefix. We will use the
# target-generated column for validation.
if col_outputs in source_df.columns:
# If column needs to be mapped to already existing column.
if map_to_key in source_df.columns:
columns_to_drop.add(map_to_key)
column_mapping[col_outputs] = map_to_key
else:
column_mapping[map_from_key] = map_to_key
# If a column that would otherwise be dropped can actually be renamed,
# we do not drop it.
columns_to_drop = columns_to_drop - set(column_mapping.keys())
result_df = source_df.drop(columns=columns_to_drop, inplace=inplace)
result_df.rename(columns=column_mapping, inplace=True)

return result_df
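
As a rough illustration of the pattern matching above (the evaluator parameter and column names are made up), a mapping value such as `${run.outputs.question}` resolves to the prefixed column `outputs.question` when that column exists in the data frame, and falls back to the bare column name otherwise:

```python
import re

def resolve_mapping(mapping_config, existing_columns):
    """Sketch of how a ${data.*} / ${run.outputs.*} value selects a source column."""
    column_mapping = {}
    for map_to_key, map_value in mapping_config.items():
        match = re.search(r"^\${([^{}]+)}$", map_value)
        if match is None:
            continue
        pattern = match.group(1)
        if pattern.startswith("data."):
            column_mapping[pattern[len("data."):]] = map_to_key
        elif pattern.startswith("run.outputs."):
            source = pattern[len("run.outputs."):]
            prefixed = f"outputs.{source}"
            # Prefer the target-generated, prefixed column when it is present.
            column_mapping[prefixed if prefixed in existing_columns else source] = map_to_key
    return column_mapping

print(resolve_mapping({"question": "${run.outputs.question}"}, {"outputs.question", "answer"}))
# {'outputs.question': 'question'}
```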

@@ -199,12 +224,39 @@ def _process_evaluator_config(evaluator_config: Dict[str, Dict[str, str]]):
"Ensure only ${target.} and ${data.} are used."
)

# Replace ${target.} with ${data.}
processed_config[evaluator][map_to_key] = map_value.replace("${target.", "${data.")
# Replace ${target.} with ${run.outputs.}
processed_config[evaluator][map_to_key] = map_value.replace("${target.", "${run.outputs.")

return processed_config
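
For example (the evaluator name and column are hypothetical), a user-facing mapping of `${target.question}` is rewritten so that the evaluator run reads the column directly from the target run's outputs:

```python
# Hypothetical user-supplied configuration.
evaluator_config = {"question_ev": {"question": "${target.question}"}}

processed = {
    evaluator: {k: v.replace("${target.", "${run.outputs.") for k, v in mapping.items()}
    for evaluator, mapping in evaluator_config.items()
}
print(processed)  # {'question_ev': {'question': '${run.outputs.question}'}}
```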


def _rename_columns_maybe(df: pd.DataFrame, target_generated: Set[str]):
"""
Change the column names of the data frame. The change happens in place.

Columns that already have the "outputs." prefix are left unchanged. The "outputs."
prefix is added to columns in the target_generated set; all other columns get the
"inputs." prefix.
:param df: The data frame to apply changes to.
:param target_generated: The columns generated by the target.
:return: The changed data frame.
"""
rename_dict = {}
for col in df.columns:
outputs_col = f'outputs.{col}'
# Do not rename columns that already carry the "outputs." prefix for a target-generated column.
if 'outputs.' in col and col[len('outputs.'):] in target_generated:
continue
# If the data frame already contains outputs.{col}, the un-prefixed column is
# actually the original input; otherwise add the "outputs." prefix.
if col in target_generated and outputs_col not in df.columns:
rename_dict[col] = outputs_col
else:
rename_dict[col] = f'inputs.{col}'
df.rename(columns=rename_dict, inplace=True)
return df
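
A small usage sketch of the renaming rules described in the docstring (column names are hypothetical): a column already prefixed with `outputs.` for a target-generated name is kept, an un-prefixed column that collides with a target output becomes an input, and a non-colliding target-generated column gains the `outputs.` prefix.

```python
import pandas as pd

# Hypothetical frame after the target has run: "question" collided with the input,
# so the target-generated copy kept its "outputs." prefix; "answer" did not collide.
df = pd.DataFrame({
    "question": ["Q1"],
    "outputs.question": ["rewritten Q1"],
    "answer": ["A1"],
    "ground_truth": ["GT1"],
})
target_generated = {"question", "answer"}

rename_dict = {}
for col in df.columns:
    if col.startswith("outputs.") and col[len("outputs."):] in target_generated:
        continue  # already marked as a target output
    if col in target_generated and f"outputs.{col}" not in df.columns:
        rename_dict[col] = f"outputs.{col}"
    else:
        rename_dict[col] = f"inputs.{col}"

print(list(df.rename(columns=rename_dict).columns))
# ['inputs.question', 'outputs.question', 'outputs.answer', 'inputs.ground_truth']
```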


def evaluate(
*,
evaluation_name: Optional[str] = None,
@@ -256,60 +308,70 @@ def evaluate(
if data is not None and target is not None:
input_data_df, target_generated_columns, target_run = _apply_target_to_data(target, data, pf_client,
input_data_df, evaluation_name)

# Make sure the default is always present in the configuration.
if not evaluator_config:
evaluator_config = {}
if 'default' not in evaluator_config:
evaluator_config['default'] = {}

for evaluator_name, mapping in evaluator_config.items():
mapped_to_values = set(mapping.values())
for col in target_generated_columns:
# If the user defined the mapping differently, do not change it.
# If it was mapped to the target, we have already changed it
# in _process_evaluator_config.
run_output = f'${{run.outputs.{col}}}'
# We will add our mapping only if the
# customer did not map the target output themselves.
if col not in mapping and run_output not in mapped_to_values:
evaluator_config[evaluator_name][col] = run_output

Comment on lines +318 to +329
Member:
Not sure why we would need the code here.

Contributor Author:

In this case we are making sure we map all the outputs of the target function to the appropriate columns in the evaluators' input; however, we should not do that if the customer has mapped this output to something else.
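
A minimal sketch of what that block does (the evaluator and column names here are illustrative): for every target-generated column, a `${run.outputs.<column>}` mapping is added unless the user already mapped that column or already points another parameter at the same target output.

```python
# Hypothetical state after _process_evaluator_config has run.
evaluator_config = {"default": {}, "question_ev": {"another_question": "${run.outputs.question}"}}
target_generated_columns = {"question", "answer"}

for evaluator_name, mapping in evaluator_config.items():
    mapped_to_values = set(mapping.values())
    for col in target_generated_columns:
        run_output = f"${{run.outputs.{col}}}"
        # Only add our mapping if the customer did not map the target output themselves.
        if col not in mapping and run_output not in mapped_to_values:
            evaluator_config[evaluator_name][col] = run_output

print(evaluator_config["question_ev"])
# {'another_question': '${run.outputs.question}', 'answer': '${run.outputs.answer}'}
```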

# After all columns have been generated, we can check that we have
# everything we need for the evaluators.
_validate_columns(input_data_df, evaluators, target=None, evaluator_config=evaluator_config)

evaluator_info = {}

with tempfile.TemporaryDirectory() as d:
data_file = data
if target_generated_columns:
data_file = os.path.join(d, "input.jsonl")
input_data_df.to_json(data_file, orient="records", lines=True)

for evaluator_name, evaluator in evaluators.items():
evaluator_info[evaluator_name] = {}
evaluator_info[evaluator_name]["run"] = pf_client.run(
flow=evaluator,
run=target_run,
column_mapping=evaluator_config.get(evaluator_name, evaluator_config.get("default", None)),
data=data_file,
stream=True,
)

evaluators_result_df = None
for evaluator_name, evaluator_info in evaluator_info.items():
evaluator_result_df = pf_client.get_details(evaluator_info["run"], all_results=True)

# drop input columns
evaluator_result_df = evaluator_result_df.drop(
columns=[col for col in evaluator_result_df.columns if col.startswith("inputs.")]
)

# Rename output columns.
# Assuming that after removing the input columns, all remaining columns are output columns.
evaluator_result_df.rename(
columns={
col: "outputs." f"{evaluator_name}.{col.replace('outputs.', '')}"
for col in evaluator_result_df.columns
},
inplace=True,
)

evaluators_result_df = (
pd.concat([evaluators_result_df, evaluator_result_df], axis=1, verify_integrity=True)
if evaluators_result_df is not None
else evaluator_result_df
)
for evaluator_name, evaluator in evaluators.items():
evaluator_info[evaluator_name] = {}
evaluator_info[evaluator_name]["run"] = pf_client.run(
flow=evaluator,
run=target_run,
column_mapping=evaluator_config.get(evaluator_name, evaluator_config.get("default", None)),
data=data,
stream=True,
)

evaluators_result_df = None
for evaluator_name, evaluator_info in evaluator_info.items():
evaluator_result_df = pf_client.get_details(evaluator_info["run"], all_results=True)

# drop input columns
evaluator_result_df = evaluator_result_df.drop(
columns=[col for col in evaluator_result_df.columns if col.startswith("inputs.")]
)

# Rename output columns.
# Assuming that after removing the input columns, all remaining columns are output columns.
evaluator_result_df.rename(
columns={
col: "outputs." f"{evaluator_name}.{col.replace('outputs.', '')}"
for col in evaluator_result_df.columns
},
inplace=True,
)

evaluators_result_df = (
pd.concat([evaluators_result_df, evaluator_result_df], axis=1, verify_integrity=True)
if evaluators_result_df is not None
else evaluator_result_df
)

# Rename columns, generated by template function to outputs instead of inputs.
input_data_df.rename(
columns={
col: f"{'outputs' if col in target_generated_columns else 'inputs'}.{col}" for col in input_data_df.columns
},
inplace=True,
)
# If the target generates columns already present in the input data, those columns
# will be marked as outputs already, so we do not need to rename them.
input_data_df = _rename_columns_maybe(input_data_df, target_generated_columns)

result_df = pd.concat([input_data_df, evaluators_result_df], axis=1, verify_integrity=True)
metrics = _calculate_mean(evaluators_result_df)
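
Putting the pieces together, a rough sketch (illustrative column and evaluator names) of the shape of the final merged rows: input columns carry an `inputs.` prefix, target-generated columns an `outputs.` prefix, and evaluator results an `outputs.<evaluator>.` prefix.

```python
import pandas as pd

# Illustrative frames standing in for input_data_df and evaluators_result_df.
input_data_df = pd.DataFrame({"inputs.question": ["Q1"], "outputs.answer": ["A1"]})
evaluators_result_df = pd.DataFrame({"outputs.f1.f1_score": [0.5]})

result_df = pd.concat([input_data_df, evaluators_result_df], axis=1, verify_integrity=True)
print(list(result_df.columns))
# ['inputs.question', 'outputs.answer', 'outputs.f1.f1_score']
```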
6 changes: 6 additions & 0 deletions src/promptflow-evals/tests/evals/e2etests/target_fn.py
@@ -11,3 +11,9 @@ def target_fn(question: str) -> str:
def target_fn2(question: str) -> str:
answer = target_fn(question)["answer"]
return {"response": answer}


def target_fn3(question: str) -> str:
response = target_fn(question)
response['question'] = f'The question is as follows: {question}'
return response
49 changes: 48 additions & 1 deletion src/promptflow-evals/tests/evals/e2etests/test_evaluate.py
@@ -27,6 +27,10 @@ def answer_evaluator(answer):
return {"length": len(answer)}


def question_evaluator(question):
return {"length": len(question)}


def _get_run_from_run_history(flow_run_id, runs_operation):
"""Get run info from run history"""
token = "Bearer " + AzureCliCredential().get_token("https://management.azure.com/.default").token
@@ -132,7 +136,10 @@ def test_evaluate_with_target(self, questions_file):
result = evaluate(
data=questions_file,
target=target_fn,
evaluators={"answer": answer_evaluator, "f1": f1_score_eval},
evaluators={
"answer": answer_evaluator,
"f1": f1_score_eval
},
)
row_result_df = pd.DataFrame(result["rows"])
assert "outputs.answer" in row_result_df.columns
@@ -141,6 +148,46 @@
assert "outputs.f1.f1_score" in row_result_df.columns
assert not any(np.isnan(f1) for f1 in row_result_df["outputs.f1.f1_score"])

@pytest.mark.parametrize(
'evaluation_config',
[
None,
{"default": {}},
{"default": {}, 'question_ev': {}},
{"default": {'question': '${target.question}'}},
{"default": {'question': '${data.question}'}},
{"default": {}, 'question_ev': {'question': '${data.question}'}},
{"default": {}, 'question_ev': {'question': '${target.question}'}},
{"default": {}, 'question_ev': {'another_question': '${target.question}'}},
{"default": {'another_question': '${target.question}'}},
])
def test_evaluate_another_questions(self, questions_file, evaluation_config):
"""Test evaluation with target function."""
from .target_fn import target_fn3
# run the evaluation with targets
result = evaluate(
target=target_fn3,
data=questions_file,
evaluators={
"question_ev": question_evaluator,
},
evaluator_config=evaluation_config
)
row_result_df = pd.DataFrame(result["rows"])
assert "outputs.answer" in row_result_df.columns
assert "inputs.question" in row_result_df.columns
assert "outputs.question" in row_result_df.columns
assert "outputs.question_ev.length" in row_result_df.columns
question = "outputs.question"

mapping = None
if evaluation_config:
mapping = evaluation_config.get('question_ev', evaluation_config.get("default", None))
if mapping and ('another_question' in mapping or mapping['question'] == '${data.question}'):
question = "inputs.question"
expected = list(row_result_df[question].str.len())
assert expected == list(row_result_df['outputs.question_ev.length'])

@pytest.mark.parametrize(
"evaluate_config",
[
@@ -0,0 +1,3 @@
{"question":"How long is flight from Earth to LV-426?","answer":"There is nothing good there.", "ground_truth": "Far away.", "outputs.question": "The question is as follows: How long is flight from Earth to LV-426?"}
{"question":"Why there is no central heating on the street?","answer":"There is no central heating on the streets today, but it will be, I promise.", "ground_truth": "It is expensive.", "outputs.question": "The question is as follows: Why there is no central heating on the street?"}
{"question":"Why these questions are so strange?","answer":"The life is strange...", "ground_truth": "The life is strange...", "outputs.question": "The question is as follows: Why these questions are so strange?"}
@@ -0,0 +1,3 @@
{"question":"How long is flight from Earth to LV-426?","ground_truth":"Far away."}
{"question":"Why there is no central heating on the street?","ground_truth":"It is expensive."}
{"question":"Why these questions are so strange?","ground_truth":"The life is strange..."}