Module openpack_toolkit.codalab.operation_segmentation
Evaluation code for the Operation Semantic Segmentation Task.
This task aims to recognize 10 operations in the manual packing activity. The macro-averaged F1-measure is used as the evaluation metric.
Note
The OpenPack Challenge uses eval_operation_segmentation() in eval.py for evaluation.
Todo
- Add task description.
- Add usage (data format).
- Add a detailed description of the evaluation format.
Expand source code
"""Evaluation codes for Operation Semantic Segmentation Task
This task is aimed to recognize 9 operations in the manual packing activity.
F1-measure with macro average is used as metrics.
Note:
OpenPack Challenge uses `eval_operation_segmentation()` in `eval.py` for evaluation.
Todo:
* Add task desciption.
* Add usage (data format)
* Add detail desciption of evaluation format.
"""
from .eval import eval_operation_segmentation
from .utils import (
    construct_submission_dict,
    eval_operation_segmentation_wrapper,
    make_submission_zipfile,
)

__all__ = [
    "construct_submission_dict",
    "eval_operation_segmentation",
    "eval_operation_segmentation_wrapper",
    "make_submission_zipfile",
]
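To make the metric concrete, the snippet below computes a macro-averaged F1 score over made-up per-frame operation IDs with scikit-learn. This is only an illustration of the metric named above, not the toolkit's implementation (the official scoring lives in eval.py and is documented below); it shows how the macro average weights every class equally regardless of support.

import numpy as np
from sklearn.metrics import f1_score

# Made-up 1 Hz ground-truth and predicted operation IDs (a subset of the real ID set).
t_id = np.array([100, 100, 200, 200, 300, 300, 400, 400])
y_id = np.array([100, 100, 200, 300, 300, 300, 400, 500])

# Average F1 over these classes only; ignore/Null classes would be excluded beforehand.
labels = [100, 200, 300, 400, 500]
macro_f1 = f1_score(t_id, y_id, labels=labels, average="macro", zero_division=0)
print(f"macro F1 = {macro_f1:.3f}")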
Sub-modules
openpack_toolkit.codalab.operation_segmentation.eval
openpack_toolkit.codalab.operation_segmentation.utils
Todo
- Add unit tests.
- Refactoring is needed.
Functions
def construct_submission_dict(outputs: Dict[str, Dict[str, numpy.ndarray]], act_set: ActSet, include_ground_truth: Optional[bool] = False, cfg: Optional[omegaconf.dictconfig.DictConfig] = None) ‑> Dict
-
Make a dict that can be used for submission and as input to the eval_operation_segmentation() function. A minimal usage sketch follows the source listing below.

Args
    outputs : Dict[str, Dict[str, np.ndarray]]
        Key is expected to be a pair of user and session, e.g., "U0102-S0100".
    act_set : ActSet
        Activity set used to map predicted class indices to class IDs.
    include_ground_truth : bool, optional
        If True, ground truth labels are included in the submission dict. Set True when you calculate scores.
    cfg : DictConfig, optional
        Config dict (required when include_ground_truth=True, to locate the annotation files).
Returns
    Dict
        Submission dict.
Expand source code
def construct_submission_dict(
    outputs: Dict[str, Dict[str, np.ndarray]],
    act_set: ActSet,
    include_ground_truth: Optional[bool] = False,
    cfg: Optional[DictConfig] = None,
) -> Dict:
    """Make dict that can be used for submission and
    `eval_workprocess_segmentation()` func.

    Args:
        outputs (Dict[str, Dict[str, np.ndarray]]): key is expected to be a pair of
            user and session. e.g., "U0102-S0100".
        act_set (ActSet): -
        include_ground_truth (bool, optional): If True, ground truth labels are
            included in the submission dict. Set True when you calculate scores.
        cfg (DictConfig, optional): config dict.
    Returns:
        Dict: submission dict
    """
    submission = dict()
    keys = sorted(outputs.keys())
    for key in keys:
        d = outputs[key]
        record = dict()
        user, session = key.split("-")

        assert d["y"].ndim == 3
        assert d["unixtime"].dtype == np.int64, (
            "unixtime must be np.int64, but got {}".format(d["unixtime"].dtype)
        )

        prediction_sess = act_set.convert_index_to_id(
            np.argmax(d["y"], axis=1).ravel())
        unixtime_pred_sess, prediction_sess = resample_prediction_1Hz(
            ts_unix=d["unixtime"].copy().ravel(), arr=prediction_sess)

        if include_ground_truth:
            with open_dict(cfg):
                cfg.user = {"name": user}
                cfg.session = session

            # TODO: Move to new function ( load_ground_truth() )
            if hasattr(cfg.dataset.annotation, "spec"):
                path = Path(
                    cfg.dataset.annotation.spec.path.dir,
                    cfg.dataset.annotation.spec.path.fname
                )
            else:
                path = Path(
                    cfg.dataset.annotation.path.dir,
                    cfg.dataset.annotation.path.fname
                )
            df_label = pd.read_csv(path)

            label_format = cfg.dataset.annotation.metadata.labels.get(
                "label_format", "")
            if label_format == "soft-target":
                cols = [c for c in df_label.columns if c.startswith("ID")]
                index_to_id = {i: int(c.replace("ID", ""))
                               for i, c in enumerate(cols)}
                df_label["index"] = np.argmax(df_label[cols].values, axis=1)
                df_label["id"] = df_label["index"].apply(
                    lambda ind: index_to_id[ind])
            unixtime_gt_sess = df_label["unixtime"].values
            ground_truth_sess = df_label["id"].values

            # check timestamp
            unixtime_pred_sess, prediction_sess = crop_prediction_sequence(
                unixtime_gt_sess, unixtime_pred_sess, prediction_sess)
            np.testing.assert_array_equal(unixtime_pred_sess, unixtime_gt_sess)
            record["ground_truth"] = ground_truth_sess.copy()

        record["unixtime"] = unixtime_pred_sess.copy()
        record["prediction"] = prediction_sess.copy()
        submission[key] = record
    return submission
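A minimal usage sketch of construct_submission_dict(). Assumptions not stated on this page: ActSet and OPENPACK_OPERATIONS are importable from the openpack_toolkit package root, and unixtime is given as int64 timestamps in milliseconds; the array shapes and values are hypothetical. With include_ground_truth=False no config is needed.

import numpy as np
from openpack_toolkit import ActSet, OPENPACK_OPERATIONS  # assumed to be exported at the package root
from openpack_toolkit.codalab.operation_segmentation import construct_submission_dict

# Hypothetical model outputs for one session: 4 windows of 30 frames.
num_windows, num_classes, window = 4, 11, 30
rng = np.random.default_rng(0)
unixtime = (
    1634000000000 + np.arange(num_windows * window) * 1000
).reshape(num_windows, window).astype(np.int64)  # int64 milliseconds (assumed format)

outputs = {
    "U0102-S0100": {
        "y": rng.random((num_windows, num_classes, window)),  # predicted probabilities
        "unixtime": unixtime,
    }
}

submission = construct_submission_dict(
    outputs, ActSet(OPENPACK_OPERATIONS), include_ground_truth=False)
print(submission["U0102-S0100"].keys())  # "unixtime" and "prediction"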
def eval_operation_segmentation(t_id: numpy.ndarray = None, y_id: numpy.ndarray = None, classes: Tuple[Tuple[int, str], ...] = None, ignore_class_id: int = None, mode: str = 'final') ‑> pandas.core.frame.DataFrame
-
Compute metrics (i.e., precision, recall, F1, support) for the given sequence. A toy scoring example follows the source listing below.

Args
    t_id : np.ndarray
        Ground-truth activity IDs, shape=(T,).
    y_id : np.ndarray
        Predicted activity IDs, shape=(T,).
    classes : Tuple
        Class definition: pairs of class ID and name.
    ignore_class_id : int, optional
        Class ID that is dropped from both sequences (and from classes) before scoring.
    mode : str
        If "final", only the macro and weighted averages are calculated. Otherwise, per-class scores are calculated in addition to the macro and weighted averages.
Returns
    pd.DataFrame
Expand source code
def eval_operation_segmentation(
    t_id: np.ndarray = None,
    y_id: np.ndarray = None,
    classes: Tuple[Tuple[int, str], ...] = None,
    ignore_class_id: int = None,
    mode: str = "final",
) -> pd.DataFrame:
    """Compute metrics (i.e., precision, recall, f1, support) for the given sequence.

    Args:
        t_id (np.ndarray): unixtime and corresponding activity ID, shape=(T,)
        y_id (np.ndarray): unixtime and predicted activity ID, shape=(T,)
        classes (Tuple): class definition. pairs of class id and name.
        mode (str): If final, only the macro score will be calculated. Otherwise,
            macro avg., weighted avg., and score for each class will be calculated.
    Returns:
        pd.DataFrame
    """
    assert t_id.ndim == 1
    assert y_id.ndim == 1
    verify_class_ids(y_id, classes)

    if ignore_class_id is not None:
        t_id, y_id = drop_ignore_class(t_id, y_id, ignore_class_id)
        classes = tuple([t for t in classes if t[0] != ignore_class_id])

    df_scores = [
        calc_avg_metrics(t_id, y_id, classes, average="macro"),
        calc_avg_metrics(t_id, y_id, classes, average="weighted"),
    ]
    if mode != "final":
        df_scores.append(
            calc_class_metrics(t_id, y_id, classes)
        )
    df_scores = pd.concat(
        df_scores, axis=0, ignore_index=True).set_index("name")
    return df_scores
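A toy scoring example. It assumes classes is built from ActSet.to_tuple() as (class_id, name) pairs and that the Null class is the only ignore class, mirroring how eval_operation_segmentation_wrapper() calls this function; the ID sequences themselves are made up.

import numpy as np
from openpack_toolkit import ActSet, OPENPACK_OPERATIONS
from openpack_toolkit.codalab.operation_segmentation import eval_operation_segmentation

act_set = ActSet(OPENPACK_OPERATIONS)
classes = act_set.to_tuple()                     # assumed (class_id, name) pairs
ignore_class_id = act_set.get_ignore_class_id()  # Null (ID 8100) is dropped before scoring

# Made-up ground-truth / predicted operation IDs for a short 1 Hz sequence.
t_id = np.array([100, 100, 200, 200, 8100, 300])
y_id = np.array([100, 200, 200, 200, 8100, 300])

df_scores = eval_operation_segmentation(
    t_id, y_id, classes=classes, ignore_class_id=ignore_class_id, mode=None)
print(df_scores)  # macro avg., weighted avg., and per-class rows (mode != "final")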
def eval_operation_segmentation_wrapper(cfg: omegaconf.dictconfig.DictConfig, outputs: Dict[str, Dict[str, numpy.ndarray]], act_set: ActSet = ActSet(classes=(Label(id=100, name='Picking', version='v3.0.0', is_ignore=False, category=None, event=None), Label(id=200, name='Relocate Item Label', version='v3.2.2', is_ignore=False, category=None, event=None), Label(id=300, name='Assemble Box', version='v3.2.2', is_ignore=False, category=None, event=None), Label(id=400, name='Insert Items', version='v3.2.2', is_ignore=False, category=None, event=None), Label(id=500, name='Close Box', version='v3.2.2', is_ignore=False, category=None, event=None), Label(id=600, name='Attach Box Label', version='v3.2.2', is_ignore=False, category=None, event=None), Label(id=700, name='Scan Label', version='v3.2.2', is_ignore=False, category=None, event=None), Label(id=800, name='Attach Shipping Label', version='v3.2.2', is_ignore=False, category=None, event=None), Label(id=900, name='Put on Back Table', version='v3.2.2', is_ignore=False, category=None, event=None), Label(id=1000, name='Fill out Order', version='v3.2.2', is_ignore=False, category=None, event=None), Label(id=8100, name='Null', version='v3.2.2', is_ignore=True, category=None, event=None))), exclude_ignore_class=True) ‑> pandas.core.frame.DataFrame
-
Compute evaluation metrics from model outputs (predicted probabilities). A call sketch follows the source listing below.

Args
    cfg : DictConfig
        Config dict.
    outputs : Dict[str, Dict[str, np.ndarray]]
        Dict object keyed by "<user>-<session>" that contains y and unixtime (and optionally t_idx). t_idx is a 2d array of target class indices with shape=(BATCH_SIZE, WINDOW). y is a 3d array of prediction probabilities with shape=(BATCH_SIZE, NUM_CLASSES, WINDOW). unixtime holds the corresponding int64 timestamps.
    act_set : ActSet, optional
        Class definition.
    exclude_ignore_class : bool
        If True, ignore classes are excluded. (default: True)
Returns
    pd.DataFrame
Expand source code
def eval_operation_segmentation_wrapper(
    cfg: DictConfig,
    outputs: Dict[str, Dict[str, np.ndarray]],
    act_set: ActSet = ActSet(OPENPACK_OPERATIONS),
    exclude_ignore_class=True,
) -> pd.DataFrame:
    """Compute evaluation metrics from model outputs (predicted probability).

    Args:
        cfg (DictConfig): config dict.
        outputs (Dict[str, Dict[str, np.ndarray]]): dict object that contains t_idx and y.
            t_idx is a 2d array of target class index with shape=(BATCH_SIZE, WINDOW).
            y is a 3d array of prediction probabilities with shape=(BATCH_SIZE, NUM_CLASSES, WINDOW).
        act_set (ActSet, optional): class definition.
        exclude_ignore_class (bool): If true, ignore classes are excluded. (default: True)
    Returns:
        pd.DataFrame
    """
    submission = construct_submission_dict(
        outputs, act_set, include_ground_truth=True, cfg=cfg)

    classes = act_set.to_tuple()
    ignore_class_id = act_set.get_ignore_class_id()
    if isinstance(ignore_class_id, tuple):
        raise NotImplementedError()

    # Evaluate
    df_scores = []
    t_id_concat, y_id_concat = [], []
    for key, d in submission.items():
        t_id = d["ground_truth"]
        y_id = d["prediction"]

        t_id_concat.append(t_id.copy())
        y_id_concat.append(y_id.copy())

        df_tmp = eval_operation_segmentation(
            t_id,
            y_id,
            classes=classes,
            ignore_class_id=ignore_class_id,
            mode=None)
        df_tmp["key"] = key
        df_scores.append(df_tmp.reset_index(drop=False))

    # Overall Score
    df_tmp = eval_operation_segmentation(
        np.concatenate(t_id_concat, axis=0),
        np.concatenate(y_id_concat, axis=0),
        classes=classes,
        ignore_class_id=ignore_class_id,
        mode=None,
    )
    df_tmp["key"] = "all"
    df_scores.append(df_tmp.reset_index(drop=False))

    df_scores = pd.concat(df_scores, axis=0, ignore_index=True)
    return df_scores
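A call sketch for the wrapper. It assumes cfg is the Hydra/OmegaConf experiment config whose dataset.annotation paths point at the ground-truth CSV files, because the wrapper always loads ground truth internally (include_ground_truth=True); the config path, shapes, and values below are hypothetical.

import numpy as np
from omegaconf import OmegaConf
from openpack_toolkit.codalab.operation_segmentation import eval_operation_segmentation_wrapper

cfg = OmegaConf.load("path/to/your/config.yaml")  # hypothetical path to the experiment config

# One entry per "<user>-<session>" key.
outputs = {
    "U0102-S0100": {
        "y": np.random.rand(4, 11, 30),  # (BATCH_SIZE, NUM_CLASSES, WINDOW) probabilities
        "unixtime": (
            1634000000000 + np.arange(4 * 30) * 1000
        ).reshape(4, 30).astype(np.int64),
    }
}

df_scores = eval_operation_segmentation_wrapper(cfg, outputs)
print(df_scores[df_scores["key"] == "all"])  # overall scores concatenated over all sessions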
def make_submission_zipfile(submission: Dict, logdir: pathlib.Path, metadata: dict = None) ‑> None
-
Check dict contents and generate a zip file for CodaLab submission. A short usage sketch follows the source listing below.

Args
    submission : Dict
        Submission dict.
    logdir : Path
        Path to the output directory.
    metadata : dict
        Dict of additional information that is included in submission.json. We recommend including a data split name.
Returns
    None (writes submission.json and submission.zip)
Expand source code
def make_submission_zipfile(
        submission: Dict,
        logdir: Path,
        metadata: dict = None) -> None:
    """Check dict contents and generate zip file for codalab submission.

    Args:
        submission (Dict): submission dict
        logdir (Path): path to the output directory
        metadata (dict): dict of additional information that is included in
            ``submission.json``. We recommend to include a data split name.
    Returns:
        None (make JSON & zip files)
    """
    # Check data format and convert into pure objects
    submission_clean = dict()
    for key, d in submission.items():
        assert isinstance(d, dict)
        record = dict()
        for arr_name, arr in d.items():
            if arr_name not in ("prediction", "unixtime"):
                logger.warning(
                    f"unexpected entry[{arr_name}] is found in submission dict.")
            assert isinstance(arr, np.ndarray)
            record[arr_name] = arr.tolist()
        submission_clean[key] = record

    # Add meta data
    if metadata is not None:
        submission_clean["meta"] = metadata

    # Write JSON file
    path_json = Path(logdir, "submission.json")
    if path_json.exists():
        os.remove(path_json)
    with open(path_json, "w") as f:
        json.dump(submission_clean, f)
    logger.info(f"write submission.json to {path_json}")

    # Make zip file
    path_zip = Path(logdir, "submission.zip")
    if path_zip.exists():
        os.remove(path_zip)
    with zipfile.ZipFile(path_zip, "w") as zf:
        zf.write(path_json, arcname="./submission.json")
    logger.info(f"write submission.zip to {path_zip}")
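A short usage sketch. The submission dict is hand-made here and contains only the two entries the function expects ("unixtime" and "prediction"); in practice, pass the dict returned by construct_submission_dict() with include_ground_truth=False. The output directory and metadata value are arbitrary examples.

import numpy as np
from pathlib import Path
from openpack_toolkit.codalab.operation_segmentation import make_submission_zipfile

logdir = Path("./submission_logs")  # hypothetical output directory
logdir.mkdir(parents=True, exist_ok=True)

submission = {
    "U0102-S0100": {
        "unixtime": np.array([1634000000000, 1634000001000], dtype=np.int64),
        "prediction": np.array([100, 200]),  # predicted operation IDs at 1 Hz
    }
}

make_submission_zipfile(submission, logdir, metadata={"split": "test"})
# -> writes <logdir>/submission.json and <logdir>/submission.zip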