Module openpack_toolkit.bin.file
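
Command-line utilities for managing OpenPack dataset files. The tool provides two subcommands: `make-index`, which scans a local dataset directory and writes a CSV file index, and `check`, which compares the files on disk against the file index published for the selected data repository and reports any missing files.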

import argparse
from logging import INFO, basicConfig, getLogger
from pathlib import Path

from openpack_toolkit import DATASET_VERSION
from openpack_toolkit.validation.file import (
    FILE_EXISTS_KEY_NAME,
    DatasetRepo,
    DatasetStatus,
    check_files_exists,
    get_dataset_file_index_uri,
    make_dataset_file_index,
)

basicConfig(level=INFO)
logger = getLogger(__name__)

LATEST_VERSION_ON_ZENODO = DATASET_VERSION
DEFAULT_OUTPUT_DIR = Path(".")


def make_parser():
    def _add_common_params(parser: argparse.ArgumentParser):
        parser.add_argument(
            "-r",
            "--rootdir",
            required=True,
            type=Path,
            help="OpenPack dataset directory.",
        )
        parser.add_argument(
            "-v",
            "--version",
            default=LATEST_VERSION_ON_ZENODO,
            type=str,
            help=f"dataset version. (Default: {LATEST_VERSION_ON_ZENODO})",
        )
        parser.add_argument(
            "--data-repo",
            default=DatasetRepo.ZENODO.value,
            type=str,
            help=(
                f"dataset repository. (Default: {DatasetRepo.ZENODO.value}, "
                f"Options: {DatasetRepo.ZENODO.value}, {DatasetRepo.GOOGLE_DRIVE.value}, "
                f"{DatasetRepo.GOOGLE_DRIVE_RGB.value})"
            ),
        )
        return parser

    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(required=True)

    # == Make File Index ==
    parser_make_index = subparsers.add_parser(
        "make-index", help="Make file index of the OpenPack dataset. See `make-index -h`"
    )
    parser_make_index.set_defaults(handler=entry_func_make_index)
    parser_make_index = _add_common_params(parser_make_index)
    parser_make_index.add_argument(
        "--output-dir",
        required=False,
        default=DEFAULT_OUTPUT_DIR,
        type=Path,
        help=f"output directory. (Default: {DEFAULT_OUTPUT_DIR})",
    )

    # == Check Files ==
    parser_check = subparsers.add_parser("check", help="Check downloaded files. See `check -h`")
    parser_check.set_defaults(handler=entry_func_check)
    parser_check = _add_common_params(parser_check)
    parser_check.add_argument(
        "--output-dir",
        required=False,
        default=DEFAULT_OUTPUT_DIR,
        type=Path,
        help=f"output directory. (Default: {DEFAULT_OUTPUT_DIR})",
    )
    return parser


def entry_func_make_index(args: argparse.Namespace):
    df = make_dataset_file_index(args.rootdir, args.version)
    logger.info(f"File Index:\n{df}")

    path = Path(args.output_dir, f"file_index_OpenPack_{args.version}_{args.data_repo}.csv")
    logger.info(f"Save file index to {path}")
    path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(path, index=False)


def entry_func_check(args: argparse.Namespace):
    file_index_uri = get_dataset_file_index_uri(args.version, args.data_repo)
    logger.info(f"Get file index from {file_index_uri}")

    status_code, df_file_index = check_files_exists(args.rootdir, args.version, file_index_uri)

    path = Path(
        args.output_dir, f"file_index_OpenPack_{args.version}_{args.data_repo}_check_result.csv"
    )
    logger.info(f"Save check results to {path}")
    path.parent.mkdir(parents=True, exist_ok=True)
    df_file_index.to_csv(path, index=False)

    if status_code == DatasetStatus.HAS_MISSING_FILE:
        df_missing_files = df_file_index[df_file_index[FILE_EXISTS_KEY_NAME] == False]
        logger.warning(
            f"{len(df_missing_files)} files ({len(df_missing_files)/len(df_file_index)*100:.1f}%)"
            f" are missing.\n{df_missing_files}"
        )
    else:
        logger.info("No missing files! It's ready to use!")


def entry_func():
    parser = make_parser()
    args = parser.parse_args()

    if hasattr(args, "handler"):
        args.handler(args)
    else:
        parser.print_help()


if __name__ == "__main__":
    entry_func()
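
Because the module calls entry_func() under the __main__ guard, it can be run as a script, for example with `python -m openpack_toolkit.bin.file make-index -r /path/to/openpack`. The sketch below drives the same code path programmatically; the dataset root is a hypothetical placeholder.

# A minimal sketch of invoking the make-index subcommand from Python.
# "/data/openpack" is a hypothetical placeholder for the dataset root.
from openpack_toolkit.bin.file import make_parser

parser = make_parser()
args = parser.parse_args(["make-index", "-r", "/data/openpack"])
args.handler(args)  # set_defaults() bound this handler to entry_func_make_index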

Functions

def entry_func()
def entry_func():
    parser = make_parser()
    args = parser.parse_args()

    if hasattr(args, "handler"):
        args.handler(args)
    else:
        parser.print_help()
def entry_func_check(args: argparse.Namespace)
def entry_func_check(args: argparse.Namespace):
    file_index_uri = get_dataset_file_index_uri(args.version, args.data_repo)
    logger.info(f"Get file index from {file_index_uri}")

    status_code, df_file_index = check_files_exists(args.rootdir, args.version, file_index_uri)

    path = Path(
        args.output_dir, f"file_index_OpenPack_{args.version}_{args.data_repo}_check_result.csv"
    )
    logger.info(f"Save check results to {path}")
    path.parent.mkdir(parents=True, exist_ok=True)
    df_file_index.to_csv(path, index=False)

    if status_code == DatasetStatus.HAS_MISSING_FILE:
        df_missing_files = df_file_index[df_file_index[FILE_EXISTS_KEY_NAME] == False]
        logger.warning(
            f"{len(df_missing_files)} files ({len(df_missing_files)/len(df_file_index)*100:.1f}%)"
            f" are missing.\n{df_missing_files}"
        )
    else:
        logger.info("No missing files! It's ready to use!")
def entry_func_make_index(args: argparse.Namespace)
def entry_func_make_index(args: argparse.Namespace):
    df = make_dataset_file_index(args.rootdir, args.version)
    logger.info(f"File Index:\n{df}")

    path = Path(args.output_dir, f"file_index_OpenPack_{args.version}_{args.data_repo}.csv")
    logger.info(f"Save file index to {path}")
    path.parent.mkdir(parents=True, exist_ok=True)
    df.to_csv(path, index=False)
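
entry_func_make_index expects the same rootdir, version, data_repo, and output_dir attributes on its namespace, so it can be driven either through the parser (as shown near the top of this page) or with a hand-built argparse.Namespace in the same way as entry_func_check above.
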
def make_parser()
def make_parser():
    def _add_common_params(parser: argparse.ArgumentParser):
        parser.add_argument(
            "-r",
            "--rootdir",
            required=True,
            type=Path,
            help="OpenPack dataset directory.",
        )
        parser.add_argument(
            "-v",
            "--version",
            default=LATEST_VERSION_ON_ZENODO,
            type=str,
            help=f"dataset version. (Default: {LATEST_VERSION_ON_ZENODO})",
        )
        parser.add_argument(
            "--data-repo",
            default=DatasetRepo.ZENODO.value,
            type=str,
            help=(
                f"dataset repository. (Default: {DatasetRepo.ZENODO.value}, "
                f"Options: {DatasetRepo.ZENODO.value}, {DatasetRepo.GOOGLE_DRIVE.value}, "
                f"{DatasetRepo.GOOGLE_DRIVE_RGB.value})"
            ),
        )
        return parser

    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(required=True)

    # == Make File Index ==
    parser_make_index = subparsers.add_parser(
        "make-index", help="Make file index of the OpenPack dataset. See `make-index -h`"
    )
    parser_make_index.set_defaults(handler=entry_func_make_index)
    parser_make_index = _add_common_params(parser_make_index)
    parser_make_index.add_argument(
        "--output-dir",
        required=False,
        default=DEFAULT_OUTPUT_DIR,
        type=Path,
        help=f"output directory. (Default: {DEFAULT_OUTPUT_DIR})",
    )

    # == Check Files ==
    parser_check = subparsers.add_parser("check", help="Check downloaded files. See `check -h`")
    parser_check.set_defaults(handler=entry_func_check)
    parser_check = _add_common_params(parser_check)
    parser_check.add_argument(
        "--output-dir",
        required=False,
        default=DEFAULT_OUTPUT_DIR,
        type=Path,
        help=f"output directory. (Default: {DEFAULT_OUTPUT_DIR})",
    )
    return parser
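
Both subcommands share the -r/--rootdir, -v/--version, and --data-repo options added by _add_common_params(), plus their own --output-dir. A small sketch of inspecting the resulting defaults, with a hypothetical placeholder for the required dataset root:

# Only --rootdir is required; the remaining options fall back to the defaults above.
from openpack_toolkit.bin.file import make_parser

args = make_parser().parse_args(["check", "-r", "/data/openpack"])
print(args.version, args.data_repo, args.output_dir)  # DATASET_VERSION, DatasetRepo.ZENODO.value, Path(".")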