Skip to content

interface


get_processor_from_source(identifier, source_cfg)

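Args: identifier: value forwarded to the implementation as its `identifier` keyword argument. source_cfg: configuration mapping that must contain the `dataset_config` and `implementation` keys; every other entry is forwarded to the implementation as a keyword argument.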

Returns: object: the result of calling the implementation resolved from `source_cfg["implementation"]`.

Source code in preprocess_toolbox/interface.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
def get_processor_from_source(identifier: str, source_cfg: dict) -> object:
    """Build a processor from a source configuration mapping.

    The ``implementation`` entry is resolved with ``get_implementation`` and
    the result is called with:

    * the object returned by ``get_dataset_config_implementation`` for the
      ``dataset_config`` entry (positional),
    * ``identifier=identifier``,
    * ``init_source=False``,
    * every remaining entry of ``source_cfg`` as a keyword argument.

    Args:
        identifier: Value forwarded to the implementation as ``identifier``.
        source_cfg: Configuration mapping. Must contain the keys
            ``dataset_config`` and ``implementation``; all other items are
            forwarded unchanged as keyword arguments.

    Returns:
        object: Whatever the resolved implementation returns when called.

    Raises:
        RuntimeError: If ``source_cfg`` lacks ``dataset_config`` or
            ``implementation``.
    """
    if "dataset_config" not in source_cfg:
        raise RuntimeError("Source configuration should link to a dataset!")
    if "implementation" not in source_cfg:
        raise RuntimeError("Must specify the implementation to use!")

    # These two keys are consumed here; everything else belongs to the implementation.
    reserved_keys = ("dataset_config", "implementation")
    create_kwargs = {k: v for k, v in source_cfg.items() if k not in reserved_keys}

    # Pass values as logging arguments so they are only rendered when the
    # corresponding level is actually enabled.
    logging.info("Attempting to instantiate %s with loaded configuration", source_cfg["implementation"])
    logging.debug("Converted kwargs from the retrieved configuration: %s", create_kwargs)

    return get_implementation(source_cfg["implementation"])(
        get_dataset_config_implementation(source_cfg["dataset_config"]),
        identifier=identifier,
        init_source=False,
        **create_kwargs)

get_processor_implementation(config)

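Args: config: path to a JSON configuration file; it must end in `.json` and exist, and its top-level object must provide the `implementation` and `data` keys.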

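Returns: object: the result of calling the implementation named in the configuration with the entries of its `data` section as keyword arguments.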

Source code in preprocess_toolbox/interface.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def get_processor_implementation(config: os.PathLike) -> object:
    """Instantiate a processor from a JSON configuration file.

    The file is parsed with ``orjson`` and must contain a top-level object
    with two keys: ``implementation``, which is resolved with
    ``get_implementation``, and ``data``, a mapping whose items become the
    keyword arguments of the call. Leading and trailing underscores are
    stripped from each ``data`` key before it is used as an argument name.

    Args:
        config: Path to the configuration file. Its string form must end in
            ``.json`` and the path must exist.

    Returns:
        object: Whatever the resolved implementation returns when called with
        the converted keyword arguments.

    Raises:
        RuntimeError: If ``config`` does not end in ``.json`` or does not
            exist.
        KeyError: If the parsed configuration lacks ``data`` or
            ``implementation``.
    """
    if not str(config).endswith(".json"):
        raise RuntimeError("{} does not look like a JSON configuration".format(config))
    if not os.path.exists(config):
        raise RuntimeError("{} is not a configuration in existence".format(config))

    logging.debug("Retrieving implementations details from %s", config)

    # Read raw bytes: orjson parses UTF-8 bytes directly, so the result does
    # not depend on the platform's default text encoding.
    with open(config, "rb") as fh:
        payload = orjson.loads(fh.read())

    # "data" is looked up before "implementation", matching the original
    # evaluation order so a file missing both reports the same key.
    data = payload["data"]
    implementation = get_implementation(payload["implementation"])

    # NOTE(review): strip("_") removes underscores at both ends of each key;
    # presumably this maps private attribute names back to constructor
    # argument names - confirm against whatever writes these files.
    create_kwargs = {k.strip("_"): v for k, v in data.items()}

    logging.info("Attempting to instantiate %s with loaded configuration", implementation)
    logging.debug("Converted kwargs from the retrieved configuration: %s", create_kwargs)

    return implementation(**create_kwargs)