#!/usr/bin/python
# -*- coding: utf-8 -*-
"""This module provides utility classes and functions whose usage is not limited to a specific context."""
import json
import os
import sys
from loguru import logger
from bibliometa.config import LOGGING_FILENAME, LOGGING_FORMAT
[docs]class MainUtils:
"""The :class:`~bibliometa.utils.utils.MainUtils` provides generic utilities.
"""
[docs] @staticmethod
def get_file_info(path, suffix=""):
"""Get filename, suffix and file extension from a path.
:param path: Path to a file
:type path: `str`
:param suffix: Suffix that should be added to a filename (optional)
:type suffix: `str`
:return: Filename, suffix and file extension
:rtype: tuple of `str`
"""
filename, ext = os.path.splitext(path)
dirname = os.path.dirname(path)
if not os.path.exists(dirname):
os.makedirs(dirname)
s = MainUtils._get_suffix(suffix)
return filename, s, ext
@staticmethod
def _get_suffix(s):
"""Create a filename suffix by adding an underscore before parameter `s`.
:param s: Suffix that will be preceded by an underscore
:type s: `str`
:return: "_" + suffix, if suffix length > 0
:rtype: `str`
"""
suffix = ""
if len(str(s)) > 0:
suffix = "_" + str(s)
return suffix
[docs] @staticmethod
def get_factor(size):
"""Calculate factor to keep max value of progress bar below 100.
:param size: Number of total values
:type size: `int`
:return: Factor by which number of values needs to be divided
:rtype: `int`
"""
factor = 1
while size > 100:
size = int(size / 10)
factor *= 10
return factor
[docs]class DictUtils:
"""The :class:`~bibliometa.utils.utils.DictUtils` provides generic utilities."""
# Default values
_ENCODING = "utf-8"
[docs] @staticmethod
def remove_keys(i, o, k, encoding=_ENCODING):
"""Remove all keys from a given dict i (in a JSON file) if not in k and save the remaining dict as JSON in o.
:param i: Path to input JSON file
:type i: `str`
:param o: Path to output JSON file
:type o: `str`
:param k: List of keys to be removed
:type k: `list`
:param encoding: File encoding
:type encoding: `str`
"""
# Set up logging
logger.remove()
logger.add(LOGGING_FILENAME, format=LOGGING_FORMAT, level="DEBUG")
logger.add(sys.stderr, level="INFO")
with open(i, "r", encoding=encoding) as f:
in_data = json.load(f)
out_data = dict()
for key in in_data.keys():
if key in list(k):
out_data[key] = in_data[key]
with open(o, 'w', encoding=encoding) as f:
json.dump(out_data, f, indent=4)
logger.info(f"Keys from {i} were removed (except keys in {k}). New dictionary was written to {o}.")
[docs] @staticmethod
def remove_empty_entries(d):
"""Remove keys from dict d that have no values.
:param d: A dictionary
:type d: `dict`
:return: Input dictionary without empty entries.
:rtype: `dict`
"""
if isinstance(d, dict):
return {
k: v
for k, v in ((k, DictUtils.remove_empty_entries(v)) for k, v in d.items())
if v
}
if isinstance(d, list):
return [v for v in map(DictUtils.remove_empty_entries, d) if v]
return d
[docs] @staticmethod
def merge(a, b):
"""Merge two dictionaries.
:param a: A dictionary
:type a: `dict`
:param b: Another dictionary
:type b: `dict`
:raise: KeyError if a key is found in both dictionaries
:return: Dictionary a merged with b
:rtype: `dict`
"""
intersection = set(a.keys()).intersection(set(b.keys()))
if len(intersection) > 0:
raise KeyError(f"Duplicate key found: {intersection}.")
else:
a.update(b)
return a
[docs] @staticmethod
def get_top_keys(d, k):
"""Get keys with highest values from a dictionary.
:param d: A dictionary
:type d: `dict`
:param k: Top k elements that will be returned
:type k: `int`
:return: List of tuples (value, key) for top k keys
:rtype: `list`
"""
items = sorted(d.items(), reverse=True, key=lambda x: x[1])
return map(lambda x: x[0], items[:k])
[docs] @staticmethod
def sort_by_key(d):
"""Sort a dictionary alphabetically by its keys.
:param d: A dictionary
:type d: `dict`
:return: The sorted dictionary
:rtype: `dict`
"""
return dict(sorted(d.items(), key=lambda x: str(x[0])))
[docs] @staticmethod
def save_to_json(d, f, encoding=_ENCODING):
"""Save a dictionary to a JSON file.
:param d: A dictionary
:type d: `dict`
:param f: Path to file
:type f: `str`
:param encoding: File encoding
:type encoding: `str`
:raise FileNotFoundError: If f does not point to a file
"""
try:
with open(f, 'w', encoding=encoding) as file:
json.dump(d, file, indent=4)
except FileNotFoundError as e:
raise e
[docs] @staticmethod
def read_from_json(f, encoding=_ENCODING):
"""Read a dictionary from a JSON file.
:param f: Path to file
:type f: `str`
:param encoding: File encoding
:type encoding: `str`
:return: Dictionary loaded from JSON file
:rtype: `dict`
:raise FileNotFoundError: If f does not point to a file
"""
try:
with open(f, "r", encoding=encoding) as file:
return json.load(file)
except FileNotFoundError as e:
raise e