# Patch summary
# -------------
# Passes an explicit encoding="utf-8" to the text-file reads and writes shown
# in the hunks below, rather than leaving the encoding argument unset.
#
# src/tagstudio/core/library/alchemy/library.py
#   - migrate_json_to_sqlite: the write_text(...) call on the
#     TS_FOLDER_NAME / IGNORE_NAME path gains encoding="utf-8".
#   - __migrate_sql_to_ts_ignore: open(ts_ignore, "w") gains encoding="utf-8".
# src/tagstudio/core/library/refresh.py
#   - __get_dir_list: the ".compiled_ignore" file is opened for writing with
#     encoding="utf-8".
# src/tagstudio/qt/global_settings.py
#   - Adds `import locale` and imports detect_char_encoding from
#     tagstudio.core.utils.encoding.
#   - GlobalSettings.read_settings: reads the file as UTF-8 first; on
#     UnicodeDecodeError it re-reads using detect_char_encoding(path), or
#     locale.getencoding() when the detector returns a falsy value.
#   - save: the settings file is opened for writing with encoding="utf-8".
# src/tagstudio/qt/resource_manager.py
#   - get: the mode "r" branch calls read_text(encoding="utf-8").
# tests/qt/test_global_settings.py
#   - Adds test_read_settings_legacy_locale_encoding: writes a settings file
#     whose date_format value is cp932-encoded, stubs detect_char_encoding to
#     return None and locale.getencoding to return "cp932", then reads, saves,
#     and checks the decoded date_format and that the file decodes as UTF-8.
#
# NOTE(review): in read_settings the fallback path.read_text(encoding=encoding)
#   is not itself inside a try block, so a wrong detected/locale encoding would
#   still raise from read_settings -- confirm that is the intended behavior.
# NOTE(review): the new test's last assertion only checks that the saved bytes
#   decode as UTF-8 to a non-empty string; it does not check the saved content.
# NOTE(review): the test calls settings.save() with no path; presumably save()
#   falls back to the loaded_from path, but that logic is not visible in this
#   patch -- verify.
#
diff --git a/src/tagstudio/core/library/alchemy/library.py b/src/tagstudio/core/library/alchemy/library.py index bc728a999..ac3d81832 100644 --- a/src/tagstudio/core/library/alchemy/library.py +++ b/src/tagstudio/core/library/alchemy/library.py @@ -357,7 +357,8 @@ def migrate_json_to_sqlite(self, json_lib: JsonLibrary): # extension include/exclude list (unwrap(self.library_dir) / TS_FOLDER_NAME / IGNORE_NAME).write_text( - migrate_ext_list([x.strip(".") for x in json_lib.ext_list], json_lib.is_exclude_list) + migrate_ext_list([x.strip(".") for x in json_lib.ext_list], json_lib.is_exclude_list), + encoding="utf-8", ) end_time = time.time() @@ -775,7 +776,7 @@ def __migrate_sql_to_ts_ignore(self, library_dir: Path): session.scalar(text("SELECT value FROM preferences WHERE key = 'IS_EXCLUDE_LIST'")) ) - with open(ts_ignore, "w") as f: + with open(ts_ignore, "w", encoding="utf-8") as f: f.write(migrate_ext_list(extensions, is_exclude_list)) def __apply_db200_migrations(self, session: Session): diff --git a/src/tagstudio/core/library/refresh.py b/src/tagstudio/core/library/refresh.py index 824cae527..92c53066c 100644 --- a/src/tagstudio/core/library/refresh.py +++ b/src/tagstudio/core/library/refresh.py @@ -88,7 +88,7 @@ def __get_dir_list(self, library_dir: Path, ignore_patterns: list[str]) -> list[ compiled_ignore_path = library_dir / ".TagStudio" / ".compiled_ignore" # Write compiled ignore patterns (built-in + user) to a temp file to pass to ripgrep - with open(compiled_ignore_path, "w") as pattern_file: + with open(compiled_ignore_path, "w", encoding="utf-8") as pattern_file: pattern_file.write("\n".join(ignore_patterns)) result = silent_run( diff --git a/src/tagstudio/qt/global_settings.py b/src/tagstudio/qt/global_settings.py index 94be1f076..520064ac8 100644 --- a/src/tagstudio/qt/global_settings.py +++ b/src/tagstudio/qt/global_settings.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-3.0-only +import locale import platform from datetime import datetime from 
enum import Enum, IntEnum, StrEnum @@ -13,6 +14,7 @@ from pydantic import BaseModel, Field from tagstudio.core.enums import ShowFilepathOption, TagClickActionOption +from tagstudio.core.utils.encoding import detect_char_encoding logger = structlog.get_logger(__name__) @@ -84,13 +86,16 @@ class GlobalSettings(BaseModel): @staticmethod def read_settings(path: Path = DEFAULT_GLOBAL_SETTINGS_PATH) -> "GlobalSettings": if path.exists(): - with open(path) as file: - filecontents = file.read() - if len(filecontents.strip()) != 0: - logger.info("[Settings] Reading Global Settings File", path=path) - settings_data = toml.loads(filecontents) - settings = GlobalSettings(**settings_data, loaded_from=path) - return settings + try: + filecontents = path.read_text(encoding="utf-8") + except UnicodeDecodeError: + encoding = detect_char_encoding(path) or locale.getencoding() + filecontents = path.read_text(encoding=encoding) + if len(filecontents.strip()) != 0: + logger.info("[Settings] Reading Global Settings File", path=path) + settings_data = toml.loads(filecontents) + settings = GlobalSettings(**settings_data, loaded_from=path) + return settings return GlobalSettings(loaded_from=path) @@ -100,7 +105,7 @@ def save(self, path: Path | None = None) -> None: if not path.parent.exists(): path.parent.mkdir(parents=True, exist_ok=True) - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: toml.dump(self.model_dump(), f, encoder=TomlEnumEncoder()) @property diff --git a/src/tagstudio/qt/resource_manager.py b/src/tagstudio/qt/resource_manager.py index f1d387ab6..8444138d7 100644 --- a/src/tagstudio/qt/resource_manager.py +++ b/src/tagstudio/qt/resource_manager.py @@ -86,7 +86,7 @@ def get(self, id: str) -> TData | None: try: match mode: case "r": - data = file_path.read_text() + data = file_path.read_text(encoding="utf-8") case "rb": data = file_path.read_bytes() diff --git a/tests/qt/test_global_settings.py b/tests/qt/test_global_settings.py index 
63eb51953..9eab01857 100644 --- a/tests/qt/test_global_settings.py +++ b/tests/qt/test_global_settings.py @@ -34,3 +34,18 @@ def test_read_settings(library_dir: Path): assert settings.date_format == "%x" assert settings.hour_format assert settings.zero_padding + + +def test_read_settings_legacy_locale_encoding(library_dir: Path, monkeypatch): + settings_path = library_dir / "settings.toml" + settings_path.write_bytes( + b'language = "ja"\ndate_format = "' + "写真".encode("cp932") + b'"\n' + ) + monkeypatch.setattr("tagstudio.qt.global_settings.detect_char_encoding", lambda _: None) + monkeypatch.setattr("tagstudio.qt.global_settings.locale.getencoding", lambda: "cp932") + + settings = GlobalSettings.read_settings(settings_path) + settings.save() + + assert settings.date_format == "写真" + assert settings_path.read_bytes().decode("utf-8")