From 91a16c9616d85c845eced7d987d49530039572b8 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Fri, 10 Apr 2026 17:31:50 +0100 Subject: [PATCH 1/9] Separate the bulk of the '_analyse' logic from the while loop it is in; '_analyse' is now run in the while loop from '_analyse_in_thread'; grouped SPA/tomo-specific Analyser attributes in a separate block --- src/murfey/client/analyser.py | 338 +++++++++++++++++----------------- 1 file changed, 168 insertions(+), 170 deletions(-) diff --git a/src/murfey/client/analyser.py b/src/murfey/client/analyser.py index 88ee0721c..dd15ec53c 100644 --- a/src/murfey/client/analyser.py +++ b/src/murfey/client/analyser.py @@ -56,23 +56,14 @@ def __init__( ): super().__init__() self._basepath = basepath_local.absolute() + self._token = token + self._environment = environment self._limited = limited self._experiment_type = "" self._acquisition_software = "" - self._extension: str = "" - self._unseen_xml: list = [] self._context: Context | None = None - self._batch_store: dict = {} - self._environment = environment - self._force_mdoc_metadata = force_mdoc_metadata - self._token = token - self._serialem = serialem - self.parameters_model: ( - Type[ProcessingParametersSPA] | Type[ProcessingParametersTomo] | None - ) = None - self.queue: queue.Queue = queue.Queue() - self.thread = threading.Thread(name="Analyser", target=self._analyse) + self.thread = threading.Thread(name="Analyser", target=self._analyse_in_thread) self._stopping = False self._halt_thread = False self._murfey_config = ( @@ -85,6 +76,17 @@ def __init__( else {} ) + # SPA & Tomo-specific attributes + self._extension: str = "" + self._unseen_xml: list = [] + self._batch_store: dict = {} + self._force_mdoc_metadata = force_mdoc_metadata + self._mdoc_for_reading: Path | None = None + self._serialem = serialem + self.parameters_model: ( + Type[ProcessingParametersSPA] | Type[ProcessingParametersTomo] | None + ) = None + def __repr__(self) -> str: return f"" @@ -334,9 +336,8 @@ def post_transfer(self, transferred_file: Path): f"An exception was encountered post transfer: {e}", exc_info=True ) - def _analyse(self): + def _analyse_in_thread(self): logger.info("Analyser thread started") - mdoc_for_reading = None while not self._halt_thread: transferred_file = self.queue.get() transferred_file = ( @@ -347,185 +348,182 @@ def _analyse(self): if not transferred_file: self._halt_thread = True continue - if self._limited: - if ( - "Metadata" in transferred_file.parts - or transferred_file.name == "EpuSession.dm" - and not self._context - ): - if not (context := _get_context("SPAMetadataContext")): - continue - self._context = context.load()( - "epu", - self._basepath, - self._murfey_config, - self._token, - ) - elif ( - "Batch" in transferred_file.parts - or "SearchMaps" in transferred_file.parts - or transferred_file.name == "Session.dm" - and not self._context - ): - if not (context := _get_context("TomographyMetadataContext")): - continue - self._context = context.load()( - "tomo", - self._basepath, - self._murfey_config, - self._token, - ) - self.post_transfer(transferred_file) - else: - dc_metadata = {} - if not self._serialem and ( - self._force_mdoc_metadata - and transferred_file.suffix == ".mdoc" - or mdoc_for_reading - ): - if self._context: - try: - dc_metadata = self._context.gather_metadata( - mdoc_for_reading or transferred_file, - environment=self._environment, - ) - except KeyError as e: - logger.error( - f"Metadata gathering failed with a key error for key: {e.args[0]}" - ) - raise e - if not dc_metadata: - mdoc_for_reading = None - elif transferred_file.suffix == ".mdoc": - mdoc_for_reading = transferred_file - if not self._context: - if not self._find_extension(transferred_file): - logger.debug(f"No extension found for {transferred_file}") - continue - if not self._find_context(transferred_file): - logger.debug( - f"Couldn't find context for {str(transferred_file)!r}" + self._analyse(transferred_file) + self.queue.task_done() + logger.debug("Analyer thread has stopped analysing incoming files") + self.notify(final=True) + + def _analyse(self, transferred_file: Path): + if self._limited: + if ( + "Metadata" in transferred_file.parts + or transferred_file.name == "EpuSession.dm" + and not self._context + ): + if not (context := _get_context("SPAMetadataContext")): + return + self._context = context.load()( + "epu", + self._basepath, + self._murfey_config, + self._token, + ) + elif ( + "Batch" in transferred_file.parts + or "SearchMaps" in transferred_file.parts + or transferred_file.name == "Session.dm" + and not self._context + ): + if not (context := _get_context("TomographyMetadataContext")): + return + self._context = context.load()( + "tomo", + self._basepath, + self._murfey_config, + self._token, + ) + self.post_transfer(transferred_file) + else: + dc_metadata = {} + if not self._serialem and ( + self._force_mdoc_metadata + and transferred_file.suffix == ".mdoc" + or self._mdoc_for_reading + ): + if self._context: + try: + dc_metadata = self._context.gather_metadata( + self._mdoc_for_reading or transferred_file, + environment=self._environment, ) - self.queue.task_done() - continue - elif self._extension: - logger.info( - f"Context found successfully for {transferred_file}" + except KeyError as e: + logger.error( + f"Metadata gathering failed with a key error for key: {e.args[0]}" ) - try: + raise e + if not dc_metadata: + self._mdoc_for_reading = None + elif transferred_file.suffix == ".mdoc": + self._mdoc_for_reading = transferred_file + if not self._context: + if not self._find_extension(transferred_file): + logger.debug(f"No extension found for {transferred_file}") + return + if not self._find_context(transferred_file): + logger.debug(f"Couldn't find context for {str(transferred_file)!r}") + return + elif self._extension: + logger.info(f"Context found successfully for {transferred_file}") + try: + if self._context is not None: self._context.post_first_transfer( transferred_file, environment=self._environment, ) - except Exception as e: - logger.error(f"Exception encountered: {e}") - if "AtlasContext" not in str(self._context): - if not dc_metadata: - try: + except Exception as e: + logger.error(f"Exception encountered: {e}") + if "AtlasContext" not in str(self._context): + if not dc_metadata: + try: + if self._context is not None: dc_metadata = self._context.gather_metadata( self._xml_file(transferred_file), environment=self._environment, ) - except NotImplementedError: - dc_metadata = {} - except KeyError as e: - logger.error( - f"Metadata gathering failed with a key error for key: {e.args[0]}" - ) - raise e - except ValueError as e: - logger.error( - f"Metadata gathering failed with a value error: {e}" - ) - if not dc_metadata or not self._force_mdoc_metadata: - self._unseen_xml.append(transferred_file) - else: - self._unseen_xml = [] - if dc_metadata.get("file_extension"): - self._extension = dc_metadata["file_extension"] - else: - dc_metadata["file_extension"] = self._extension - dc_metadata["acquisition_software"] = ( - self._context._acquisition_software - ) - self.notify(dc_metadata) - - # Contexts that can be immediately posted without additional work - elif "CLEMContext" in str(self._context): - logger.debug( - f"File {transferred_file.name!r} is part of CLEM workflow" - ) - self.post_transfer(transferred_file) - elif "FIBContext" in str(self._context): - logger.debug( - f"File {transferred_file.name!r} is part of the FIB workflow" - ) - self.post_transfer(transferred_file) - elif "SXTContext" in str(self._context): - logger.debug(f"File {transferred_file.name!r} is an SXT file") - self.post_transfer(transferred_file) - elif "AtlasContext" in str(self._context): - logger.debug(f"File {transferred_file.name!r} is part of the atlas") - self.post_transfer(transferred_file) - - # Handle files with tomography and SPA context differently - elif not self._extension or self._unseen_xml: - if not self._find_extension(transferred_file): - logger.error(f"No extension found for {transferred_file}") - continue - if self._extension: - logger.info( - f"Extension found successfully for {transferred_file}" - ) - try: - self._context.post_first_transfer( - transferred_file, - environment=self._environment, - ) - except Exception as e: - logger.error(f"Exception encountered: {e}") - if not dc_metadata: - try: - dc_metadata = self._context.gather_metadata( - mdoc_for_reading - or self._xml_file(transferred_file), - environment=self._environment, - ) + except NotImplementedError: + dc_metadata = {} except KeyError as e: logger.error( f"Metadata gathering failed with a key error for key: {e.args[0]}" ) raise e + except ValueError as e: + logger.error( + f"Metadata gathering failed with a value error: {e}" + ) if not dc_metadata or not self._force_mdoc_metadata: - mdoc_for_reading = None self._unseen_xml.append(transferred_file) - if dc_metadata: + else: self._unseen_xml = [] if dc_metadata.get("file_extension"): self._extension = dc_metadata["file_extension"] else: dc_metadata["file_extension"] = self._extension - dc_metadata["acquisition_software"] = ( - self._context._acquisition_software + if self._context is not None: + dc_metadata["acquisition_software"] = ( + self._context._acquisition_software + ) + self.notify(dc_metadata) + + # Contexts that can be immediately posted without additional work + elif "CLEMContext" in str(self._context): + logger.debug(f"File {transferred_file.name!r} is part of CLEM workflow") + self.post_transfer(transferred_file) + elif "FIBContext" in str(self._context): + logger.debug( + f"File {transferred_file.name!r} is part of the FIB workflow" + ) + self.post_transfer(transferred_file) + elif "SXTContext" in str(self._context): + logger.debug(f"File {transferred_file.name!r} is an SXT file") + self.post_transfer(transferred_file) + elif "AtlasContext" in str(self._context): + logger.debug(f"File {transferred_file.name!r} is part of the atlas") + self.post_transfer(transferred_file) + + # Handle files with tomography and SPA context differently + elif not self._extension or self._unseen_xml: + if not self._find_extension(transferred_file): + logger.error(f"No extension found for {transferred_file}") + return + if self._extension: + logger.info(f"Extension found successfully for {transferred_file}") + try: + self._context.post_first_transfer( + transferred_file, + environment=self._environment, + ) + except Exception as e: + logger.error(f"Exception encountered: {e}") + if not dc_metadata: + try: + dc_metadata = self._context.gather_metadata( + self._mdoc_for_reading + or self._xml_file(transferred_file), + environment=self._environment, ) - self.notify(dc_metadata) - elif any( - context in str(self._context) - for context in ( - "SPAContext", - "SPAMetadataContext", - "TomographyContext", - "TomographyMetadataContext", - ) - ): - context = str(self._context).split(" ")[0].split(".")[-1] - logger.debug( - f"Transferring file {str(transferred_file)} with context {context!r}" - ) - self.post_transfer(transferred_file) - self.queue.task_done() - logger.debug("Analyer thread has stopped analysing incoming files") - self.notify(final=True) + except KeyError as e: + logger.error( + f"Metadata gathering failed with a key error for key: {e.args[0]}" + ) + raise e + if not dc_metadata or not self._force_mdoc_metadata: + self._mdoc_for_reading = None + self._unseen_xml.append(transferred_file) + if dc_metadata: + self._unseen_xml = [] + if dc_metadata.get("file_extension"): + self._extension = dc_metadata["file_extension"] + else: + dc_metadata["file_extension"] = self._extension + dc_metadata["acquisition_software"] = ( + self._context._acquisition_software + ) + self.notify(dc_metadata) + elif any( + context in str(self._context) + for context in ( + "SPAContext", + "SPAMetadataContext", + "TomographyContext", + "TomographyMetadataContext", + ) + ): + context = str(self._context).split(" ")[0].split(".")[-1] + logger.debug( + f"Transferring file {str(transferred_file)} with context {context!r}" + ) + self.post_transfer(transferred_file) def _xml_file(self, data_file: Path) -> Path: if not self._environment: From ef1923ab5d2321ef8f956818a8e46d2fc9829559 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Fri, 10 Apr 2026 17:42:18 +0100 Subject: [PATCH 2/9] Explicitly determine which contexts to permit to enter the DC metadata-parsing logic block --- src/murfey/client/analyser.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/murfey/client/analyser.py b/src/murfey/client/analyser.py index dd15ec53c..4049f2a09 100644 --- a/src/murfey/client/analyser.py +++ b/src/murfey/client/analyser.py @@ -422,7 +422,15 @@ def _analyse(self, transferred_file: Path): ) except Exception as e: logger.error(f"Exception encountered: {e}") - if "AtlasContext" not in str(self._context): + if any( + context in str(self._context) + for context in ( + "SPAContext", + "SPAMetadataContext", + "TomographyContext", + "TomographyMetadataContext", + ) + ): if not dc_metadata: try: if self._context is not None: From e348b9b1d09e3517e2c1ae6425a223a7439e69c1 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 13 Apr 2026 13:24:43 +0100 Subject: [PATCH 3/9] Updated type hinting and rearranged functions in 'murfey.client.context' --- src/murfey/client/context.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/murfey/client/context.py b/src/murfey/client/context.py index c64f713c8..73172e62c 100644 --- a/src/murfey/client/context.py +++ b/src/murfey/client/context.py @@ -3,7 +3,7 @@ import logging from importlib.metadata import entry_points from pathlib import Path -from typing import Any, List, NamedTuple +from typing import Any, NamedTuple, OrderedDict import xmltodict @@ -209,12 +209,6 @@ def ensure_dcg_exists( return dcg_tag -class ProcessingParameter(NamedTuple): - name: str - label: str - default: Any = None - - def detect_acquisition_software(dir_for_transfer: Path) -> str: glob = dir_for_transfer.glob("*") for f in glob: @@ -225,9 +219,15 @@ def detect_acquisition_software(dir_for_transfer: Path) -> str: return "" +class ProcessingParameter(NamedTuple): + name: str + label: str + default: Any = None + + class Context: - user_params: List[ProcessingParameter] = [] - metadata_params: List[ProcessingParameter] = [] + user_params: list[ProcessingParameter] = [] + metadata_params: list[ProcessingParameter] = [] def __init__(self, name: str, acquisition_software: str, token: str): self._acquisition_software = acquisition_software @@ -256,7 +256,7 @@ def post_first_transfer( def gather_metadata( self, metadata_file: Path, environment: MurfeyInstanceEnvironment | None = None - ): + ) -> OrderedDict | None: raise NotImplementedError( f"gather_metadata must be declared in derived class to be used: {self}" ) From a410fa6412f714168eb9b0dfb2ea410230fe0eb9 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 13 Apr 2026 13:29:06 +0100 Subject: [PATCH 4/9] Adjusted logic for '_analyse' so that context-specific metadata processing is done within the relevant Context blocks --- src/murfey/client/analyser.py | 198 ++++++++++++++-------------------- 1 file changed, 79 insertions(+), 119 deletions(-) diff --git a/src/murfey/client/analyser.py b/src/murfey/client/analyser.py index 4049f2a09..a9164c30d 100644 --- a/src/murfey/client/analyser.py +++ b/src/murfey/client/analyser.py @@ -14,7 +14,7 @@ import threading from importlib.metadata import entry_points from pathlib import Path -from typing import Type +from typing import OrderedDict, Type from murfey.client.context import Context from murfey.client.destinations import find_longest_data_directory @@ -384,87 +384,24 @@ def _analyse(self, transferred_file: Path): ) self.post_transfer(transferred_file) else: - dc_metadata = {} + # Logic that doesn't require context determination if not self._serialem and ( - self._force_mdoc_metadata - and transferred_file.suffix == ".mdoc" - or self._mdoc_for_reading + self._force_mdoc_metadata and transferred_file.suffix == ".mdoc" ): - if self._context: - try: - dc_metadata = self._context.gather_metadata( - self._mdoc_for_reading or transferred_file, - environment=self._environment, - ) - except KeyError as e: - logger.error( - f"Metadata gathering failed with a key error for key: {e.args[0]}" - ) - raise e - if not dc_metadata: - self._mdoc_for_reading = None - elif transferred_file.suffix == ".mdoc": - self._mdoc_for_reading = transferred_file - if not self._context: - if not self._find_extension(transferred_file): - logger.debug(f"No extension found for {transferred_file}") - return + self._mdoc_for_reading = transferred_file + + # Try and determine context, and notify once when context is found + if self._context is None: + # Exit early if the file can't be used to determine the context if not self._find_context(transferred_file): logger.debug(f"Couldn't find context for {str(transferred_file)!r}") return - elif self._extension: - logger.info(f"Context found successfully for {transferred_file}") - try: - if self._context is not None: - self._context.post_first_transfer( - transferred_file, - environment=self._environment, - ) - except Exception as e: - logger.error(f"Exception encountered: {e}") - if any( - context in str(self._context) - for context in ( - "SPAContext", - "SPAMetadataContext", - "TomographyContext", - "TomographyMetadataContext", - ) - ): - if not dc_metadata: - try: - if self._context is not None: - dc_metadata = self._context.gather_metadata( - self._xml_file(transferred_file), - environment=self._environment, - ) - except NotImplementedError: - dc_metadata = {} - except KeyError as e: - logger.error( - f"Metadata gathering failed with a key error for key: {e.args[0]}" - ) - raise e - except ValueError as e: - logger.error( - f"Metadata gathering failed with a value error: {e}" - ) - if not dc_metadata or not self._force_mdoc_metadata: - self._unseen_xml.append(transferred_file) - else: - self._unseen_xml = [] - if dc_metadata.get("file_extension"): - self._extension = dc_metadata["file_extension"] - else: - dc_metadata["file_extension"] = self._extension - if self._context is not None: - dc_metadata["acquisition_software"] = ( - self._context._acquisition_software - ) - self.notify(dc_metadata) - - # Contexts that can be immediately posted without additional work - elif "CLEMContext" in str(self._context): + else: + logger.info(f"Context found successfully using {transferred_file}") + + # Trigger processing or metadata parsing according to the context + # Go through the straightforward ones first + if "CLEMContext" in str(self._context): logger.debug(f"File {transferred_file.name!r} is part of CLEM workflow") self.post_transfer(transferred_file) elif "FIBContext" in str(self._context): @@ -480,59 +417,82 @@ def _analyse(self, transferred_file: Path): self.post_transfer(transferred_file) # Handle files with tomography and SPA context differently - elif not self._extension or self._unseen_xml: - if not self._find_extension(transferred_file): - logger.error(f"No extension found for {transferred_file}") - return - if self._extension: - logger.info(f"Extension found successfully for {transferred_file}") + elif ( + any( + context in str(self._context) + for context in ( + "SPAContext", + "SPAMetadataContext", + "TomographyContext", + "TomographyMetadataContext", + ) + ) + and self._context is not None + ): + context = str(self._context).split(" ")[0].split(".")[-1] + + dc_metadata: OrderedDict | None = None + if not self._serialem and ( + self._force_mdoc_metadata + and transferred_file.suffix == ".mdoc" + or self._mdoc_for_reading + ): try: - self._context.post_first_transfer( - transferred_file, + dc_metadata = self._context.gather_metadata( + self._mdoc_for_reading or transferred_file, environment=self._environment, ) - except Exception as e: - logger.error(f"Exception encountered: {e}") + except KeyError as e: + logger.error( + f"Metadata gathering failed with a key error for key: " + f"{e.args[0]}" + ) + raise e + # Set the mdoc field to None if no metadata was found if not dc_metadata: - try: - dc_metadata = self._context.gather_metadata( - self._mdoc_for_reading - or self._xml_file(transferred_file), - environment=self._environment, - ) - except KeyError as e: - logger.error( - f"Metadata gathering failed with a key error for key: {e.args[0]}" - ) - raise e - if not dc_metadata or not self._force_mdoc_metadata: self._mdoc_for_reading = None - self._unseen_xml.append(transferred_file) - if dc_metadata: - self._unseen_xml = [] - if dc_metadata.get("file_extension"): - self._extension = dc_metadata["file_extension"] - else: - dc_metadata["file_extension"] = self._extension - dc_metadata["acquisition_software"] = ( - self._context._acquisition_software + + if not self._extension or self._unseen_xml: + # Early return if no extension was found + if not self._find_extension(transferred_file): + logger.warning(f"No extension found for {transferred_file}") + return + else: + logger.info( + f"Extension found successfully for {transferred_file}" ) - self.notify(dc_metadata) - elif any( - context in str(self._context) - for context in ( - "SPAContext", - "SPAMetadataContext", - "TomographyContext", - "TomographyMetadataContext", - ) - ): - context = str(self._context).split(" ")[0].split(".")[-1] + logger.debug( f"Transferring file {str(transferred_file)} with context {context!r}" ) self.post_transfer(transferred_file) + if not dc_metadata and transferred_file.suffix != ".mdoc": + try: + dc_metadata = self._context.gather_metadata( + self._mdoc_for_reading or self._xml_file(transferred_file), + environment=self._environment, + ) + except KeyError as e: + logger.error( + f"Metadata gathering failed with a key error for key: {e.args[0]}" + ) + raise e + if not dc_metadata or not self._force_mdoc_metadata: + self._mdoc_for_reading = None + self._unseen_xml.append(transferred_file) + if dc_metadata: + self._unseen_xml = [] + if dc_metadata.get("file_extension"): + self._extension = dc_metadata["file_extension"] + else: + dc_metadata["file_extension"] = self._extension + dc_metadata["acquisition_software"] = ( + self._context._acquisition_software + ) + self.notify(dc_metadata) + return + def _xml_file(self, data_file: Path) -> Path: if not self._environment: return data_file.with_suffix(".xml") From b848a97dfc460ca12a416098d0366e7938b4344f Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 13 Apr 2026 13:33:06 +0100 Subject: [PATCH 5/9] Typo --- src/murfey/client/analyser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/murfey/client/analyser.py b/src/murfey/client/analyser.py index a9164c30d..44e80a8ed 100644 --- a/src/murfey/client/analyser.py +++ b/src/murfey/client/analyser.py @@ -350,7 +350,7 @@ def _analyse_in_thread(self): continue self._analyse(transferred_file) self.queue.task_done() - logger.debug("Analyer thread has stopped analysing incoming files") + logger.debug("Analyser thread has stopped analysing incoming files") self.notify(final=True) def _analyse(self, transferred_file: Path): From 4cb78c512c55b361f576af7f381b4333d6947da3 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 13 Apr 2026 13:57:11 +0100 Subject: [PATCH 6/9] Streamlined data structure for example files used in context test --- tests/client/test_analyser.py | 131 ++++++++++++++-------------------- 1 file changed, 52 insertions(+), 79 deletions(-) diff --git a/tests/client/test_analyser.py b/tests/client/test_analyser.py index 3c21b3805..eb3bb00a4 100644 --- a/tests/client/test_analyser.py +++ b/tests/client/test_analyser.py @@ -3,105 +3,78 @@ import pytest from murfey.client.analyser import Analyser -from murfey.client.contexts.atlas import AtlasContext -from murfey.client.contexts.clem import CLEMContext -from murfey.client.contexts.fib import FIBContext from murfey.client.contexts.spa import SPAContext -from murfey.client.contexts.spa_metadata import SPAMetadataContext -from murfey.client.contexts.sxt import SXTContext from murfey.client.contexts.tomo import TomographyContext -from murfey.client.contexts.tomo_metadata import TomographyMetadataContext from murfey.util.models import ProcessingParametersSPA, ProcessingParametersTomo -example_files = [ - # Tomography - ["visit/Position_1_001_0.0_20250715_012434_fractions.tiff", TomographyContext], - ["visit/Position_1_2_002_3.0_20250715_012434_Fractions.mrc", TomographyContext], - ["visit/Position_1_2_003_6.0_20250715_012434_EER.eer", TomographyContext], - ["visit/name1_004_9.0_20250715_012434_fractions.tiff", TomographyContext], - ["visit/Position_1_[30.0].tiff", TomographyContext], - ["visit/Position_1.mdoc", TomographyContext], - ["visit/name1_2.mdoc", TomographyContext], - # Tomography metadata - ["visit/Session.dm", TomographyMetadataContext], - ["visit/SearchMaps/SearchMap.xml", TomographyMetadataContext], - ["visit/Batch/BatchPositionsList.xml", TomographyMetadataContext], - ["visit/Thumbnails/file.mrc", TomographyMetadataContext], - # SPA - ["visit/FoilHole_01234_fractions.tiff", SPAContext], - ["visit/FoilHole_01234_EER.eer", SPAContext], - # SPA metadata - ["atlas/atlas.mrc", AtlasContext], - ["visit/EpuSession.dm", SPAMetadataContext], - ["visit/Metadata/GridSquare.dm", SPAMetadataContext], - # CLEM LIF file - ["visit/images/test_file.lif", CLEMContext], - # CLEM TIFF files - [ +example_files = { + "CLEMContext": [ + # CLEM LIF file + "visit/images/test_file.lif", + # CLEM TIFF files "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12--Z02--C01.tif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12_Lng_LVCC--Z02--C01.tif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Series001--Z00--C00.tif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Series001_Lng_LVCC--Z00--C00.tif", - CLEMContext, - ], - # CLEM TIFF file accompanying metadata - [ + # CLEM TIFF file accompanying metadata "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Position 12.xlif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Position 12_Lng_LVCC.xlif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Metadata/Position 12_histo.xlif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Position 12/Metadata/Position 12_Lng_LVCC_histo.xlif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Series001.xlif", - CLEMContext, - ], - [ "visit/images/2024_03_14_12_34_56--Project001/grid1/Metadata/Series001_Lng_LVCC.xlif", - CLEMContext, ], - # FIB Autotem files - ["visit/autotem/visit/ProjectData.dat", FIBContext], - ["visit/autotem/visit/Sites/Lamella/SetupImages/Preparation.tif", FIBContext], - [ + "FIBContext": [ + # FIB Autotem files + "visit/autotem/visit/ProjectData.dat", + "visit/autotem/visit/Sites/Lamella/SetupImages/Preparation.tif", "visit/autotem/visit/Sites/Lamella (2)//DCImages/DCM_2026-03-09-23-45-40.926/2026-03-09-23-48-43-Finer-Milling-dc_rescan-image-.png", - FIBContext, - ], - # FIB Maps files - ["visit/maps/visit/EMproject.emxml", FIBContext], - [ + # FIB Maps files + "visit/maps/visit/EMproject.emxml", "visit/maps/visit/LayersData/Layer/Electron Snapshot/Electron Snapshot.tiff", - FIBContext, - ], - [ "visit/maps/visit/LayersData/Layer/Electron Snapshot (2)/Electron Snapshot (2).tiff", - FIBContext, ], - # Soft x-ray tomography - ["visit/tomo__tag_ROI10_area1_angle-60to60@1.5_1sec_251p.txrm", SXTContext], - ["visit/X-ray_mosaic_ROI2.xrm", SXTContext], -] + "SXTContext": [ + "visit/tomo__tag_ROI10_area1_angle-60to60@1.5_1sec_251p.txrm", + "visit/X-ray_mosaic_ROI2.xrm", + ], + "AtlasContext": [ + "atlas/atlas.mrc", + ], + "TomographyContext": [ + "visit/Position_1_001_0.0_20250715_012434_fractions.tiff", + "visit/Position_1_2_002_3.0_20250715_012434_Fractions.mrc", + "visit/Position_1_2_003_6.0_20250715_012434_EER.eer", + "visit/name1_004_9.0_20250715_012434_fractions.tiff", + "visit/Position_1_[30.0].tiff", + "visit/Position_1.mdoc", + "visit/name1_2.mdoc", + ], + "TomographyMetadataContext": [ + "visit/Session.dm", + "visit/SearchMaps/SearchMap.xml", + "visit/Batch/BatchPositionsList.xml", + "visit/Thumbnails/file.mrc", + ], + "SPAContext": [ + "visit/FoilHole_01234_fractions.tiff", + "visit/FoilHole_01234_EER.eer", + ], + "SPAMetadataContext": [ + "visit/EpuSession.dm", + "visit/Metadata/GridSquare.dm", + ], +} -@pytest.mark.parametrize("file_and_context", example_files) +@pytest.mark.parametrize( + "file_and_context", + [ + [file, context] + for context, file_list in example_files.items() + for file in file_list + ], +) def test_find_context(file_and_context, tmp_path): # Unpack parametrised variables file_name, context = file_and_context @@ -111,7 +84,7 @@ def test_find_context(file_and_context, tmp_path): # Check that the results are as expected assert analyser._find_context(tmp_path / file_name) - assert isinstance(analyser._context, context) + assert analyser._context is not None and context in str(analyser._context) # Checks for the specific workflow contexts if isinstance(analyser._context, TomographyContext): From ee8a1750f9f65836e772ea599d6151925a909d32 Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 13 Apr 2026 13:58:14 +0100 Subject: [PATCH 7/9] Add stubs for tests for the different supported workflows --- tests/client/test_analyser.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/client/test_analyser.py b/tests/client/test_analyser.py index eb3bb00a4..fd5e252e8 100644 --- a/tests/client/test_analyser.py +++ b/tests/client/test_analyser.py @@ -141,3 +141,23 @@ def test_analyser_epu_determination(tmp_path): analyser.queue.put(tomo_file) analyser.stop() assert analyser._context._acquisition_software == "epu" + + +def test_analyse_clem(): + pass + + +def test_analyse_fib(): + pass + + +def test_analyse_sxt(): + pass + + +def test_analyse_spa(): + pass + + +def test_analyse_tomo(): + pass From 7852f0421db60db6edeb3f2489a6c4f49123a24e Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 13 Apr 2026 14:44:54 +0100 Subject: [PATCH 8/9] Added tests for '_analyse' for the CLEM, FIB, and SXT workflows, as well as for contextless files --- tests/client/test_analyser.py | 118 ++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 6 deletions(-) diff --git a/tests/client/test_analyser.py b/tests/client/test_analyser.py index fd5e252e8..c10690e3a 100644 --- a/tests/client/test_analyser.py +++ b/tests/client/test_analyser.py @@ -1,6 +1,9 @@ from __future__ import annotations +from pathlib import Path + import pytest +from pytest_mock import MockerFixture from murfey.client.analyser import Analyser from murfey.client.contexts.spa import SPAContext @@ -143,16 +146,119 @@ def test_analyser_epu_determination(tmp_path): assert analyser._context._acquisition_software == "epu" -def test_analyse_clem(): - pass +@pytest.mark.parametrize("test_file", contextless_files) +def test_analyse_no_context( + test_file: str, + mocker: MockerFixture, + tmp_path: Path, +): + # Mock the 'post_transfer' class function + mock_post_transfer = mocker.patch.object(Analyser, "post_transfer") + spy_find_context = mocker.spy(Analyser, "_find_context") + # Initialise the Analyser + analyser = Analyser(tmp_path, "") + analyser._analyse(tmp_path / test_file) -def test_analyse_fib(): - pass + # "_find_context" should be called + assert spy_find_context.call_count == 1 + # "post_transfer" should not be called + mock_post_transfer.assert_not_called() -def test_analyse_sxt(): - pass + +def test_analyse_clem( + mocker: MockerFixture, + tmp_path: Path, +): + # Gather example files related to the CLEM workflow + test_files = [ + file + for context, file_list in example_files.items() + for file in file_list + if context == "CLEMContext" and not file.endswith(".lif") + ] + + # Mock the 'post_transfer' class function + mock_post_transfer = mocker.patch.object(Analyser, "post_transfer") + spy_find_context = mocker.spy(Analyser, "_find_context") + + # Initialise the Analyser + analyser = Analyser(tmp_path, "") + for file in test_files: + analyser._analyse(tmp_path / file) + + # "_find_context" should be called only once + assert spy_find_context.call_count == 1 + assert analyser._context is not None and "CLEMContext" in str(analyser._context) + + # "_post_transfer" should be called on every one of these files + assert mock_post_transfer.call_count == len(test_files) + + +@pytest.mark.parametrize( + "test_params", + # Test the "autotem" and "maps" workflows separately + [ + [software, [file for file in file_list if software in file]] + for software in ("autotem", "maps") + for context, file_list in example_files.items() + if context == "FIBContext" + ], +) +def test_analyse_fib( + mocker: MockerFixture, + test_params: tuple[str, list[str]], + tmp_path: Path, +): + # Unpack test params + software, test_files = test_params + + # Mock the 'post_transfer' class function + mock_post_transfer = mocker.patch.object(Analyser, "post_transfer") + spy_find_context = mocker.spy(Analyser, "_find_context") + + # Initialise the Analyser + analyser = Analyser(tmp_path, "") + for file in test_files: + analyser._analyse(tmp_path / file) + + # "_find_context" should be called only once + assert spy_find_context.call_count == 1 + assert analyser._context is not None and "FIBContext" in str(analyser._context) + assert analyser._context._acquisition_software == software + + # "_post_transfer" should be called on every one of these files + assert mock_post_transfer.call_count == len(test_files) + + +def test_analyse_sxt( + mocker: MockerFixture, + tmp_path: Path, +): + # Load the example files corresponding to the SXT workflow + test_files = [ + file + for context, file_list in example_files.items() + for file in file_list + if context == "SXTContext" + ] + + # Mock the 'post_transfer' class function + mock_post_transfer = mocker.patch.object(Analyser, "post_transfer") + spy_find_context = mocker.spy(Analyser, "_find_context") + + # Initialise the Analyser + analyser = Analyser(tmp_path, "") + for file in test_files: + analyser._analyse(tmp_path / file) + + # "_find_context" should be called only once + assert spy_find_context.call_count == 1 + assert analyser._context is not None and "SXTContext" in str(analyser._context) + + # "_post_transfer" should be called on every one of these files + assert mock_post_transfer.call_count == len(test_files) def test_analyse_spa(): From 00508df6ab57a45a8bafb9fc9e29e11b9193819b Mon Sep 17 00:00:00 2001 From: Eu Pin Tien Date: Mon, 13 Apr 2026 16:38:39 +0100 Subject: [PATCH 9/9] Added tests for the 'limited' logic block in '_analyse' --- tests/client/test_analyser.py | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/client/test_analyser.py b/tests/client/test_analyser.py index c10690e3a..65d57c447 100644 --- a/tests/client/test_analyser.py +++ b/tests/client/test_analyser.py @@ -261,6 +261,42 @@ def test_analyse_sxt( assert mock_post_transfer.call_count == len(test_files) +@pytest.mark.parametrize( + "test_params", + [ + ["SPAMetadataContext", ["AtlasContext", "SPAContext"]], + ["TomographyMetadataContext", ["AtlasContext", "SPAContext"]], + ], +) +def test_analyse_limited( + mocker: MockerFixture, tmp_path: Path, test_params: tuple[str, list[str]] +): + # Unpack test params + expected_context, other_contexts = test_params + + # Load example files related to the CLEM + test_files = [ + file + for context, file_list in example_files.items() + for file in file_list + if context in (expected_context, *other_contexts) + ] + + # Mock the 'post_transfer' class function + mock_post_transfer = mocker.patch.object(Analyser, "post_transfer") + + # Initialise the Analyser + analyser = Analyser(tmp_path, "", limited=True) + for file in test_files: + analyser._analyse(tmp_path / file) + + # "_find_context" should be called only once + assert analyser._context is not None and expected_context in str(analyser._context) + + # "_post_transfer" should be called on every one of these files + assert mock_post_transfer.call_count == len(test_files) + + def test_analyse_spa(): pass