# Source code for mh_utils.worklist_parser.classes

#  !/usr/bin/env python
#
#  classes.py
"""
Main classes for the worklist parser.
"""
#
#  Copyright © 2020 Dominic Davis-Foster <dominic@davis-foster.co.uk>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU Lesser General Public License as published by
#  the Free Software Foundation; either version 3 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU Lesser General Public License for more details.
#
#  You should have received a copy of the GNU Lesser General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
#  MA 02110-1301, USA.
#

# stdlib
from pprint import pformat
from typing import Any, Dict, List, Optional, Sequence, Union
from uuid import UUID

# 3rd party
import attr
import lxml.etree  # type: ignore
import pandas  # type: ignore
from attr_utils.docstrings import add_attrs_doc
from attr_utils.serialise import serde
from domdf_python_tools.bases import Dictable

# this package
from mh_utils.utils import element_to_bool, strip_string
from mh_utils.worklist_parser.columns import Column, columns
from mh_utils.worklist_parser.enums import AttributeType
from mh_utils.worklist_parser.parser import parse_params, parse_sample_info
from mh_utils.xml import XMLFileMixin

# Names exported by a star-import of this module.
__all__ = ["JobData", "Worklist", "Checksum", "Macro", "Attribute"]

# Make DataFrame report the top-level ``pandas`` package as its module.
# NOTE(review): presumably so that documentation cross-references to
# ``pandas.DataFrame`` resolve -- confirm before removing.
pandas.DataFrame.__module__ = "pandas"


class JobData(Dictable):
	"""
	Class that represents an entry in the worklist.

	:param id: The ID of the job. A value that is not already a :class:`~uuid.UUID`
		is converted with ``UUID(str(id))``.
	:param job_type: The type of job. Coerced with :class:`int`. TODO: enum of values
	:param run_status: The status of the analysis. Coerced with :class:`int`. TODO: enum of values
	:param sample_info: Optional ``key: value`` mapping of information about the sample.
		A falsy value (:py:obj:`None` or an empty mapping) is replaced with a new empty :class:`dict`.
	"""

	__slots__ = ["id", "job_type", "run_status", "sample_info"]

	def __init__(
			self,
			id: Union[str, UUID],  # noqa: A002  # pylint: disable=redefined-builtin
			job_type: int,
			run_status: int,
			sample_info: Optional[dict] = None,
			):

		super().__init__()

		# Normalise the ID so that ``self.id`` is always a UUID instance.
		self.id = id if isinstance(id, UUID) else UUID(str(id))

		self.job_type = int(job_type)
		self.run_status = int(run_status)

		# Never share a default mapping between instances.
		self.sample_info = sample_info if sample_info else {}

	# dtypes
	# 8: Str
	# Inj Vol, Dilution and Equilib Time (min) 5

	@classmethod
	def from_xml(
			cls,
			element: lxml.objectify.ObjectifiedElement,
			user_columns: Optional[Dict[str, Column]] = None,
			) -> "JobData":
		"""
		Construct a :class:`~.JobData` object from an XML element.

		The ``ID``, ``JobType``, ``RunStatus`` and ``SampleInfo`` children of ``element`` are read.

		:param element: The XML element to parse the data from
		:param user_columns: Optional mapping of user column labels to
			:class:`~mh_utils.worklist_parser.columns.Column` objects.
			Passed through unchanged to ``parse_sample_info``.
		"""

		return cls(
				id=element.ID,
				job_type=element.JobType,
				run_status=element.RunStatus,
				sample_info=parse_sample_info(element.SampleInfo, user_columns),
				)

	@property
	def __dict__(self):
		# The class declares ``__slots__``, so this property builds the mapping of
		# slot names to values on demand. The ID is emitted as a string.
		# NOTE(review): presumably consumed by ``Dictable`` for iteration -- confirm.
		return {key: (str(self.id) if key == "id" else getattr(self, key)) for key in self.__slots__}

	def __repr__(self) -> str:
		"""
		Return a string representation of the job which omits ``sample_info``.
		"""

		values = ", ".join(f"{key}={val!r}" for key, val in self if key != "sample_info")
		return f"{self.__class__.__name__}({values})"


class Worklist(XMLFileMixin, Dictable):
	"""
	Class that represents an Agilent MassHunter worklist.

	:param version: WorklistInfo version number. Coerced with :class:`float`.
	:param locked_run_mode: Flag to indicate whether the data was acquired in locked mode.
		In the XML file, Yes = -1 and No = 0. Coerced with :class:`bool`.
	:param instrument_name: The name of the instrument. Coerced with :class:`str`.
	:param params: Mapping of parameter names to values. TODO: Check
	:param user_columns: Mapping of user column names to their column definitions.
		:meth:`~.Worklist.from_xml` supplies
		:class:`~mh_utils.worklist_parser.columns.Column` objects keyed by ``Column.name``.
	:param jobs: The entries in the worklist. Copied into a new :class:`list`.
	:param checksum: The checksum of the worklist file. The format is unknown.
	"""

	__slots__ = ["version", "user_columns", "jobs", "checksum", "locked_run_mode", "instrument_name", "params"]

	def __init__(
			self,
			version: float,
			locked_run_mode: bool,
			instrument_name: str,
			params: dict,
			user_columns: dict,
			jobs: Sequence[JobData],
			checksum: "Checksum",
			):

		super().__init__()

		self.version = float(version)
		self.locked_run_mode = bool(locked_run_mode)
		self.instrument_name = str(instrument_name)
		self.params = params
		self.user_columns = user_columns
		self.jobs = list(jobs)
		self.checksum = checksum

	@property
	def __dict__(self):
		# The class declares ``__slots__``, so this property builds the mapping of
		# slot names to values on demand.
		# NOTE(review): presumably consumed by ``Dictable`` for ``dict(self)`` -- confirm.
		return {key: getattr(self, key) for key in self.__slots__}

	@classmethod
	def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Worklist":
		"""
		Construct a :class:`~.Worklist` object from an XML element.

		:param element: The XML element to parse the data from.
			Its ``Version``, ``Checksum`` and ``WorklistInfo`` children are read.

		:raises ValueError: If ``LockedRunMode`` is neither ``-1`` nor ``0``.
		"""

		version = float(element.Version)
		checksum = Checksum.from_xml(element.Checksum)

		worklist_info = element.WorklistInfo

		# The file stores the flag as an integer: -1 means locked, 0 means not locked.
		if worklist_info.LockedRunMode == -1:
			locked_run_mode = True
		elif worklist_info.LockedRunMode == 0:
			locked_run_mode = False
		else:
			raise ValueError("Unknown value for 'LockedRunMode'")

		instrument_name = str(worklist_info.Instrument)
		params = parse_params(worklist_info.Params)

		user_columns: Dict[str, Column] = {}

		for attribute_element in worklist_info.AttributeInformation.iterchildren("Attributes"):
			attribute = Attribute.from_xml(attribute_element)

			# Only attributes that are not system defined become user columns.
			if attribute.attribute_type != AttributeType.SystemDefined:
				column = Column.from_attribute(attribute)
				user_columns[column.name] = column

		# The user columns must be complete before the jobs are parsed,
		# as they are passed to each job's parser.
		jobs_list: List[JobData] = [
				JobData.from_xml(job, user_columns) for job in worklist_info.JobDataList.iterchildren("JobData")
				]

		return cls(
				version=version,
				locked_run_mode=locked_run_mode,
				instrument_name=instrument_name,
				params=params,
				user_columns=user_columns,
				jobs=jobs_list,
				checksum=checksum,
				)

	def __repr__(self) -> str:
		"""
		Return a string representation of the worklist, with its contents pretty-printed.
		"""

		return f"{self.__class__.__name__}({pformat(dict(self))})"

	def as_dataframe(self) -> pandas.DataFrame:
		"""
		Returns the :class:`~.Worklist` as a :class:`pandas.DataFrame`.

		There is one row per job. The column headers are the entries of ``columns``
		(imported from :mod:`mh_utils.worklist_parser.columns`) followed by the keys of ``user_columns``.
		Each cell is looked up in the job's ``sample_info`` by header label,
		so a job with no entry for a header causes the lookup error to propagate.
		"""

		headers = list(columns) + list(self.user_columns)
		data = [[job.sample_info[header_label] for header_label in headers] for job in self.jobs]

		# TODO: Sort columns by "reorder_id"

		return pandas.DataFrame(data, columns=headers)


@serde
@add_attrs_doc
@attr.s(slots=True)
class Checksum:
	"""
	Represents a checksum for a worklist.

	The format of the checksum is unknown.
	"""

	#: Taken from the ``SchemaVersion`` XML attribute by :meth:`~.Checksum.from_xml`.
	SchemaVersion: int = attr.ib(converter=int)

	#: Taken from the ``ALGO_VERSION`` XML attribute by :meth:`~.Checksum.from_xml`.
	ALGO_VERSION: int = attr.ib(converter=int)

	#: Taken from the ``HASHCODE`` XML attribute of the ``MAIN`` child element
	#: by :meth:`~.Checksum.from_xml`.
	HASHCODE: str = attr.ib(converter=str)

	@classmethod
	def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Checksum":
		"""
		Construct a :class:`~.Checksum` object from an XML element.

		:param element: The XML element to parse the data from.
			``SchemaVersion`` and ``ALGO_VERSION`` are XML attributes of the element itself;
			``HASHCODE`` is an XML attribute of its ``MAIN`` child.
		"""

		return cls(
				SchemaVersion=element.attrib["SchemaVersion"],
				ALGO_VERSION=element.attrib["ALGO_VERSION"],
				HASHCODE=element.MAIN.attrib["HASHCODE"],
				)


@serde
@add_attrs_doc
@attr.s(slots=True, repr=False)
class Macro:
	"""
	Represents a macro in a worklist.
	"""

	project_name: str = attr.ib(converter=strip_string)
	procedure_name: str = attr.ib(converter=strip_string)
	input_parameter: str = attr.ib(converter=strip_string)
	output_data_type: int = attr.ib(converter=int)
	output_parameter: str = attr.ib(converter=strip_string)
	display_string: str = attr.ib(converter=strip_string)

	# TODO: enum for output_data_type

	@classmethod
	def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Macro":
		"""
		Construct a :class:`~.Macro` object from an XML element.

		:param element: The XML element to parse the data from.
			Its ``ProjectName``, ``ProcedureName``, ``InputParameter``, ``OutputDataType``,
			``OutputParameter`` and ``DisplayString`` children are read.
		"""

		return cls(
				project_name=element.ProjectName,
				procedure_name=element.ProcedureName,
				input_parameter=element.InputParameter,
				output_data_type=element.OutputDataType,
				output_parameter=element.OutputParameter,
				display_string=element.DisplayString,
				)

	@property
	def undefined(self) -> bool:
		"""
		Returns whether the macro is undefined.

		A macro is undefined when every string field is empty and ``output_data_type`` is ``0``.
		"""

		return all([
				self.project_name == '',
				self.procedure_name == '',
				self.input_parameter == '',
				self.output_data_type == 0,
				self.output_parameter == '',
				self.display_string == '',
				])

	def __repr__(self) -> str:
		"""
		Return a string representation of the macro.

		Undefined macros are shown as ``Macro(Undefined)`` rather than listing every empty field.
		"""

		if self.undefined:
			return f"{self.__class__.__name__}(Undefined)"

		# ``__weakref__`` may appear in ``__slots__`` but is not one of the macro's fields.
		values = ", ".join(f"{x}={getattr(self, x)!r}" for x in self.__slots__ if x != "__weakref__")
		return f"{self.__class__.__name__}({values})"


@serde
@add_attrs_doc
@attr.s(slots=True)
class Attribute:
	"""
	Represents an Attribute.
	"""

	attribute_id: int = attr.ib(converter=int)

	#: Can be System Defined (``0``), System Used (``1``), or User Added (``2``).
	attribute_type: AttributeType = attr.ib(converter=AttributeType)

	field_type: int = attr.ib(converter=int)
	"""
	Each of the system defined columns have a field type starting from
	sampleid = 0 to reserved6 = 24. The system used column can be 'compound param' = 35,
	'optim param' = 36, 'mass param' = 37 and 'protein param' = 38.
	The User added columns start from 45.
	"""

	system_name: str = attr.ib(converter=strip_string)
	header_name: str = attr.ib(converter=strip_string)

	# TODO: determine data_type and use it to cast the values and the default value
	# Perhaps
	# DataFileValuedata_type = bdict(
	# Unspecified=0,
	# Byte=1,
	# Int16=2,
	# Int32=3,
	# Int64=4,
	# Float32=5,
	# Float64=6,
	# )
	data_type: Any = attr.ib(converter=int)

	default_data_value: str = attr.ib(converter=strip_string)
	reorder_id: int = attr.ib(converter=int)
	show_hide_status: bool = attr.ib(converter=element_to_bool)
	column_width: int = attr.ib(converter=int)

	# TODO: enum for data_type (this class has no ``output_data_type`` field)

	@classmethod
	def from_xml(cls, element: lxml.objectify.ObjectifiedElement) -> "Attribute":
		"""
		Construct an :class:`~.Attribute` object from an XML element.

		:param element: The XML element to parse the data from.
			Its ``AttributeID``, ``AttributeType``, ``FieldType``, ``SystemName``, ``HeaderName``,
			``DataType``, ``DefaultDataValue``, ``ReorderID``, ``ShowHideStatus`` and ``ColumnWidth``
			children are read; each value is passed through the converter of the matching field.
		"""

		return cls(
				attribute_id=element.AttributeID,
				attribute_type=element.AttributeType,
				field_type=element.FieldType,
				system_name=element.SystemName,
				header_name=element.HeaderName,
				data_type=element.DataType,
				default_data_value=element.DefaultDataValue,
				reorder_id=element.ReorderID,
				show_hide_status=element.ShowHideStatus,
				column_width=element.ColumnWidth,
				)