84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
class TomogramFile(Tomogram):
    """Represents a tomogram stored in a file on disk.

    Extends the Tomogram class to handle file operations, including loading
    tomogram data from `.mrc`, `.rec` and `.npy` files. The file extension is
    matched case-insensitively.

    Access the image data inside with `get_data()`.

    Attributes:
        filepath (str): The file path to the tomogram file.
        annotations (list of Annotation): Annotations corresponding to the
            tomogram. May be None.
        data (numpy.ndarray): A 3-dimensional array containing the tomogram
            image, or None while the data has not been loaded yet.
        header (dict or numpy.recarray): Other data related to the tomogram
            file. An empty dict for `.npy` files, which carry no header.
        shape (tuple): Set from the file header as (nz, ny, nx) for
            `.mrc`/`.rec` files. Not set by the header for `.npy` files; use
            `get_shape()`, which falls back to the loaded array.
    """

    # Lower-case extensions read through mrcfile and numpy respectively.
    _MRC_EXTENSIONS = (".mrc", ".rec")
    _NPY_EXTENSIONS = (".npy",)
    _UNSUPPORTED_MESSAGE = "Tomogram file must be of type .mrc, .rec, or .npy."

    def __init__(
        self,
        filepath: str,
        annotations: Optional[List[Annotation]] = None,
        *,
        load: bool = True
    ):
        """Initialize a TomogramFile instance.

        The header is always read. The parent Tomogram initializer only runs
        once the array data is loaded (see `load()`).

        Args:
            filepath (str): The file path to the tomogram file.
            annotations (list of Annotation, optional): Annotations
                corresponding to the tomogram. Defaults to None.
            load (bool, optional): Whether to load tomogram array data
                immediately. Defaults to True. If False, use self.load() when
                ready to load data.

        Raises:
            IOError: If the file type is not supported.
        """
        self.data = None
        self.annotations = annotations
        self.filepath = filepath
        self.load_header()
        if load:
            self.load()

    def _extension(self) -> str:
        """Return the lower-cased extension of `self.filepath`, dot included."""
        return os.path.splitext(self.filepath)[1].lower()

    def _read_array(self) -> np.ndarray:
        """Read the raw array from disk, dispatching on the file extension.

        Shared by `load()` and `reload()` so both support the same formats.

        Raises:
            IOError: If the file type is not supported.
        """
        extension = self._extension()
        if extension in self._MRC_EXTENSIONS:
            return TomogramFile.mrc_to_np(self.filepath)
        if extension in self._NPY_EXTENSIONS:
            return np.load(self.filepath)
        raise IOError(self._UNSUPPORTED_MESSAGE)

    def load(self, *, preprocess: bool = True):
        """Load the tomogram data from the specified file.

        This method determines the file type based on its extension and loads
        the data accordingly. If the data is already loaded it is returned
        as-is and `preprocess` has no effect.

        Args:
            preprocess (bool, optional): Whether to preprocess the data after
                loading. Defaults to True.

        Returns:
            The loaded tomogram data.

        Raises:
            IOError: If the file type is not supported.
        """
        if self.data is not None:
            return self.data

        data = self._read_array()

        # Initialize the parent Tomogram now that the array is available.
        super().__init__(data, self.annotations)

        if preprocess:
            self.process()
        return self.data

    def load_header(self) -> Union[dict, np.recarray]:
        """Load only the tomogram header data from the specified file.

        This method determines the file type based on its extension and loads
        the data accordingly. For `.mrc`/`.rec` files it also sets
        `self.shape` from the header.

        Returns:
            The loaded tomogram header data (an empty dict for `.npy`).

        Raises:
            IOError: If the file type is not supported.
        """
        extension = self._extension()
        if extension in self._MRC_EXTENSIONS:
            # The context manager closes the file even if reading fails.
            with mrcfile.open(self.filepath, header_only=True) as mrc:
                self.header = mrc.header
                # Shape seems backward because python convention is reverse
                # of FORTRAN convention. This is deliberate.
                self.shape = tuple(
                    self.header[dim].item() for dim in ("nz", "ny", "nx")
                )
        elif extension in self._NPY_EXTENSIONS:
            self.header = dict()
        else:
            raise IOError(self._UNSUPPORTED_MESSAGE)
        return self.header

    def get_data(self, *, preprocess: bool = True) -> np.ndarray:
        """Access the data array in the tomogram.

        If the data has not been loaded, this method loads it and then returns
        the loaded array.

        Args:
            preprocess (bool, optional): Whether to preprocess the data after
                loading. Defaults to True. Ignored when the data is already
                loaded.

        Returns:
            The array data of the tomogram. In other words, returns the image.
        """
        return self.load(preprocess=preprocess)

    def get_shape(self, *, preprocess: bool = True) -> tuple:
        """Access the data array shape in the tomogram.

        Uses the shape already known (for `.mrc`/`.rec` files it comes from
        the header). If no shape is known yet, this method loads the data and
        returns the loaded array shape.

        Args:
            preprocess (bool, optional): Whether to preprocess the data if it
                has to be loaded. Defaults to True.

        Returns:
            The data array shape of the tomogram. In other words, returns the
            image's dimensions.
        """
        shape = getattr(self, "shape", None)
        if shape is None:
            # No header-derived shape (e.g. `.npy`): fall back to the array.
            shape = tuple(self.load(preprocess=preprocess).shape)
            self.shape = shape
        return shape

    def get_voxel_spacing(self):
        """Find the voxel spacing of this tomogram in Ångstroms.

        Uses `.mrc`/`.rec` file header information.

        Returns:
            Either a single number (if the voxel spacing is isotropic, i.e.,
            the same in all directions), or a length-3 array (if the spacing
            is anisotropic) holding the x, y and z spacing.

        Raises:
            IOError: If the file type is not `.mrc` or `.rec`.
        """
        if self._extension() not in self._MRC_EXTENSIONS:
            raise IOError("Tomogram file must be .mrc to load the voxel spacing.")

        with mrcfile.open(self.filepath, mode='r', header_only=True) as mrc:
            spacing = mrc.voxel_size
            # Convert pesky np.recarray to a normal ndarray while the file
            # is still open.
            if isinstance(spacing, np.recarray):
                spacing = np.array(
                    [spacing.x.item(), spacing.y.item(), spacing.z.item()]
                )

        # If the spacing is already a scalar, return it.
        if isinstance(spacing, (int, float)):
            return spacing

        # Collapse to one value when all three directions agree.
        if spacing[0] == spacing[1] and spacing[0] == spacing[2]:
            return spacing[0]
        return spacing

    @staticmethod
    def rescale(array: np.ndarray) -> np.ndarray:
        """Rescale array values to the range [0, 1].

        Args:
            array (numpy.ndarray): The array to be rescaled.

        Returns:
            The rescaled array. A constant array (max == min) has no range to
            stretch, so an all-zero float array of the same shape is returned
            instead of dividing by zero.
        """
        minimum = np.min(array)
        range_ = np.max(array) - minimum
        shifted = array - minimum
        if range_ == 0:
            return np.zeros_like(shifted, dtype=np.float64)
        return shifted / range_

    @staticmethod
    def mrc_to_np(filepath: str) -> np.ndarray:
        """Convert a .mrc or .rec file to a numpy array.

        Args:
            filepath (str): The file path to the .mrc or .rec file.

        Returns:
            The data loaded as a float64 numpy array.
        """
        with mrcfile.open(filepath, 'r') as mrc:
            # astype copies, so the array stays valid after the file closes.
            data = mrc.data.astype(np.float64)
        return data

    def process(self) -> np.ndarray:
        """Process the tomogram to improve contrast using contrast stretching.

        This method applies contrast stretching between the 2nd and 98th
        percentiles to enhance the visibility of features in the tomogram.
        The result replaces `self.data`.

        Returns:
            The processed tomogram data.
        """
        # Load without preprocessing: loading with it would call process()
        # itself and the stretch would then be applied twice.
        data = self.load(preprocess=False)

        # Contrast stretching
        p2, p98 = np.percentile(data, (2, 98))
        self.data = exposure.rescale_intensity(data, in_range=(p2, p98))
        return self.data

    def reload(self, *, preprocess: bool = False) -> np.ndarray:
        """Reload the tomogram data from the file.

        This method replaces the tomogram data by reading it again from the
        specified file, whatever its supported format. If the data has
        already been loaded, access it with self.get_data().

        Args:
            preprocess (bool, optional): Whether to preprocess the data after
                reloading. Defaults to False, i.e. the raw file contents.

        Returns:
            The reloaded tomogram data.

        Raises:
            IOError: If the file type is not supported.
        """
        self.data = self._read_array()
        if preprocess:
            self.process()
        return self.data

    def get_shape_from_annotations(self) -> np.ndarray:
        """Infer the tomogram shape from attached `.mod` annotations.

        Returns the shape of the tomogram without having to load it, using
        the annotations attached to this tomogram, if any are AnnotationFiles
        of `.mod` type.

        Returns:
            The shape of the tomogram as inferred from self.annotations.

        Raises:
            ValueError: If no `.mod` AnnotationFile objects are in
                self.annotations (including when it is None or empty).
            ValueError: If there are multiple `.mod` AnnotationFile objects
                in self.annotations and they imply inconsistent shapes.
        """
        # annotations defaults to None in the constructor; treat as empty.
        shapes = [
            annotation.tomogram_shape_from_mod()
            for annotation in (self.annotations or [])
            if isinstance(annotation, AnnotationFile)
            and annotation.extension == ".mod"
        ]
        if not shapes:
            raise ValueError(
                "No .mod annotations found. Cannot infer tomogram shape."
            )

        # Confirm that all the shapes agree. array_equal works whether the
        # shapes are tuples or arrays.
        shape = shapes[0]
        for s in shapes[1:]:
            if not np.array_equal(s, shape):
                raise ValueError(
                    f"Inconsistent tomogram shapes of {shape} and {s} implied by .mod annotations."
                )
        return shape
|