Source code for morphops.io

"""Provides IO functions to read from and write to files in common landmark data
file formats.
"""

import numpy as np
import morphops.lmk_util as lmk_util


class MopsFileReadError(Exception):
    """Raised when a landmark data file cannot be parsed while reading."""
class MopsFileWriteError(Exception):
    """Exception type for problems encountered when writing a landmark file."""
def read_dta(filename):
    r"""Reads \*.dta files, as written by the IDAV Landmark Editor.

    dta files typically have the following structure.

    1. Few comment lines. Comment lines start with a quotation mark (' or ").
    2. A header with structure "1 nL pk 1 9999 Dim=k". Here

       1. n is the number of specimens or number of landmark sets
       2. L in "nL" indicates that the file has specimen labels - assumed
          true
       3. p is the number of landmarks per landmark set
       4. k is the number of coordinates of each landmark (usually 2 or 3)

       The "1 9999" are ignored (but expected to exist) when reading. This
       is because those two numbers are a misapplication of the NTS format,
       which the DTA format is based on. Per the NTS format, the
       interpretation of the "1 9999" is that the file has missing data
       indicated by 9999. DTA files always contain the "1 9999" numbers,
       regardless of whether the file actually has missing data.
    3. n lines, each corresponding to the label of 1 specimen.
    4. n blocks of p lines. Each line contains k numbers. These correspond to
       p k-D landmarks in each of the n specimens specified in the order of
       appearance of their names in the preceding section.

    Parameters
    ----------
    filename : str
        Path of the \*.dta file to read.

    Returns
    -------
    lmk_sets : numpy.ndarray
        float64 array of shape (n, p, k) holding the landmark coordinates.
    names : list of str
        The n specimen labels, in the order they appear in the file.

    Raises
    ------
    MopsFileReadError
        If the header does not have 6 parts, if a data line is found before
        the header (or no header is found at all), or if a coordinate line
        does not contain exactly k values.

    Todo
    ----
    Make implementation less non-pythonic if possible, while not allowing
    errors to go undetected.
    """
    # n, p, k stay None until the header has been parsed.
    n = p = k = None
    names = []
    pts = []
    with open(filename, 'r') as f:
        # Line indices reported in error messages are 0-based.
        for curr_line_i, raw_line in enumerate(f):
            line = raw_line.strip()

            # Skip blank lines and comment lines (which start with a quote).
            if not line or line.startswith(("'", '"')):
                continue

            # The first meaningful line must be the header, which indicates
            # a rectangular matrix. Eg- "1 2L 30 1 9999 Dim=3"
            if n is None:
                if not line.startswith("1"):
                    raise MopsFileReadError(
                        "Error in line {}. Expected a .dta file header "
                        "before any data.".format(curr_line_i))
                header_els = line.split()
                if len(header_els) != 6:
                    raise MopsFileReadError(
                        "Error in line {}. A .dta file "
                        "header must have 6 parts.".format(curr_line_i))
                # Item 1 is the n_lmk_sets, followed by L or l
                n = int(header_els[1].replace('L', '').replace('l', ''))
                # Item 2 is the n_lmks*n_coords
                pk = int(header_els[2])
                # Item 5 contains the dimensions k.
                k = int(header_els[5].lower().replace('dim=', ''))
                p = pk // k
                continue

            # Read the lmk set names
            if len(names) < n:
                names.append(line)
                continue

            # Read in the coords. Compare by value: an identity check
            # (`is not`) only happens to work for small cached integers.
            coords = line.split()
            if len(coords) != k:
                raise MopsFileReadError(
                    "Error in line {}. Could not parse the "
                    "coordinates {}".format(curr_line_i, line))
            pts.append(np.array(coords).astype(np.float64))

    if n is None:
        raise MopsFileReadError(
            "No .dta file header found in {}.".format(filename))

    # Reshape into a n x p x k tensor
    lmk_sets = np.array(pts).reshape((n, p, k))
    return lmk_sets, names
def write_dta(filename, lmk_sets, names=None):
    r"""Writes \*.dta files, as written by the IDAV Landmark Editor.

    The file consists of a comment line, the header
    "1 nL pk 1 9999 Dim=k", n label lines and then n blocks of p coordinate
    lines with k numbers each.

    Parameters
    ----------
    filename : str
        Path of the file to write. An existing file is overwritten.
    lmk_sets : array-like
        The landmark sets. The counts n, p and k are obtained from
        ``lmk_util.num_lmk_sets``, ``lmk_util.num_lmks`` and
        ``lmk_util.num_coords``, and individual landmarks are read as
        ``lmk_sets[i, j, 0:k]`` (presumably an (n, p, k) numpy array -
        confirm against ``lmk_util``).
    names : sequence of str, optional
        Labels of the landmark sets. Missing names are populated as
        'InsertName{ID}', where ID goes from [len(names) + 1, n + 1).

    Raises
    ------
    MopsFileWriteError
        If more names than landmark sets are supplied. The extra label lines
        would make the file unreadable, so nothing is written in that case.

    See also
    --------
    read_dta: For an explanation of the \*.dta format.

    Todo
    ----
    Make implementation less non-pythonic if possible, without letting
    errors slip through without raising.
    """
    n = lmk_util.num_lmk_sets(lmk_sets)
    p = lmk_util.num_lmks(lmk_sets)
    k = lmk_util.num_coords(lmk_sets)

    given_names = [] if names is None else [str(name) for name in names]
    # Validate before opening, so a bad call does not truncate an existing
    # file.
    if len(given_names) > n:
        raise MopsFileWriteError(
            "Got {} names for {} landmark sets.".format(len(given_names), n))
    placeholder_names = ['InsertName{}'.format(name_id)
                         for name_id in range(len(given_names) + 1, n + 1)]
    all_names = given_names + placeholder_names

    # array2string wraps its output at 75 characters by default, which could
    # split one landmark over several lines. Use an effectively unlimited
    # width so that every landmark stays on a single line.
    no_wrap_width = 2 ** 31 - 1

    with open(filename, 'w') as f:
        # Write some comments
        f.write("\'DTA file written by morphops\n")
        f.write("\n")

        # Write header. The 'L' after n flags that label lines follow; they
        # are always written below.
        f.write("1 {}L {} 1 9999 Dim={}\n".format(n, p * k, k))
        f.write("\n")

        # Write the names.
        for name in all_names:
            f.write(name + "\n")

        # Write the coordinates, one blank line before each landmark set.
        for i in range(n):
            f.write("\n")
            for j in range(p):
                lmk = np.array(lmk_sets[i, j, 0:k])
                lmkstr = np.array2string(lmk, precision=20,
                                         max_line_width=no_wrap_width)
                lmkstr = lmkstr.replace('[', '').replace(']', '').strip()
                f.write(lmkstr + "\n")