Support import of various fixed-width formats.

This supports reading: SPSv1, SPSv2.1, P190 and custom fixed-width formats. Supports skipping lines by startswith() matching or by complete match (e.g., "EOF"). Closes #300 (SPS v1) Closes #301 (SPS v2.1) Closes #302 (P1/90)
2025-12-06 12:57:08 +00:00 · 2024-04-30 19:05:54 +02:00
parent 934b921f69
commit be0d7b269f
3 changed files with 131 additions and 73 deletions
--- a/bin/fwr.py
+++ b/bin/fwr.py
@@ -0,0 +1,108 @@
 #!/usr/bin/python3
 """
 Fixed width record importing functions.
 """
 import builtins
 transform = {
 	# Converters from the raw text of a field to a Python value, keyed by the
 	# "type" entry of a field specification.
 	# "int" goes through float() first so that text such as "12.0" is accepted.
 	"int": lambda v: builtins.int(float(v)),
 	"float": float,
 	"string": str,
 	# NOTE(review): bool() is True for any non-empty text, including "0" and blanks.
 	"bool": bool
 }
 def parse_line (line, fields):
 	"""Extract the fields described by `fields` from one fixed-width `line`.
 	line: the text of the record.
 	fields: a dict mapping each output key to a field specification, itself a
 		dict with "offset" (zero-based start index), "length" and "type" (a key
 		of `transform`), plus optional "enum" (a mapping of raw values to
 		replacement values) and "default" (used when no enum key matches).
 	Returns a dict with one converted value per key of `fields`.
 	Raises KeyError for an unknown "type" and whatever the converter raises
 	when the field text cannot be converted.
 	"""
 	data = dict()
 	for key, spec in fields.items():
 		transformer = transform[spec["type"]]
 		pos_from = spec["offset"]
 		pos_to = pos_from + spec["length"]
 		text = line[pos_from:pos_to]
 		value = transformer(text)
 		if "enum" in spec:
 			enum = spec["enum"]
 			for enum_key in enum:
 				# Match against what was actually read from the line: the raw
 				# field text, or its converted value for non-string enum keys.
 				if enum_key == text or enum_key == value:
 					value = transformer(enum[enum_key])
 					break
 			else:
 				# The default is only a fallback for unmatched values; it must
 				# not replace the value before the enum lookup takes place.
 				if "default" in spec:
 					value = spec["default"]
 		data[key] = value
 	return data
 specfields = dict(
 	# Built-in field layouts, keyed by the value of "type" in an import spec.
 	# Each field gives the zero-based offset of its first character in the
 	# line, its length in characters and the name of the converter to apply.
 	sps1 = dict(
 		line_name = dict(offset=1, length=16, type="int"),
 		point_number = dict(offset=17, length=8, type="int"),
 		easting = dict(offset=46, length=9, type="float"),
 		northing = dict(offset=55, length=10, type="float"),
 	),
 	sps21 = dict(
 		line_name = dict(offset=1, length=7, type="int"),
 		point_number = dict(offset=11, length=7, type="int"),
 		easting = dict(offset=46, length=9, type="float"),
 		northing = dict(offset=55, length=10, type="float"),
 	),
 	p190 = dict(
 		line_name = dict(offset=1, length=12, type="int"),
 		point_number = dict(offset=19, length=6, type="int"),
 		easting = dict(offset=46, length=9, type="float"),
 		northing = dict(offset=55, length=9, type="float"),
 	),
 )
 def from_file(path, spec):
 	"""Read the fixed-width file at `path` as described by `spec`.
 	path: path of the file to read.
 	spec: a dict with either "fields" (the field layout, as taken by
 		parse_line()) or "type" (a key of `specfields`); "fields" wins when
 		both are present. An optional "firstRow" gives the zero-based index of
 		the first line to consider; earlier lines are ignored.
 	For the "sps1", "sps21" and "p190" types, lines starting with "H" and
 	lines consisting of "EOF" are skipped.
 	Returns a list with one dict per imported line or, when the layout cannot
 	be determined, a string describing the error.
 	"""
 	# If spec.fields is not present, deduce it from spec.type ("sps1", "sps21", "p190", etc.)
 	if "fields" in spec:
 		fields = spec["fields"]
 	elif "type" in spec and spec["type"] in specfields:
 		fields = specfields[spec["type"]]
 	else:
 		return "Neither 'type' nor 'fields' given. I don't know how to import this fixed-width dataset."
 	firstRow = spec.get("firstRow", 0)
 	# These must be sequences of strings: iterating over a bare string such as
 	# "EOF" would yield its individual characters rather than the marker itself.
 	skipStart = () # Skip lines starting with any of these values
 	skipMatch = () # Skip lines matching any of these values
 	if "type" in spec and spec["type"] in ("sps1", "sps21", "p190"):
 		skipStart = ("H",)
 		skipMatch = ("EOF",)
 	records = []
 	with open(path, "r", errors="ignore") as fd:
 		# Every line counts towards the row number, whether it is skipped or not.
 		for row, line in enumerate(fd):
 			if row < firstRow:
 				continue
 			if line.startswith(skipStart):
 				continue
 			# Lines keep their line terminator (and may be space padded), so
 			# compare without trailing whitespace.
 			if line.rstrip() in skipMatch:
 				continue
 			records.append(parse_line(line, fields))
 	return records
--- a/bin/legacy_fwr.py
+++ b/bin/legacy_fwr.py
@@ -1,68 +0,0 @@
 #!/usr/bin/python3
 """
 Legacy fixed width record importing functions.
 """
 import builtins
 def parse_fwr (string, widths, start=0):
 	"""Split a fixed-width record into its fields.
 	string: the record to split.
 	widths: the width of each consecutive column. A positive width yields a
 		field; a negative width only moves past that many characters.
 	start: index of the first column (defaults to 0).
 	Returns the list of extracted substrings, in column order.
 	"""
 	fields = []
 	cursor = start
 	for width in widths:
 		# The column ends abs(width) characters further on, kept or not.
 		column_end = cursor + abs(width)
 		if width > 0:
 			fields.append(string[cursor:column_end])
 		cursor = column_end
 	return fields
 def int (v):
 	"""Convert `v` to an integer by way of float, so that "12.0" is accepted.
 	Any fractional part is discarded. Within this module the name shadows the
 	builtin, which is why the builtin is reached through `builtins`.
 	"""
 	as_float = float(v)
 	return builtins.int(as_float)
 def parse_line (string, spec):
 	"""Turn one fixed-width line into a dict of named, converted values.
 	string: the line to parse.
 	spec: a dict with "names" (output keys), "widths" (column widths, as taken
 		by parse_fwr()) and "normalisers" (one converter per extracted field).
 	Returns a dict pairing each name with its converted field.
 	"""
 	field_names, column_widths, converters = spec["names"], spec["widths"], spec["normalisers"]
 	raw_values = parse_fwr(string, column_widths)
 	converted = []
 	for convert, raw in zip(converters, raw_values):
 		converted.append(convert(raw))
 	return dict(zip(field_names, converted))
 def from_file(path, spec = None):
 	"""Read a fixed-width file and return one dict per line.
 	path: path of the file to read.
 	spec: optional dict with "names", "widths" and "types" (a list holding
 		any of "int", "float", "str" and "bool"). Types outside that list are
 		silently dropped. When omitted, a built-in layout yielding line_name,
 		point_number, easting and northing is used.
 	Reading stops at the first line consisting of "EOF".
 	Returns the list of parsed records. The caller's `spec` is left untouched.
 	"""
 	if spec is None:
 		spec = {
 			"names": [ "line_name", "point_number", "easting", "northing" ],
 			"widths": [ -1, 10, 10, -25, 10, 10 ],
 			"normalisers": [ int, int, float, float ]
 		}
 	else:
 		# Explicit lookup table in place of eval(); `int` is whatever the name
 		# resolves to at module level.
 		normalisers = { "int": int, "float": float, "str": str, "bool": bool }
 		# Work on a copy so the caller's dict is never modified, not even when
 		# parsing fails part way through the file.
 		spec = dict(spec, normalisers=[
 			normalisers[t] for t in spec["types"]
 			if isinstance(t, str) and t in normalisers
 		])
 	records = []
 	with open(path) as fd:
 		for line in fd:
 			# Lines keep their line terminator, so compare the stripped text.
 			if line.rstrip() == "EOF":
 				break
 			records.append(parse_line(line, spec))
 	return records
--- a/bin/preplots.py
+++ b/bin/preplots.py
@@ -1,15 +1,33 @@
-import legacy_fwr
+import fwr
 """
 Preplot importing functions.
 """
 def from_file (file, realpath = None):
 	"""
 	Return a list of dicts, where each dict has the structure:
 		{
 			"line_name": <int>,
 			"points": [
 				{
 					"line_name": <int>,
 					"point_number": <int>,
 					"easting": <float>,
 					"northing": <float>
 				},
 				…
 			]
 		}
 	On error, return a string describing the error condition.
 	"""
 	filepath = realpath or file["path"]
-	if not "type" in file or file["type"] == "sps":
+	records = fwr.from_file(filepath, file)
-		records = legacy_fwr.from_file(filepath, file["format"] if "format" in file else None )
+
-	else:
+	if type(records) == str:
-		return "Not an SPS file"
+		# This is an error message
 		return records
 	lines = []
 	line_names = set([r["line_name"] for r in records])