dougal-software/bin/fwr.py

#!/usr/bin/python3

"""
Fixed width record importing functions.
"""

import builtins

def to_bool (v):
	"""
	Cast a field value to a boolean.

	Any value that `int()` accepts is true when it is non-zero.
	Text that does not spell an integer is true only when, ignoring
	surrounding whitespace and letter case, it starts with "t".
	Every other value (None, containers, ...) is false.
	"""
	try:
		return bool(int(v))
	except (TypeError, ValueError):
		# ValueError: text that is not an integer literal.
		# TypeError: int() cannot convert this type at all (e.g. None);
		# such values fall through to the final `return False` instead
		# of aborting the import.
		if isinstance(v, str):
			return v.strip().lower().startswith("t")
		return False

def _to_int (v):
	"""
	Cast a field value to an integer.

	An exact `int()` conversion is tried first so that long digit
	strings keep every digit; going through `float` would round
	integers that do not fit in a double's mantissa. Text such as
	"12.0" or "1e3", which `int()` rejects, is converted via `float`
	and truncated. Text that is not numeric at all raises ValueError.
	"""
	try:
		return builtins.int(v)
	except ValueError:
		return builtins.int(float(v))

# Casting function for each supported field "type" name.
transform = {
	"int": _to_int,
	"float": float,
	"string": str,
	"str": str,
	"bool": to_bool
}

def parse_line (line, fields, fixed = None):
	"""
	Extract typed values from one fixed-width record.

	line   -- the text of the record.
	fields -- mapping of output key to field spec. A spec holds
	          "offset" and "length" (zero-based character position and
	          width of the field within `line`) and optionally:
	            "type"    -- a key of `transform`; defaults to "str".
	            "enum"    -- mapping of raw field text to the value to
	                         use instead; the mapped value is cast
	                         with the field's type.
	            "default" -- value used as is when "enum" is given and
	                         the field text matches none of its keys.
	fixed  -- optional sequence of {"offset", "text"} dicts; each text
	          must appear verbatim in `line` at its offset.

	Returns a dict of key to value. If a fixed text check fails, a
	string describing the mismatch is returned instead (nothing is
	raised for that case).

	Raises KeyError for a field type that is not in `transform`, plus
	whatever the casting function raises for text it cannot convert.
	"""
	for expected in fixed or ():
		start = expected["offset"]
		end = start + len(expected["text"])
		found = line[start:end]
		if found != expected["text"]:
			return f"Expected text `{expected['text']}` at position {start} but found `{found}` instead."

	data = {}
	for key, spec in fields.items():
		transformer = transform[spec.get("type", "str")]
		pos_from = spec["offset"]
		text = line[pos_from:pos_from + spec["length"]]
		enum = spec.get("enum")

		# Consult the enum before casting the raw text: enum keys are
		# matched against the text itself and need not be valid input
		# for the field's type (e.g. key "A" on an "int" field).
		if enum is not None and text in enum:
			value = transformer(enum[text])
		elif enum is not None and "default" in spec:
			value = spec["default"]
		else:
			value = transformer(text)

		data[key] = value

	return data


# Built-in field layouts, keyed by the name given in spec["type"].
# Every field is described by its zero-based character offset within
# the record, its width in characters and the type to cast it to.
specfields = dict(
	sps1 = dict(
		line_name = dict(offset = 1, length = 16, type = "int"),
		point_number = dict(offset = 17, length = 8, type = "int"),
		easting = dict(offset = 46, length = 9, type = "float"),
		northing = dict(offset = 55, length = 10, type = "float"),
	),
	sps21 = dict(
		line_name = dict(offset = 1, length = 7, type = "int"),
		point_number = dict(offset = 11, length = 7, type = "int"),
		easting = dict(offset = 46, length = 9, type = "float"),
		northing = dict(offset = 55, length = 10, type = "float"),
	),
	p190 = dict(
		line_name = dict(offset = 1, length = 12, type = "int"),
		point_number = dict(offset = 19, length = 6, type = "int"),
		easting = dict(offset = 46, length = 9, type = "float"),
		northing = dict(offset = 55, length = 9, type = "float"),
	),
)

def from_file(path, spec):
	"""
	Import the data records of a fixed-width text file.

	path -- path of the file to read.
	spec -- import description:
	          "fields"   -- field layout as accepted by parse_line.
	          "type"     -- name of a built-in layout in `specfields`;
	                        only used for the layout when "fields" is
	                        absent.
	          "firstRow" -- zero-based index of the first line to
	                        consider; earlier lines are skipped.
	                        Defaults to 0.
	        For the types "sps1", "sps21" and "p190", lines starting
	        with "H" and lines consisting only of "EOF" are skipped
	        as well.

	Returns a list holding the parse_line result of every line that
	was not skipped. If neither a usable "type" nor "fields" is given,
	an error string is returned instead.
	"""

	# If spec.fields is not present, deduce it from spec.type ("sps1", "sps21", "p190", etc.)
	if "fields" in spec:
		fields = spec["fields"]
	elif "type" in spec and spec["type"] in specfields:
		fields = specfields[spec["type"]]
	else:
		# TODO: Should default to looking for spec.format and doing a legacy import on it
		return "Neither 'type' nor 'fields' given. I don't know how to import this fixed-width dataset."

	firstRow = spec.get("firstRow", 0)

	# These must be sequences of strings. A bare string such as "EOF"
	# would be iterated character by character and never equal a line.
	skipStart = () # Skip lines starting with any of these values
	skipMatch = () # Skip lines matching any of these values

	if spec.get("type") in ("sps1", "sps21", "p190"):
		skipStart = ("H",)
		skipMatch = ("EOF",)

	records = []
	with open(path, "r", errors="ignore") as fd:
		for row, line in enumerate(fd):
			if row < firstRow:
				continue
			if line.startswith(skipStart):
				continue
			# Lines still carry their line terminator, so compare
			# without trailing whitespace.
			if line.rstrip() in skipMatch:
				continue
			records.append(parse_line(line, fields))

	return records
Support import of various fixed-width formats. This supports reading: SPSv1, SPSv2.1, P190 and custom fixed-width formats. Supports skipping lines by startswith() matching or by complete match (e.g., "EOF"). Closes #300 (SPS v1) Closes #301 (SPS v2.1) Closes #302 (P1/90) 2024-04-30 19:05:54 +02:00			`#!/usr/bin/python3`

			`"""`
			`Fixed width record importing functions.`
			`"""`

			`import builtins`

Fix bool casting. A true value is any text that starts with `t` (case insensitive) or any non-zero integer. 2024-05-03 11:40:53 +02:00			`def to_bool (v):`
			`try:`
			`return bool(int(v))`
			`except ValueError:`
			`if type(v) == str:`
			`return v.strip().lower().startswith("t")`
			`return False`

Support import of various fixed-width formats. This supports reading: SPSv1, SPSv2.1, P190 and custom fixed-width formats. Supports skipping lines by startswith() matching or by complete match (e.g., "EOF"). Closes #300 (SPS v1) Closes #301 (SPS v2.1) Closes #302 (P1/90) 2024-04-30 19:05:54 +02:00			`transform = {`
			`"int": lambda v: builtins.int(float(v)),`
			`"float": float,`
			`"string": str,`
Allow also `str` as a field type specifier 2024-08-22 18:43:57 +02:00			`"str": str,`
Fix bool casting. A true value is any text that starts with `t` (case insensitive) or any non-zero integer. 2024-05-03 11:40:53 +02:00			`"bool": to_bool`
Support import of various fixed-width formats. This supports reading: SPSv1, SPSv2.1, P190 and custom fixed-width formats. Supports skipping lines by startswith() matching or by complete match (e.g., "EOF"). Closes #300 (SPS v1) Closes #301 (SPS v2.1) Closes #302 (P1/90) 2024-04-30 19:05:54 +02:00			`}`

Add fixed strings support to parse_line 2024-05-05 19:34:01 +02:00			`def parse_line (line, fields, fixed = None):`
Comment out debug output 2025-08-07 10:52:13 +02:00			`# print("parse_line", line, fields, fixed)`
Support import of various fixed-width formats. This supports reading: SPSv1, SPSv2.1, P190 and custom fixed-width formats. Supports skipping lines by startswith() matching or by complete match (e.g., "EOF"). Closes #300 (SPS v1) Closes #301 (SPS v2.1) Closes #302 (P1/90) 2024-04-30 19:05:54 +02:00			`data = dict()`

Add fixed strings support to parse_line 2024-05-05 19:34:01 +02:00			`if fixed:`
			`for value in fixed:`
			`start = value["offset"]`
			`end = start + len(value["text"])`
			`text = line[start:end]`
			`if text != value["text"]:`
			return f"Expected text `{value['text']}` at position {start} but found `{text}` instead."

Support import of various fixed-width formats. This supports reading: SPSv1, SPSv2.1, P190 and custom fixed-width formats. Supports skipping lines by startswith() matching or by complete match (e.g., "EOF"). Closes #300 (SPS v1) Closes #301 (SPS v2.1) Closes #302 (P1/90) 2024-04-30 19:05:54 +02:00			`for key in fields:`
			`spec = fields[key]`
Default to text if no field type is specified. 2024-08-22 18:44:24 +02:00			`transformer = transform[spec.get("type", "str")]`
Support import of various fixed-width formats. This supports reading: SPSv1, SPSv2.1, P190 and custom fixed-width formats. Supports skipping lines by startswith() matching or by complete match (e.g., "EOF"). Closes #300 (SPS v1) Closes #301 (SPS v2.1) Closes #302 (P1/90) 2024-04-30 19:05:54 +02:00			`pos_from = spec["offset"]`
			`pos_to = pos_from + spec["length"]`
			`text = line[pos_from:pos_to]`
			`value = transformer(text)`
			`if "enum" in spec:`
			`if "default" in spec:`
			`value = spec["default"]`
			`for enum_key in spec["enum"]:`
Check enum keys against text instead of cast value 2024-05-03 11:40:21 +02:00			`if enum_key == text:`
Support import of various fixed-width formats. This supports reading: SPSv1, SPSv2.1, P190 and custom fixed-width formats. Supports skipping lines by startswith() matching or by complete match (e.g., "EOF"). Closes #300 (SPS v1) Closes #301 (SPS v2.1) Closes #302 (P1/90) 2024-04-30 19:05:54 +02:00			`enum_value = transformer(spec["enum"][enum_key])`
			`value = enum_value`
			`break`

			`data[key] = value`

Comment out debug output 2025-08-07 10:52:13 +02:00			`# print("parse_line data =", data)`
Support import of various fixed-width formats. This supports reading: SPSv1, SPSv2.1, P190 and custom fixed-width formats. Supports skipping lines by startswith() matching or by complete match (e.g., "EOF"). Closes #300 (SPS v1) Closes #301 (SPS v2.1) Closes #302 (P1/90) 2024-04-30 19:05:54 +02:00			`return data`


			`specfields = {`
			`"sps1": {`
			`"line_name": { "offset": 1, "length": 16, "type": "int" },`
			`"point_number": { "offset": 17, "length": 8, "type": "int" },`
			`"easting": { "offset": 46, "length": 9, "type": "float" },`
			`"northing": { "offset": 55, "length": 10, "type": "float" }`
			`},`
			`"sps21": {`
			`"line_name": { "offset": 1, "length": 7, "type": "int" },`
			`"point_number": { "offset": 11, "length": 7, "type": "int" },`
			`"easting": { "offset": 46, "length": 9, "type": "float" },`
			`"northing": { "offset": 55, "length": 10, "type": "float" }`
			`},`
			`"p190": {`
			`"line_name": { "offset": 1, "length": 12, "type": "int" },`
			`"point_number": { "offset": 19, "length": 6, "type": "int" },`
			`"easting": { "offset": 46, "length": 9, "type": "float" },`
			`"northing": { "offset": 55, "length": 9, "type": "float" }`
			`},`
			`}`

			`def from_file(path, spec):`

			`# If spec.fields is not present, deduce it from spec.type ("sps1", "sps21", "p190", etc.)`
			`if "fields" in spec:`
			`fields = spec["fields"]`
			`elif "type" in spec and spec["type"] in specfields:`
			`fields = specfields[spec["type"]]`
			`else:`
Add TODO comment 2024-05-03 11:41:50 +02:00			`# TODO: Should default to looking for spec.format and doing a legacy import on it`
Support import of various fixed-width formats. This supports reading: SPSv1, SPSv2.1, P190 and custom fixed-width formats. Supports skipping lines by startswith() matching or by complete match (e.g., "EOF"). Closes #300 (SPS v1) Closes #301 (SPS v2.1) Closes #302 (P1/90) 2024-04-30 19:05:54 +02:00			`return "Neither 'type' nor 'fields' given. I don't know how to import this fixed-width dataset."`

			`firstRow = spec.get("firstRow", 0)`

			`skipStart = [] # Skip lines starting with any of these values`
			`skipMatch = [] # Skip lines matching any of these values`

			`if "type" in spec:`
			`if spec["type"] == "sps1" or spec["type"] == "sps21" or spec["type"] == "p190":`
			`skipStart = "H"`
			`skipMatch = "EOF"`

			`records = []`
			`with open(path, "r", errors="ignore") as fd:`
			`row = 0`
			`line = fd.readline()`

			`while line:`
			`skip = False`

			`if row < firstRow:`
			`skip = True`

			`if not skip:`
			`for v in skipStart:`
			`if line.startswith(v):`
			`skip = True`
			`break`
			`for v in skipMatch:`
			`if line == v:`
			`skip = True`
			`break`

			`if not skip:`
			`records.append(parse_line(line, fields))`

			`row += 1`
			`line = fd.readline()`

			`return records`