
Extended Data Types

Extended Data Types (extended-data-types) is the core utility library for serialization, filesystem-aware workflows, transformation helpers, and typed data normalization inside larger automation systems.

pip install extended-data-types



Supported runtimes

Python 3.10 through 3.14.

Public API model

Start with the top-level extended_data_types namespace, then reach for the namespaced transformation modules when you need their richer surfaces directly.

Workflow-ready design

Explicit read/decode/write boundaries and plain-data HCL support make the package predictable inside larger automation systems.


Full YAML support with custom tag handling:

from extended_data_types import encode_yaml, decode_yaml

# Encode Python objects to YAML
data = {
    "name": "example",
    "version": "1.0.0",
    "features": ["yaml", "json", "toml"],
    "config": {"debug": True, "port": 8080},
}
yaml_str = encode_yaml(data)
print(yaml_str)
# name: example
# version: 1.0.0
# features:
# - yaml
# - json
# - toml
# config:
#   debug: true
#   port: 8080

# Decode YAML back to Python
decoded = decode_yaml(yaml_str)
assert decoded == data
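
Because decode_yaml follows standard YAML semantics, anchors and aliases resolve to shared values. A minimal sketch, assuming the default loader keeps conventional YAML behavior:

from extended_data_types import decode_yaml

# A sketch: anchors (&) and aliases (*) are standard YAML, assumed to
# resolve the way any conventional loader resolves them.
doc = """
defaults: &defaults
  retries: 3
  timeout: 30
service_a: *defaults
service_b: *defaults
"""
config = decode_yaml(doc)
assert config["service_a"]["retries"] == 3
assert config["service_a"] == config["service_b"]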

High-performance JSON via orjson:

from extended_data_types import encode_json, decode_json

data = {
    "users": [
        {"id": 1, "name": "Alice", "active": True},
        {"id": 2, "name": "Bob", "active": False},
    ],
    "metadata": {"total": 2, "page": 1},
}

# Fast JSON encoding
json_str = encode_json(data)
print(json_str)
# {"users":[{"id":1,"name":"Alice","active":true},{"id":2,"name":"Bob","active":false}],"metadata":{"total":2,"page":1}}

# Decode back
decoded = decode_json(json_str)
assert decoded == data

TOML handling for application metadata and configuration documents:

from extended_data_types import encode_toml, decode_toml

data = {
    "package": {"name": "my-app", "version": "0.1.0"},
    "dependencies": {"requests": ">=2.28.0", "pyyaml": ">=6.0"},
}
toml_str = encode_toml(data)
print(toml_str)
# [package]
# name = "my-app"
# version = "0.1.0"
#
# [dependencies]
# requests = ">=2.28.0"
# pyyaml = ">=6.0"
decoded = decode_toml(toml_str)
assert decoded == data
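
Because decoding operates on already-read text, TOML handling pairs naturally with the file helpers. A sketch, assuming a pyproject.toml with a [project] table exists under the given top-level directory:

from extended_data_types import read_file, decode_file

# A sketch: read this project's pyproject.toml and decode it by suffix.
# The file and its [project] table are assumptions for illustration.
content = read_file("pyproject.toml", tld=".")
project = decode_file(content, file_path="pyproject.toml")
print(project.get("project", {}).get("name"))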

Terraform-style HCL for plain mappings, scalars, lists, and block lists:

from extended_data_types import decode_hcl2, encode_hcl2

terraform = {
    "locals": [{"region": "us-east-1"}],
    "resource": [
        {
            "aws_s3_bucket": {
                "logs": {"bucket": "my-logs-bucket", "acl": "private"},
            },
        },
    ],
}
hcl_text = encode_hcl2(terraform)
round_tripped = decode_hcl2(hcl_text)
assert round_tripped == terraform
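
decode_hcl2 also accepts hand-written HCL text, which is useful for inspecting existing Terraform files. A hedged sketch, assuming the list-wrapped block shape mirrors the round-trip example above:

from extended_data_types import decode_hcl2

# A sketch: decode hand-written HCL text. As in the round-trip example,
# top-level blocks are assumed to decode to lists of mappings.
snippet = """
variable "region" {
  default = "us-east-1"
}
"""
parsed = decode_hcl2(snippet)
print(parsed["variable"][0]["region"]["default"])  # us-east-1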

Encoding and decoding with optional data wrapping:

from extended_data_types import base64_encode, base64_decode
# Simple encoding
text = "Hello, World!"
encoded = base64_encode(text, wrap_raw_data=False)
print(encoded) # SGVsbG8sIFdvcmxkIQ==
decoded = base64_decode(encoded, unwrap_raw_data=False)
print(decoded) # Hello, World!
# With data wrapping (useful for structured data)
wrapped_encoded = base64_encode(text, wrap_raw_data=True)
wrapped_decoded = base64_decode(wrapped_encoded, unwrap_raw_data=True)
assert wrapped_decoded == text
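
One common use for the Base64 helpers is making opaque content safe to embed in JSON. A sketch combining the helpers shown above, assuming base64_encode returns a plain string, as its printed output suggests:

from extended_data_types import base64_encode, encode_json

# A sketch: Base64-encode file content so it can travel inside JSON.
report = "quarter,total\nQ1,1000\nQ2,1250\n"
payload = {
    "filename": "report.csv",
    "content": base64_encode(report, wrap_raw_data=False),
}
print(encode_json(payload))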

The package is designed so file I/O and decoding stay explicit:

from extended_data_types import decode_file, read_file, write_file

write_file("settings.yaml", {"debug": True}, tld=".")
content = read_file("settings.yaml", tld=".")
settings = decode_file(content, file_path="settings.yaml")

  • read_file() reads bytes or text
  • decode_file() decodes already-read content
  • write_file() selects the matching export format from the output suffix

That boundary is deliberate. It keeps local files, remote payloads, and larger automation pipelines easier to reason about.
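
The same decode path applies to content that never touched the local disk, such as a payload fetched over HTTP. A hedged sketch: the URL is illustrative, urllib is stdlib, and file_path is assumed to act purely as a format hint, as in the example above:

from urllib.request import urlopen
from extended_data_types import decode_file

# A sketch: decode a remote payload at the same explicit boundary.
# Assumes decode_file accepts already-read bytes, mirroring read_file's
# "bytes or text" contract.
with urlopen("https://example.com/config.yaml") as response:
    content = response.read()
settings = decode_file(content, file_path="config.yaml")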


Convert between all common case styles:

from extended_data_types import (
    to_camel_case,
    to_pascal_case,
    to_snake_case,
    to_kebab_case,
)
# From snake_case
original = "user_account_settings"
print(to_camel_case(original)) # userAccountSettings
print(to_pascal_case(original)) # UserAccountSettings
print(to_kebab_case(original)) # user-account-settings
# From camelCase
camel = "userAccountSettings"
print(to_snake_case(camel)) # user_account_settings
print(to_kebab_case(camel)) # user-account-settings
# From PascalCase
pascal = "HTTPResponseCode"
print(to_snake_case(pascal)) # http_response_code
print(to_camel_case(pascal)) # httpResponseCode
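
The converters compose well with plain dict comprehensions, for example when normalizing keys from a camelCase API response. A sketch using only the documented functions:

from extended_data_types import to_snake_case

# A sketch: normalize camelCase API keys to snake_case.
response = {"userId": 42, "createdAt": "2024-01-01", "isActive": True}
normalized = {to_snake_case(key): value for key, value in response.items()}
print(normalized)
# {"user_id": 42, "created_at": "2024-01-01", "is_active": True}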

Make machine-readable strings human-friendly:

from extended_data_types import humanize, titleize

examples = [
    "user_id",
    "createdAt",
    "HTTPResponse",
    "employee_salary_amount",
]
for example in examples:
    print(f"{example:30} → humanize: {humanize(example)}")
    print(f"{' ':30} → titleize: {titleize(example)}")
# user_id                        → humanize: User id
#                                → titleize: User Id
# createdAt                      → humanize: Created at
#                                → titleize: Created At
# HTTPResponse                   → humanize: Http response
#                                → titleize: Http Response
# employee_salary_amount         → humanize: Employee salary amount
#                                → titleize: Employee Salary Amount

Smart English pluralization and singularization:

from extended_data_types import pluralize, singularize

# Regular and irregular plurals
words = ["cat", "child", "person", "mouse", "analysis", "octopus"]
for word in words:
    print(f"{word}{pluralize(word)}")
# cat → cats
# child → children
# person → people
# mouse → mice
# analysis → analyses
# octopus → octopi

# Singularization
plural_words = ["cats", "children", "people", "mice", "analyses"]
for word in plural_words:
    print(f"{word}{singularize(word)}")

Convert numbers to ordinal strings:

from extended_data_types import ordinalize

numbers = [1, 2, 3, 4, 11, 12, 13, 21, 22, 23, 100, 101]
for num in numbers:
    print(f"{num}{ordinalize(num)}")
# 1 → 1st
# 2 → 2nd
# 3 → 3rd
# 4 → 4th
# 11 → 11th
# 12 → 12th
# 13 → 13th
# 21 → 21st
# 22 → 22nd
# 23 → 23rd
# 100 → 100th
# 101 → 101st

Common string operations:

from extended_data_types import (
    removeprefix,
    removesuffix,
    truncate,
    sanitize_key,
)
# Remove prefix/suffix (backport for older Python)
text = "prefix_content_suffix"
print(removeprefix(text, "prefix_")) # content_suffix
print(removesuffix(text, "_suffix")) # prefix_content
# Truncate long strings
long_text = "This is a very long string that needs to be truncated"
print(truncate(long_text, 20)) # This is a very lo...
# Sanitize for use as keys
messy_key = "User Name (Primary)"
print(sanitize_key(messy_key)) # user_name_primary

List helpers for flattening and filtering:

from extended_data_types import flatten_list, filter_list

# Flatten nested lists
nested = [[1, 2], [3, [4, 5]], 6]
print(flatten_list(nested))  # [1, 2, 3, 4, 5, 6]

# Filter with a predicate
items = ["apple", "banana", "apricot", "cherry"]
filtered = filter_list(items, lambda x: x.startswith("a"))
print(filtered)  # ["apple", "apricot"]

Mapping helpers for merging, flattening, and filtering:

from extended_data_types import deep_merge, flatten_map, filter_map

# Deep merge dictionaries
dict1 = {"a": 1, "b": {"c": 2, "d": 3}}
dict2 = {"b": {"d": 4, "e": 5}, "f": 6}
merged = deep_merge(dict1, dict2)
print(merged)
# {"a": 1, "b": {"c": 2, "d": 4, "e": 5}, "f": 6}

# Flatten nested dictionaries
nested_dict = {"a": {"b": {"c": 1}}, "d": 2}
flat = flatten_map(nested_dict)
print(flat)
# {"a.b.c": 1, "d": 2}

# Filter a dictionary by predicate
data = {"name": "John", "age": 30, "city": "NYC", "active": True}
strings_only = filter_map(data, lambda k, v: isinstance(v, str))
print(strings_only)
# {"name": "John", "city": "NYC"}

Helpers for checking and coalescing values:

from extended_data_types import (
    is_nothing,
    first_non_empty,
    all_non_empty,
    any_non_empty,
)
# Check if values are "nothing" (None, empty string, empty list, etc.)
print(is_nothing(None)) # True
print(is_nothing("")) # True
print(is_nothing([])) # True
print(is_nothing({})) # True
print(is_nothing("hello")) # False
print(is_nothing([1, 2, 3])) # False
# Get first non-empty value (like COALESCE in SQL)
result = first_non_empty(None, "", "fallback", "ignored")
print(result) # "fallback"
# Check if all values are non-empty
values = ["hello", "world", "!"]
print(all_non_empty(*values)) # True
with_empty = ["hello", "", "world"]
print(all_non_empty(*with_empty)) # False
# Check if any value is non-empty
mostly_empty = [None, "", "found"]
print(any_non_empty(*mostly_empty)) # True
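
In practice, first_non_empty reads like a COALESCE for configuration fallbacks. A sketch; os.environ.get is stdlib and returns None for unset variables, which counts as "nothing":

import os
from extended_data_types import first_non_empty

# A sketch: resolve a setting from an env var, then a config value,
# then a hard-coded default. None and "" are both skipped.
config = {"region": ""}
region = first_non_empty(
    os.environ.get("APP_REGION"),
    config.get("region"),
    "us-east-1",
)
print(region)  # us-east-1 (when APP_REGION is unset)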

Work with file paths and extensions:

from extended_data_types import match_file_extensions
# Check if file matches allowed extensions
file_path = "document.txt"
allowed = [".txt", ".md", ".rst"]
print(match_file_extensions(file_path, allowed)) # True
image_path = "photo.jpg"
print(match_file_extensions(image_path, allowed)) # False
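
This pairs naturally with pathlib when filtering a directory listing. A sketch; only match_file_extensions comes from the package:

from pathlib import Path
from extended_data_types import match_file_extensions

# A sketch: keep only documentation files from a directory listing.
allowed = [".txt", ".md", ".rst"]
docs = [path for path in Path(".").iterdir() if match_file_extensions(str(path), allowed)]
print(docs)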

Serialization helpers:

Function                                Description
encode_yaml(data)                       Encode Python object to YAML string
decode_yaml(yaml_str)                   Decode YAML string to Python object
encode_json(data)                       Encode Python object to JSON string (via orjson)
decode_json(json_str)                   Decode JSON string to Python object
encode_toml(data)                       Encode Python object to TOML string
decode_toml(toml_str)                   Decode TOML string to Python object
base64_encode(data, wrap_raw_data)      Encode to Base64
base64_decode(data, unwrap_raw_data)    Decode from Base64

String helpers:

Function                                Description
to_camel_case(s)                        Convert to camelCase
to_pascal_case(s)                       Convert to PascalCase
to_snake_case(s)                        Convert to snake_case
to_kebab_case(s)                        Convert to kebab-case
humanize(s)                             Convert to human-readable form
titleize(s)                             Convert to Title Case
pluralize(word)                         Get plural form
singularize(word)                       Get singular form
ordinalize(n)                           Convert number to ordinal (1st, 2nd, 3rd)

Map and list helpers:

Function                                Description
deep_merge(base, override)              Recursively merge dictionaries
flatten_map(d, separator)               Flatten nested dict to a single level
flatten_list(lst)                       Flatten nested list
filter_map(d, predicate)                Filter dictionary by predicate
filter_list(lst, predicate)             Filter list by predicate

Emptiness helpers:

Function                                Description
is_nothing(value)                       Check if value is None/empty
first_non_empty(*values)                Return first non-empty value
all_non_empty(*values)                  Check all values are non-empty
any_non_empty(*values)                  Check any value is non-empty