Supported runtimes
Python 3.10 through 3.14.
Extended Data Types (extended-data-types) is the core utility library for
serialization, filesystem-aware workflows, transformation helpers, and typed
data normalization inside larger automation systems.
pip install extended-data-types

Supported runtimes
Python 3.10 through 3.14.
Public API model
Start with extended_data_types, then move to the namespaced
transformation modules when you need the richer surfaces directly.
Workflow-ready design
Explicit read/decode/write boundaries and plain-data HCL support make the package predictable inside larger automation systems.
Full YAML support with custom tag handling:
from extended_data_types import encode_yaml, decode_yaml
# Encode Python objects to YAML
data = {
    "name": "example",
    "version": "1.0.0",
    "features": ["yaml", "json", "toml"],
    "config": {"debug": True, "port": 8080},
}

yaml_str = encode_yaml(data)
print(yaml_str)
# name: example
# version: 1.0.0
# features:
# - yaml
# - json
# - toml
# config:
#   debug: true
#   port: 8080

# Decode YAML back to Python
decoded = decode_yaml(yaml_str)
assert decoded == data

High-performance JSON via orjson:
from extended_data_types import encode_json, decode_json
data = {
    "users": [
        {"id": 1, "name": "Alice", "active": True},
        {"id": 2, "name": "Bob", "active": False},
    ],
    "metadata": {"total": 2, "page": 1},
}

# Fast JSON encoding
json_str = encode_json(data)
print(json_str)
# {"users":[{"id":1,"name":"Alice","active":true},{"id":2,"name":"Bob","active":false}],"metadata":{"total":2,"page":1}}

# Decode back
decoded = decode_json(json_str)
assert decoded == data

TOML handling for application metadata and configuration documents:
from extended_data_types import encode_toml, decode_toml
data = {
    "package": {"name": "my-app", "version": "0.1.0"},
    "dependencies": {"requests": ">=2.28.0", "pyyaml": ">=6.0"},
}

toml_str = encode_toml(data)
print(toml_str)
# [package]
# name = "my-app"
# version = "0.1.0"
#
# [dependencies]
# requests = ">=2.28.0"
# pyyaml = ">=6.0"

decoded = decode_toml(toml_str)
assert decoded == data

Terraform-style HCL for plain mappings, scalars, lists, and block lists:
from extended_data_types import decode_hcl2, encode_hcl2
terraform = {
    "locals": [{"region": "us-east-1"}],
    "resource": [
        {
            "aws_s3_bucket": {
                "logs": {"bucket": "my-logs-bucket", "acl": "private"},
            },
        },
    ],
}

hcl_text = encode_hcl2(terraform)
round_tripped = decode_hcl2(hcl_text)

assert round_tripped == terraform

Encoding and decoding with optional data wrapping:
from extended_data_types import base64_encode, base64_decode
# Simple encoding
text = "Hello, World!"
encoded = base64_encode(text, wrap_raw_data=False)
print(encoded)  # SGVsbG8sIFdvcmxkIQ==

decoded = base64_decode(encoded, unwrap_raw_data=False)
print(decoded)  # Hello, World!

# With data wrapping (useful for structured data)
wrapped_encoded = base64_encode(text, wrap_raw_data=True)
wrapped_decoded = base64_decode(wrapped_encoded, unwrap_raw_data=True)
assert wrapped_decoded == text

The package is designed so file I/O and decoding stay explicit:
from extended_data_types import decode_file, read_file, write_file
write_file("settings.yaml", {"debug": True}, tld=".")
content = read_file("settings.yaml", tld=".")
settings = decode_file(content, file_path="settings.yaml")

read_file() reads bytes or text; decode_file() decodes already-read content; write_file() selects the matching export path from the output suffix.

That boundary is deliberate. It keeps local files, remote payloads, and larger automation pipelines easier to reason about.
Convert between all common case styles:
from extended_data_types import ( to_camel_case, to_pascal_case, to_snake_case, to_kebab_case,)
# From snake_case
original = "user_account_settings"
print(to_camel_case(original))   # userAccountSettings
print(to_pascal_case(original))  # UserAccountSettings
print(to_kebab_case(original))   # user-account-settings

# From camelCase
camel = "userAccountSettings"
print(to_snake_case(camel))  # user_account_settings
print(to_kebab_case(camel))  # user-account-settings

# From PascalCase
pascal = "HTTPResponseCode"
print(to_snake_case(pascal))  # http_response_code
print(to_camel_case(pascal))  # httpResponseCode

Make machine-readable strings human-friendly:
from extended_data_types import humanize, titleize
examples = [ "user_id", "createdAt", "HTTPResponse", "employee_salary_amount",]
for example in examples:
    print(f"{example:30} → humanize: {humanize(example)}")
    print(f"{' ':30} → titleize: {titleize(example)}")

# user_id                        → humanize: User id
#                                → titleize: User Id
# createdAt                      → humanize: Created at
#                                → titleize: Created At
# HTTPResponse                   → humanize: Http response
#                                → titleize: Http Response
# employee_salary_amount         → humanize: Employee salary amount
#                                → titleize: Employee Salary Amount

Smart English pluralization and singularization:
from extended_data_types import pluralize, singularize
# Regular and irregular plurals
words = ["cat", "child", "person", "mouse", "analysis", "octopus"]
for word in words:
    print(f"{word} → {pluralize(word)}")

# cat → cats
# child → children
# person → people
# mouse → mice
# analysis → analyses
# octopus → octopi

# Singularization
plural_words = ["cats", "children", "people", "mice", "analyses"]
for word in plural_words:
    print(f"{word} → {singularize(word)}")

Convert numbers to ordinal strings:
from extended_data_types import ordinalize
numbers = [1, 2, 3, 4, 11, 12, 13, 21, 22, 23, 100, 101]
for num in numbers:
    print(f"{num} → {ordinalize(num)}")

# 1 → 1st
# 2 → 2nd
# 3 → 3rd
# 4 → 4th
# 11 → 11th
# 12 → 12th
# 13 → 13th
# 21 → 21st
# 22 → 22nd
# 23 → 23rd
# 100 → 100th
# 101 → 101st

Common string operations:
from extended_data_types import ( removeprefix, removesuffix, truncate, sanitize_key,)
# Remove prefix/suffix (backport for older Python)
text = "prefix_content_suffix"
print(removeprefix(text, "prefix_"))  # content_suffix
print(removesuffix(text, "_suffix"))  # prefix_content

# Truncate long strings
long_text = "This is a very long string that needs to be truncated"
print(truncate(long_text, 20))  # This is a very lo...

# Sanitize for use as keys
messy_key = "User Name (Primary)"
print(sanitize_key(messy_key))  # user_name_primary

from extended_data_types import flatten_list, filter_list

# Flatten nested lists
nested = [[1, 2], [3, [4, 5]], 6]
print(flatten_list(nested))  # [1, 2, 3, 4, 5, 6]

# Filter with predicate
items = ["apple", "banana", "apricot", "cherry"]
filtered = filter_list(items, lambda x: x.startswith("a"))
print(filtered)  # ["apple", "apricot"]

from extended_data_types import deep_merge, flatten_map, filter_map
# Deep merge dictionaries
dict1 = {"a": 1, "b": {"c": 2, "d": 3}}
dict2 = {"b": {"d": 4, "e": 5}, "f": 6}
merged = deep_merge(dict1, dict2)
print(merged)
# {"a": 1, "b": {"c": 2, "d": 4, "e": 5}, "f": 6}

# Flatten nested dictionaries
nested_dict = {"a": {"b": {"c": 1}}, "d": 2}
flat = flatten_map(nested_dict)
print(flat)
# {"a.b.c": 1, "d": 2}

# Filter dictionary
data = {"name": "John", "age": 30, "city": "NYC", "active": True}
strings_only = filter_map(data, lambda k, v: isinstance(v, str))
print(strings_only)
# {"name": "John", "city": "NYC"}

Helpers for checking and coalescing values:
from extended_data_types import ( is_nothing, first_non_empty, all_non_empty, any_non_empty,)
# Check if values are "nothing" (None, empty string, empty list, etc.)
print(is_nothing(None))       # True
print(is_nothing(""))         # True
print(is_nothing([]))         # True
print(is_nothing({}))         # True
print(is_nothing("hello"))    # False
print(is_nothing([1, 2, 3]))  # False

# Get first non-empty value (like COALESCE in SQL)
result = first_non_empty(None, "", "fallback", "ignored")
print(result)  # "fallback"

# Check if all values are non-empty
values = ["hello", "world", "!"]
print(all_non_empty(*values))  # True

with_empty = ["hello", "", "world"]
print(all_non_empty(*with_empty))  # False

# Check if any value is non-empty
mostly_empty = [None, "", "found"]
print(any_non_empty(*mostly_empty))  # True

Work with file paths and extensions:
from extended_data_types import match_file_extensions
# Check if file matches allowed extensions
file_path = "document.txt"
allowed = [".txt", ".md", ".rst"]
print(match_file_extensions(file_path, allowed))  # True

image_path = "photo.jpg"
print(match_file_extensions(image_path, allowed))  # False

| Function | Description |
|---|---|
| `encode_yaml(data)` | Encode Python object to YAML string |
| `decode_yaml(yaml_str)` | Decode YAML string to Python object |
| `encode_json(data)` | Encode Python object to JSON string (via orjson) |
| `decode_json(json_str)` | Decode JSON string to Python object |
| `encode_toml(data)` | Encode Python object to TOML string |
| `decode_toml(toml_str)` | Decode TOML string to Python object |
| `base64_encode(data, wrap_raw_data)` | Encode to Base64 |
| `base64_decode(data, unwrap_raw_data)` | Decode from Base64 |
| Function | Description |
|---|---|
| `to_camel_case(s)` | Convert to camelCase |
| `to_pascal_case(s)` | Convert to PascalCase |
| `to_snake_case(s)` | Convert to snake_case |
| `to_kebab_case(s)` | Convert to kebab-case |
| `humanize(s)` | Convert to human-readable form |
| `titleize(s)` | Convert to Title Case |
| `pluralize(word)` | Get plural form |
| `singularize(word)` | Get singular form |
| `ordinalize(n)` | Convert number to ordinal (1st, 2nd, 3rd) |
| Function | Description |
|---|---|
| `deep_merge(base, override)` | Recursively merge dictionaries |
| `flatten_map(d, separator)` | Flatten nested dict to single level |
| `flatten_list(lst)` | Flatten nested list |
| `filter_map(d, predicate)` | Filter dictionary by predicate |
| `filter_list(lst, predicate)` | Filter list by predicate |
| Function | Description |
|---|---|
| `is_nothing(value)` | Check if value is None/empty |
| `first_non_empty(*values)` | Return first non-empty value |
| `all_non_empty(*values)` | Check all values are non-empty |
| `any_non_empty(*values)` | Check any value is non-empty |