Source code for mcp_ohmy_sql.db.aws_redshift.schema_2_encoder

# -*- coding: utf-8 -*-

"""
This module provides utilities for mapping AWS Redshift schema models to simplified
type representations suitable for LLM consumption.
"""

from ...constants import LLMColumnConstraintEnum

from .tpl import TemplateEnum
from .schema_1_model import (
    ColumnInfo,
    TableInfo,
    SchemaInfo,
    DatabaseInfo,
)


def encode_column_info(
    column_info: ColumnInfo,
) -> str:
    """
    Render one AWS Redshift column as a compact, LLM-oriented token.

    The result is built by plain concatenation, always in this order::

        ${COLUMN_NAME}:${DATA_TYPE}${DISTRIBUTION_KEY}${SORT_KEY}${NOT_NULL}${ENCODING}

    The data type part is ``column_info.llm_type.value`` when ``llm_type`` is
    truthy, otherwise the raw ``column_info.type``.

    Every suffix is optional and is emitted only when the matching attribute
    of ``column_info`` is truthy:

    - ``dist_key``: ``*`` followed by ``LLMColumnConstraintEnum.DK.value``
    - ``sort_key_position``: ``*``, ``LLMColumnConstraintEnum.SK.value``,
      ``-`` and the position itself
    - ``notnull``: ``*`` followed by ``LLMColumnConstraintEnum.NN.value``
    - ``encoding``: ``*`` followed by the encoding string, verbatim

    :param column_info: Column metadata with Redshift-specific properties
    :returns: Compact column representation string

    Examples (assuming the enum values are ``DK``, ``SK`` and ``NN``; the
    enum is defined outside this module):

    - Distribution key: ``user_id:str*DK*NN*lzo``
    - Sort key: ``create_time:dt*SK-1*NN*delta``
    - Regular column: ``description:str*lzo``
    """
    # Prefer the simplified LLM type; fall back to the raw type otherwise.
    if column_info.llm_type:
        type_label = column_info.llm_type.value
    else:
        type_label = column_info.type

    pieces = [f"{column_info.name}:{type_label}"]

    if column_info.dist_key:
        pieces.append(f"*{LLMColumnConstraintEnum.DK.value}")

    # A falsy position (None or 0) means no sort-key marker is written.
    if column_info.sort_key_position:
        pieces.append(
            f"*{LLMColumnConstraintEnum.SK.value}-{column_info.sort_key_position}"
        )

    if column_info.notnull:
        pieces.append(f"*{LLMColumnConstraintEnum.NN.value}")

    if column_info.encoding:
        pieces.append(f"*{column_info.encoding}")

    return "".join(pieces)
def encode_table_info(
    table_info: TableInfo,
) -> str:
    """
    Render one AWS Redshift table as a compact, LLM-oriented text block.

    Every column in ``table_info.columns`` is passed through
    :func:`encode_column_info`. The resulting strings, together with the
    table type name, table name and distribution style, are handed to the
    ``TemplateEnum.table_info`` template. The exact layout is owned by that
    template; the intended shape is::

        Table TableName DistributionStyle Distribution Style (
            encoded_column_info_1,
            encoded_column_info_2,
            ...
        )

    Values forwarded to the template:

    - ``table_type_name``: ``table_info.object_type.table_type.value``
    - ``table_name``: ``table_info.name``
    - ``dist_style``: ``table_info.dist_style``, passed through unchanged
    - ``columns``: list of encoded column strings, in source order

    :param table_info: Table metadata with Redshift-specific properties
    :returns: Compact table representation string

    Illustrative output::

        Table users KEY Distribution Style (
            user_id:str*DK*NN*lzo,
            create_time:dt*SK-1*NN*delta,
            description:str*lzo,
        )
    """
    # Gather the template arguments in one place, in the order they are read.
    render_kwargs = dict(
        table_type_name=table_info.object_type.table_type.value,
        table_name=table_info.name,
        dist_style=table_info.dist_style,
        columns=[encode_column_info(col) for col in table_info.columns],
    )
    return TemplateEnum.table_info.render(**render_kwargs)
def encode_schema_info(
    schema_info: SchemaInfo,
) -> str:
    """
    Render one AWS Redshift schema as a compact, LLM-oriented text block.

    Every table in ``schema_info.tables`` is passed through
    :func:`encode_table_info`, and the results are handed to the
    ``TemplateEnum.schema_info`` template along with the schema name and an
    optional description. The exact layout is owned by that template; the
    intended shape is::

        Schema SchemaName(
            encoded_table_info_1,
            encoded_table_info_2,
            ...,
        )

    Values forwarded to the template:

    - ``schema_name``: ``schema_info.name``
    - ``schema_description``: ``:'<comment>'`` when ``schema_info.comment``
      is truthy, otherwise an empty string
    - ``tables``: list of encoded table strings, in source order

    :param schema_info: Schema metadata containing Redshift tables
    :returns: Compact schema representation string

    Illustrative output (column tokens use only markers that
    :func:`encode_column_info` emits)::

        Schema public(
            Table users KEY Distribution Style (
                user_id:str*DK*NN*lzo,
                create_time:dt*SK-1*NN*delta,
                description:str*lzo,
            ),
            Table orders EVEN Distribution Style (
                order_id:int*NN*delta,
                user_id:str*NN*lzo,
                order_date:dt*SK-1*NN*delta,
            ),
        )
    """
    encoded_tables = [encode_table_info(tbl) for tbl in schema_info.tables]

    name = schema_info.name
    comment = schema_info.comment
    # The description is only written when a non-empty comment exists.
    description = f":'{comment}'" if comment else ""

    return TemplateEnum.schema_info.render(
        schema_name=name,
        schema_description=description,
        tables=encoded_tables,
    )
def encode_database_info(
    database_info: DatabaseInfo,
) -> str:
    """
    Render an AWS Redshift database as a compact, LLM-oriented text block.

    Every schema in ``database_info.schemas`` is passed through
    :func:`encode_schema_info`, and the results are handed to the
    ``TemplateEnum.database_info`` template along with the database type,
    name and an optional description. The exact layout is owned by that
    template; the intended shape is::

        aws_redshift Database DatabaseName(
            Schema SchemaName(
                encoded_table_info_1,
                encoded_table_info_2,
                ...,
            ),
            ...
        )

    Values forwarded to the template:

    - ``database_type``: ``database_info.db_type.value``
    - ``database_name``: ``database_info.name``
    - ``database_description``: ``:'<comment>'`` when
      ``database_info.comment`` is truthy, otherwise an empty string
    - ``schemas``: list of encoded schema strings, in source order

    :param database_info: Database metadata with Redshift-specific schemas
    :returns: Compact database representation string

    Illustrative output::

        aws_redshift Database mcp_ohmy_sql_dev(
            Schema public(
                Table users KEY Distribution Style (
                    user_id:str*DK*NN*lzo,
                    create_time:dt*SK-1*NN*delta,
                ),
            ),
            Schema analytics(
                Table daily_metrics EVEN Distribution Style (
                    metric_date:dt*SK-1*NN*delta,
                    metric_value:dec*NN*delta,
                ),
            ),
        )
    """
    encoded_schemas = [encode_schema_info(sch) for sch in database_info.schemas]

    type_label = database_info.db_type.value
    name = database_info.name

    # The description is only written when a non-empty comment exists.
    if database_info.comment:
        description = f":'{database_info.comment}'"
    else:
        description = ""

    return TemplateEnum.database_info.render(
        database_type=type_label,
        database_name=name,
        database_description=description,
        schemas=encoded_schemas,
    )