Building SELECT lists with SQL
If you are using SQL for ingestion and you want to rename columns
PascalCase to snake_case
-- PascalCase to snake_case
-- , OtherNFLStats AS other_nfl_stats
SELECT
CONCAT(', ', column_name, ' AS ', LOWER(
REGEXP_REPLACE(
REGEXP_REPLACE(column_name, '([a-z0-9])([A-Z])', '$1_$2'), -- Pass 1: Lower to Upper
'([A-Z])([A-Z][a-z])', '$1_$2' -- Pass 2: Acronym to Word
)
)) AS formatted_column
FROM information_schema.columns
WHERE table_name = 'my_table'
AND table_schema = 'my_schema'
ORDER BY ordinal_position;
PascalCase to snake_case with substitution
-- PascalCase to snake_case
-- , OverageAmount AS overage_amt
WITH RawColumns AS (
SELECT column_name
, LOWER(
REGEXP_REPLACE(
REGEXP_REPLACE(column_name, '([a-z0-9])([A-Z])', '$1_$2')
, '([A-Z])([A-Z][a-z])', '$1_$2'
)
) AS snake_name
FROM information_schema.columns
WHERE table_name = 'my_table'
AND table_schema = 'my_schema'
)
SELECT CONCAT(', ', column_name, ' AS ',
AGGREGATE( -- AGGREGATE is Spark-specific (thus applicable to databricks)
-- Define the list of [pattern, replacement]
ARRAY(
STRUCT('_amount$' AS pat, '_amt' AS rep)
, STRUCT('_percent$' AS pat, '_pct' AS rep)
, STRUCT('_flag$' AS pat, '_flg' AS rep)
, STRUCT('_code$' AS pat, '_cd' AS rep)
, STRUCT('_number$' AS pat, '_nbr' AS rep)
, STRUCT('_name$' AS pat, '_nm' AS rep)
, STRUCT('_description$' AS pat, '_dsc' AS rep)
, STRUCT('_desc$' AS pat, '_dsc' AS rep)
, STRUCT('_date$' AS pat, '_dts' AS rep)
)
, snake_name -- Initial value
, (acc, x) -> REGEXP_REPLACE(acc, x.pat, x.rep) -- Iterative function
)
) AS formatted_column
FROM RawColumns
ORDER BY column_name;