Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Capitalisation: Add camelCase #5777

Merged
merged 14 commits into from
Jun 9, 2024
14 changes: 11 additions & 3 deletions src/sqlfluff/core/rules/config_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,18 @@
"definition": "The capitalisation policy to enforce.",
},
"extended_capitalisation_policy": {
"validation": ["consistent", "upper", "lower", "pascal", "capitalise", "snake"],
"validation": [
"consistent",
"upper",
"lower",
"pascal",
"capitalise",
"snake",
"camel",
],
"definition": (
"The capitalisation policy to enforce, extended with PascalCase "
"and snake_case. "
"The capitalisation policy to enforce, extended with PascalCase, "
"snake_case, and camelCase. "
"This is separate from ``capitalisation_policy`` as it should not be "
Comment on lines 72 to 75
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The precedence rules here aren't easy to guess. From running through the examples below, I think I understand the logic, but we should document it here.

If nothing else, someone will ask on the Slack channel, and I'd like to be able to point them to something that explains why the default config behaves the way it does.

"applied to keywords."
),
Expand Down
29 changes: 17 additions & 12 deletions src/sqlfluff/rules/capitalisation/CP01.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,17 +136,20 @@ def _handle_segment(self, segment: BaseSegment, context: RuleContext) -> LintRes
refuted_cases = memory.get("refuted_cases", set())

# Which cases are definitely inconsistent with the segment?
first_letter_is_lowercase = False
for character in segment.raw:
if is_capitalizable(character):
first_letter_is_lowercase = character != character.upper()
break
# If none of the characters are letters there will be a parsing
# error, so not sure we need this statement
first_letter_is_lowercase = False

# We refute inference of camel, pascal, and snake case.
# snake, if not explicitly set, can be destructive to
# variable names, adding underscores.
# camel and Pascal could allow poorly linted code in,
# so must be explicitly chosen.
refuted_cases.update(["camel", "pascal", "snake"])
if first_letter_is_lowercase:
# snake added here as it cannot be inferred (presents as lower)
refuted_cases.update(["upper", "capitalise", "pascal", "snake"])
refuted_cases.update(["upper", "capitalise"])
if segment.raw != segment.raw.lower():
refuted_cases.update(["lower"])
else:
Expand All @@ -155,8 +158,6 @@ def _handle_segment(self, segment: BaseSegment, context: RuleContext) -> LintRes
refuted_cases.update(["upper"])
if segment.raw != segment.raw.capitalize():
refuted_cases.update(["capitalise"])
if not segment.raw.isalnum():
refuted_cases.update(["pascal", "snake"])

# Update the memory
memory["refuted_cases"] = refuted_cases
Expand Down Expand Up @@ -219,6 +220,14 @@ def _handle_segment(self, segment: BaseSegment, context: RuleContext) -> LintRes
lambda match: match.group(1) + match.group(2).upper() + match.group(3),
segment.raw,
)
elif concrete_policy == "camel":
# As with Pascal, for camel we can only take a best-effort approach:
# the first character of each alphanumeric run is lower-cased and the
# case of every other character is left unchanged.
fixed_raw = regex.sub(
"([^a-zA-Z0-9]+|^)([a-zA-Z0-9])([a-zA-Z0-9]*)",
lambda match: match.group(1) + match.group(2).lower() + match.group(3),
segment.raw,
)
elif concrete_policy == "snake":
if segment.raw.isupper():
fixed_raw = segment.raw.lower()
Expand All @@ -240,14 +249,10 @@ def _handle_segment(self, segment: BaseSegment, context: RuleContext) -> LintRes
# build description based on the policy in use
consistency = "consistently " if cap_policy == "consistent" else ""

if concrete_policy in ["upper", "lower"]:
if concrete_policy in ["upper", "lower", "pascal", "camel", "snake"]:
policy = f"{concrete_policy} case."
elif concrete_policy == "capitalise":
policy = "capitalised."
elif concrete_policy == "pascal":
policy = "pascal case."
elif concrete_policy == "snake":
policy = "snake case."

# Return the fixed segment
self.logger.debug(
Expand Down
78 changes: 69 additions & 9 deletions test/fixtures/rules/std_rule_cases/CP02.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,18 @@ test_pass_consistent_capitalisation_with_single_letter_upper:
pass_str: SELECT A, Boo

test_pass_consistent_capitalisation_with_single_word_snake:
# Single-word ambiguity: Pascal vs Capitalise
pass_str: SELECT Apple, Banana_split
# Snake is refuted as ambiguous
pass_str: SELECT apple, banana_split

test_pass_consistent_capitalisation_with_single_word_pascal:
test_fail_consistent_capitalisation_with_single_word_pascal:
# Single-word ambiguity: Pascal vs Capitalise
pass_str: SELECT AppleFritter, Banana
fail_str: SELECT AppleFritter, Banana
fix_str: SELECT APPLEFRITTER, BANANA

test_pass_consistent_capitalisation_with_multiple_words_with_numbers:
test_fail_consistent_capitalisation_with_multiple_words_with_numbers:
# Numbers count as part of words so following letter can be upper or lower
pass_str: SELECT AppleFritter, Apple123fritter, Apple123Fritter
fail_str: SELECT AppleFritter, Apple123fritter, Apple123Fritter
fix_str: SELECT APPLEFRITTER, APPLE123FRITTER, APPLE123FRITTER

test_pass_consistent_capitalisation_with_leading_underscore:
pass_str: SELECT _a, b
Expand Down Expand Up @@ -133,6 +135,36 @@ test_fail_inconsistent_capitalisation_pascal_v_capitalise:
# Pascal vs Capitalise
fail_str: SELECT AppleFritter, Banana_split
fix_str: SELECT AppleFritter, Banana_Split
configs:
rules:
capitalisation.identifiers:
extended_capitalisation_policy: pascal

test_fail_inconsistent_capitalisation_policy_camel_1:
fail_str: SELECT Camelcase
fix_str: SELECT camelcase
configs:
rules:
capitalisation.identifiers:
extended_capitalisation_policy: camel

test_fail_inconsistent_capitalisation_policy_camel_2:
fail_str: SELECT Camel_Case
fix_str: SELECT camel_case
configs:
rules:
capitalisation.identifiers:
extended_capitalisation_policy: camel

test_fail_inconsistent_capitalisation_policy_camel_3:
# Similar to above, you could argue the fixed string is
# not really camelCase, but it's closer than it was!
fail_str: SELECT cAMEL_CASE
fix_str: SELECT cAMEL_cASE
configs:
rules:
capitalisation.identifiers:
extended_capitalisation_policy: camel

test_pass_policy_unquoted_identifiers_aliases_1:
pass_str: SELECT a, B
Expand Down Expand Up @@ -167,7 +199,7 @@ test_pass_policy_unquoted_identifiers_aliases_4:

test_policy_unquoted_identifiers_aliases_5:
fail_str: SELECT UPPER_CASE AS PascalCase, PascalCase AS lower_case
fix_str: SELECT UPPER_CASE AS PascalCase, PascalCase AS Lower_Case
fix_str: SELECT UPPER_CASE AS PASCALCASE, PascalCase AS LOWER_CASE
configs:
rules:
capitalisation.identifiers:
Expand All @@ -192,7 +224,7 @@ test_policy_unquoted_identifiers_aliases_7:

test_policy_unquoted_identifiers_aliases_8:
fail_str: SELECT UPPER_CASE AS PascalCase, PascalCase AS lower_case FROM lower_case AS lower_case
fix_str: SELECT UPPER_CASE AS PascalCase, PascalCase AS Lower_Case FROM lower_case AS Lower_Case
fix_str: SELECT UPPER_CASE AS PASCALCASE, PascalCase AS LOWER_CASE FROM lower_case AS LOWER_CASE
configs:
rules:
capitalisation.identifiers:
Expand All @@ -207,7 +239,7 @@ test_policy_unquoted_identifiers_column_aliases_1:

test_policy_unquoted_identifiers_aliases_2:
fail_str: SELECT UPPER_CASE AS PascalCase, PascalCase AS lower_case FROM lower_case AS lower_case
fix_str: SELECT UPPER_CASE AS PascalCase, PascalCase AS Lower_Case FROM lower_case AS lower_case
fix_str: SELECT UPPER_CASE AS PASCALCASE, PascalCase AS LOWER_CASE FROM lower_case AS lower_case
configs:
rules:
capitalisation.identifiers:
Expand Down Expand Up @@ -281,3 +313,31 @@ test_fail_snake_aliases:
rules:
capitalisation.identifiers:
extended_capitalisation_policy: snake

test_fail_camel_aliases:
# Test for issue #5470
# similar to PascalCase, case logic defined in CP01, but tested in CP02
fail_str: |
SELECT
test1,
test_2,
testColumn3,
TestColumn4,
TESTCOLUMN5,
TEST_COLUMN6,
test_colUmn_7
fix_str: |
SELECT
test1,
test_2,
testColumn3,
testColumn4,
tESTCOLUMN5,
tEST_cOLUMN6,
test_colUmn_7
configs:
core:
dialect: tsql
rules:
capitalisation.identifiers:
extended_capitalisation_policy: camel
12 changes: 9 additions & 3 deletions test/fixtures/rules/std_rule_cases/CP05.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,20 @@ test_pass_default_consistent_capitalised:
ts Time With Time Zone
);

test_pass_default_consistent_pascal:
# Test that we don't have the "inconsistent" bug
pass_str: |
test_fail_default_consistent_pascal:
# PascalCase is never inferred under the default "consistent" policy, so
# without explicit config these types are fixed to capitalise instead.
fail_str: |
CREATE TABLE distributors (
did Integer,
name VarChar(40),
ts Time With Time Zone
);
fix_str: |
CREATE TABLE distributors (
did Integer,
name Varchar(40),
ts Time With Time Zone
);

test_fail_data_type_inconsistent_capitalisation_1:
# Test that we don't have the "inconsistent" bug
Expand Down