Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Custom Calcite Rule to remove redundant references #16402

Merged
merged 18 commits into from
May 14, 2024
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import org.apache.druid.annotations.EverythingIsNonnullByDefault;
import org.apache.druid.error.DruidException;
import org.apache.druid.query.aggregation.constant.LongConstantAggregator;
import org.apache.druid.query.aggregation.constant.LongConstantBufferAggregator;
import org.apache.druid.query.aggregation.constant.LongConstantVectorAggregator;
Expand Down Expand Up @@ -102,6 +103,9 @@ public GroupingAggregatorFactory(
)
{
Preconditions.checkNotNull(name, "Must have a valid, non-null aggregator name");
if (!groupings.isEmpty() && groupings.stream().distinct().count() < groupings.size()) {
throw DruidException.defensive("Encountered same dimension in more than one grouping!");
}
this.name = name;
this.groupings = groupings;
this.keyDimensions = keyDimensions;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import com.google.common.collect.Sets;
import junitparams.converters.Nullable;
import org.apache.druid.error.DruidException;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.aggregation.constant.LongConstantAggregator;
import org.apache.druid.query.aggregation.constant.LongConstantBufferAggregator;
Expand Down Expand Up @@ -97,6 +98,12 @@ public void testWithKeyDimensions()
aggregator = factory.factorize(metricFactory);
Assert.assertEquals(2, aggregator.getLong());
}

/**
 * Expects {@code makeFactory} to throw a {@link DruidException} when the same
 * grouping name is supplied twice.
 */
@Test
public void testWithDuplicateGroupings()
{
  final String[] repeatedGroupings = {"a", "a"};
  Assert.assertThrows(
      DruidException.class,
      () -> makeFactory(repeatedGroupings, null)
  );
}
}

public static class GroupingDimensionsTest
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,10 @@ public Aggregation toDruidAggregation(
}
}
}
AggregatorFactory factory = new GroupingAggregatorFactory(name, arguments);
AggregatorFactory factory = new GroupingAggregatorFactory(
name,
arguments.stream().distinct().collect(Collectors.toList())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't this mask the newly added check in GroupingAggregatorFactory?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

made this a soft exception and removed this distinct masking here

);
return Aggregation.create(factory);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
import org.apache.druid.sql.calcite.rule.ReverseLookupRule;
import org.apache.druid.sql.calcite.rule.RewriteFirstValueLastValueRule;
import org.apache.druid.sql.calcite.rule.SortCollapseRule;
import org.apache.druid.sql.calcite.rule.logical.DruidAggregateRemoveRedundancyRule;
import org.apache.druid.sql.calcite.rule.logical.DruidLogicalRules;
import org.apache.druid.sql.calcite.run.EngineFeature;

Expand Down Expand Up @@ -479,6 +480,7 @@ public List<RelOptRule> baseRuleSet(final PlannerContext plannerContext)
rules.add(FilterJoinExcludePushToChildRule.FILTER_ON_JOIN_EXCLUDE_PUSH_TO_CHILD);
rules.add(SortCollapseRule.instance());
rules.add(ProjectAggregatePruneUnusedCallRule.instance());
rules.add(DruidAggregateRemoveRedundancyRule.Config.DEFAULT.toRule());

return rules.build();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.sql.calcite.rule.logical;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.RelRule;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
import org.apache.calcite.rel.core.Aggregate.Group;
import org.apache.calcite.rel.core.AggregateCall;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.rules.TransformationRule;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Util;
import org.apache.calcite.util.mapping.Mappings;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.immutables.value.Value;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
* Planner rule that recognizes an {@link Aggregate}
* on top of a {@link Project} and, if possible,
* aggregates through the project or removes the project.
*
* This is an updated version of {@link org.apache.calcite.rel.rules.AggregateProjectMergeRule}
* that is able to handle expressions.
*/
@Value.Enclosing
public class DruidAggregateRemoveRedundancyRule
extends RelRule<DruidAggregateRemoveRedundancyRule.Config>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could possibly extend RelOptRule instead; that doesn't need this config stuff.

Or, optionally, move that Immutables-generated stuff inside this class somewhere, so it's co-located.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

made use of RelOptRule and got rid of the Immutable class

implements TransformationRule
{

/**
* Creates a DruidAggregateRemoveRedundancyRule.
*
* @param config rule configuration; handed unchanged to the superclass constructor
*/
protected DruidAggregateRemoveRedundancyRule(Config config)
{
super(config);
}

/**
 * Runs {@link #apply} on the matched aggregate (operand 0) and project (operand 1).
 * When a replacement is produced, it is registered via {@code transformTo} and the
 * original aggregate is pruned from the planner; otherwise nothing happens.
 */
@Override
public void onMatch(RelOptRuleCall call)
{
  final Aggregate matchedAggregate = call.rel(0);
  final Project matchedProject = call.rel(1);

  final RelNode rewritten = apply(call, matchedAggregate, matchedProject);
  if (rewritten == null) {
    return;
  }

  call.transformTo(rewritten);
  call.getPlanner().prune(matchedAggregate);
}

/**
 * Rewrites an {@link Aggregate} over a {@link Project} so that the aggregate reads from a
 * new projection, stacked on top of the given one, in which fields of the original
 * projection that carry an equal expression share a single column.
 *
 * @param call      rule call, used to obtain a {@link RelBuilder}
 * @param aggregate the aggregate to rewrite
 * @param project   the projection feeding the aggregate
 * @return the rewritten plan, or {@code null} when the aggregate references no fields or
 *         when the de-duplicated projection would have as many columns as {@code project}
 */
public static @Nullable RelNode apply(RelOptRuleCall call, Aggregate aggregate, Project project)
{
  final Set<Integer> usedFields = RelOptUtil.getAllFields(aggregate);
  if (usedFields.isEmpty()) {
    return null;
  }

  final List<RexNode> projectExprs = project.getProjects();
  // Field index in the original projection -> column in the de-duplicated projection.
  final Map<Integer, Integer> fieldToTarget = new HashMap<>();
  // Expression -> column already assigned to it in the de-duplicated projection.
  final Map<RexNode, Integer> targetByExpr = new HashMap<>();
  final List<RexNode> dedupedRefs = new ArrayList<>();
  for (int field : usedFields) {
    final RexNode expr = projectExprs.get(field);
    Integer target = targetByExpr.get(expr);
    if (target == null) {
      // First time this expression is seen: give it the next free column.
      target = dedupedRefs.size();
      targetByExpr.put(expr, target);
      dedupedRefs.add(new RexInputRef(field, expr.getType()));
    }
    fieldToTarget.put(field, target);
  }

  if (dedupedRefs.size() == projectExprs.size()) {
    return null;
  }

  final ImmutableBitSet remappedGroupSet = aggregate.getGroupSet().permute(fieldToTarget);
  final ImmutableList<ImmutableBitSet> remappedGroupSets;
  if (aggregate.getGroupType() == Group.SIMPLE) {
    remappedGroupSets = null;
  } else {
    remappedGroupSets = ImmutableBitSet.ORDERING.immutableSortedCopy(
        Sets.newTreeSet(ImmutableBitSet.permute(aggregate.getGroupSets(), fieldToTarget))
    );
  }

  final Mappings.TargetMapping fieldMapping = Mappings.target(
      fieldToTarget,
      aggregate.getInput().getRowType().getFieldCount(),
      dedupedRefs.size()
  );
  final ImmutableList.Builder<AggregateCall> remappedCalls = ImmutableList.builder();
  for (AggregateCall originalCall : aggregate.getAggCallList()) {
    remappedCalls.add(originalCall.transform(fieldMapping));
  }

  final RelBuilder builder = call.builder();
  builder.push(project);
  builder.project(dedupedRefs);
  final RelNode dedupedInput = builder.build();

  final Aggregate rewrittenAggregate = aggregate.copy(
      aggregate.getTraitSet(),
      dedupedInput,
      remappedGroupSet,
      remappedGroupSets,
      remappedCalls.build()
  );
  builder.push(rewrittenAggregate);

  final List<Integer> remappedKeys = Util.transform(
      aggregate.getGroupSet().asList(),
      key -> Objects.requireNonNull(
          fieldToTarget.get(key),
          () -> "no value found for key " + key + " in " + fieldToTarget
      )
  );

  // Add a project if the group set is not in the same order or
  // contains duplicates.
  if (!remappedKeys.equals(remappedGroupSet.asList())) {
    final List<Integer> outputPositions = new ArrayList<>();
    for (int remappedKey : remappedKeys) {
      outputPositions.add(remappedGroupSet.indexOf(remappedKey));
    }
    // Fields after the group keys are appended in their existing order.
    final int outputFieldCount = rewrittenAggregate.getRowType().getFieldCount();
    for (int position = rewrittenAggregate.getGroupCount(); position < outputFieldCount; position++) {
      outputPositions.add(position);
    }
    builder.project(builder.fields(outputPositions));
  }

  return builder.build();
}

/**
* Rule configuration.
*/
@Value.Immutable
public interface Config extends RelRule.Config
Fixed Show fixed Hide fixed
{
Config DEFAULT = DruidImmutableAggregateRemoveRedundancyRule.Config.of()
.withOperandFor(Aggregate.class, Project.class);

@Override
default DruidAggregateRemoveRedundancyRule toRule()
{
return new DruidAggregateRemoveRedundancyRule(this);
}

/**
* Defines an operand tree for the given classes.
*/
default Config withOperandFor(
Class<? extends Aggregate> aggregateClass,
Class<? extends Project> projectClass
)
{
return withOperandSupplier(b0 ->
b0.operand(aggregateClass).oneInput(b1 ->
b1.operand(projectClass).anyInputs())).as(
Config.class);
}
}
}
Loading
Loading