1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
|
package pl.pk.dbtuner.query.optimizer.dynamic
import pl.pk.dbtuner.query.algebra.scalar.Predicate
import pl.pk.dbtuner.util.SetUtil._
import pl.pk.dbtuner.util.Skyline
import scala.math.max
/**
* Rule telling, that some tables can be joined.
* Implementations may contain some additional data like join predicates, to actually
* perform the joining. However those details are not used by the optimizer.
*/
trait AbstractJoin[Table] {
val tables: Set[Table]
}
/**
* An abstract query plan tree providing data required for the optimizer.
*/
trait AbstractTree[Table, Predicate] {
/** Tables used by this tree */
val tables: Set[Table]
/**
* Join predicates used in this tree.
* If two trees have different predicates, they are considered different.
*/
val predicates: Set[Predicate]
/**
* Lower bound cost estimate for this node.
* Usually the cost of returning the first tuple.
*/
val minCost: Float
/**
* Upper bound cost estimate for this node. Usually the cost of
* returning all of the tuples.
*/
val maxCost: Float
/** Hidden cost is a possible cost that using this node can cause for its parents. May be negative. */
val hiddenCost: Float
/** Number of rows that the root of this tree is going to return */
val rows: Float
}
/**
* Date: 2010-01-31
* Time: 16:33:44
*
* Implements a query plan generator based on dynamic programming approach (Selinger, 1979)
*/
trait AbstractJoinOptimizer[Table] {
protected type Order
protected type Tree <: AbstractTree[Table, Predicate]
protected type Join <: AbstractJoin[Table]
/** Plan leaves */
protected val leaves: List[Tree]
/** AbstractJoin specifications telling which node can be joined with which one */
protected val joins: List[Join]
def levels = leaves.groupBy(t => t.tables).size
def optimize: List[Tree] = forest(levels)(prune _).last
def optimizeMax: List[Tree] = forest(levels)(pruneMax _).last
/**
* Generates a forest of trees:
* starting from the one-table plans and finishing at n-table joins.
*/
private def forest(n: Int)(implicit prune: (List[Tree], Int) => List[Tree]): List[List[Tree]] = {
if (n <= 1) {
val p = prune(leaves, Int.MaxValue)
List(p)
}
else {
val f = forest(n - 1)
val p1 = products(f)
val p2 = prune(p1, 500)
f ::: List(p2)
}
}
private def negativeOrZero(x: Double): Double =
if (x < 0.0) x else 0.0
private def positiveOrZero(x: Double): Double =
if (x > 0.0) x else 0.0
private def prune(trees: List[Tree], limit: Int): List[Tree] =
prune(trees, Skyline.weakParetoMax[Tree, Double, Double](_,
x => -(x.minCost + negativeOrZero(x.hiddenCost)),
x => -(x.maxCost + positiveOrZero(x.hiddenCost))), limit)
private def pruneMax(trees: List[Tree], limit: Int): List[Tree] =
prune(trees, Skyline.weakParetoMax[Tree, Double, Double](_,
x => -(x.maxCost + negativeOrZero(x.hiddenCost)),
x => -(x.maxCost + positiveOrZero(x.hiddenCost))), limit)
/**
* Removes suboptimal trees from the list.
* AbstractTree is considered suboptimal if its minCost and maxCost are
* both higher than any other tree with the same set of result tables.
*
* @arg trees list of trees to be pruned
* @arg df domination function that prunes semantically equivalent trees
*/
private def prune(trees: List[Tree], df: Iterable[Tree] => List[Tree], limit: Int): List[Tree] = {
val groups: List[List[Tree]] =
trees.groupBy(t => (t.tables, t.predicates)).values.toList
val pruned: List[Tree] = groups.flatMap(df)
val byPredicate: List[List[Tree]] =
pruned.groupBy(_.predicates).values.toList
byPredicate.flatMap(_.toList.sortBy(_.maxCost).take(max(1, limit / byPredicate.length)))
}
/**
* Join trees in such a way that they produce trees with exactly n leaves.
* For example, given 3 lists of trees:
* 1. a, b, c
* 2. (a,b), (b,c)
* 3. (d,e,f)
* calling it with n = 4 yields:
* (a,b,c), (c,a,b)
* calling it with n = 5 yields:
* (a,b,d,e,f), (b,c,d,e,f)
*/
private def products(forest: List[List[Tree]]): List[Tree] = {
val p =
for ((i1, i2) <- forest.zip(forest.reverse).take((forest.length + 1) / 2))
yield product(i1, i2)
p.flatten
}
/**
* Returns all valid join combinations that can be
* created from two lists of trees.
*/
private def product(t1: List[Tree], t2: List[Tree]): List[Tree] = {
def matchingJoins(s1: Tree, s2: Tree): List[Join] =
joins.filter(j => {
val left = j.tables intersect s1.tables
val right = j.tables intersect s2.tables
!left.isEmpty && !right.isEmpty && left.size + right.size == j.tables.size
})
def simpleJoins(s1: Tree, s2: Tree): List[Tree] = {
val m = matchingJoins(s1, s2)
if (!m.isEmpty)
join(s1, s2, m)
else
Nil
}
def cartesianJoins(s1: Tree, s2: Tree, rowLimit: Float): List[Tree] = {
if (s1.rows * s2.rows > rowLimit)
Nil
else {
val m = matchingJoins(s1, s2)
if (m.isEmpty)
join(s1, s2, m)
else
Nil
}
}
def joinUsing(joinGenerator: (Tree, Tree) => List[Tree]): List[Tree] = {
for (s1 <- t1;
s2 <- t2 if !(s1.tables containsAny s2.tables);
j <- joinGenerator(s1, s2))
yield j
}
val joins1 = joinUsing(simpleJoins)
val rowLimit = (List(Float.MaxValue) ++ (joins1 map (_.rows))) min
val joins2 = joinUsing(cartesianJoins(_, _, rowLimit))
joins1 ::: joins2
}
/**
* Joins two trees using various join algorithms.
* The implementation need not check for correctness of the input parameters.
* @param t1: left tree
* @param t2: right tree
* @param joins: joins that should be all used to produce every tree returned by the result
* @return list of possible results
*/
protected def join(t1: Tree, t2: Tree, joins: List[Join]): List[Tree]
}
|