diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index 1628b0d..4da8786 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -350,16 +350,21 @@ cost_samplescan(Path *path, PlannerInfo *root,
  *
  * 'rel' is the relation to be operated upon
  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ * 'rows', if not NULL, points to a caller-supplied row estimate to use in
+ * place of the estimates from 'param_info' or 'rel'; this is useful when no
+ * suitable rel is available to take a row estimate from.
  */
 void
 cost_gather(GatherPath *path, PlannerInfo *root,
-			RelOptInfo *rel, ParamPathInfo *param_info)
+			RelOptInfo *rel, ParamPathInfo *param_info,
+			double *rows)
 {
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
 
 	/* Mark the path with the correct row estimate */
-	if (param_info)
+	if (rows)
+		path->path.rows = *rows;
+	else if (param_info)
 		path->path.rows = param_info->ppi_rows;
 	else
 		path->path.rows = rel->rows;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 5ac60b3..b09125c 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -1491,8 +1491,8 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path)
 					extract_grouping_ops(best_path->groupClause),
 					best_path->groupingSets,
 					best_path->numGroups,
-					false,
-					true,
+					best_path->combineStates,
+					best_path->finalizeAggs,
 					subplan);
 
 	copy_generic_path_info(&plan->plan, (Path *) best_path);
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index a5ea6af..e4f94be 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -1710,6 +1710,19 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 
 		Assert(current_rel->cheapest_total_path != NULL);
 
+		/* Likewise for any partial paths. */
+		foreach(lc, scan_join_rel->partial_pathlist)
+		{
+			Path	   *subpath = (Path *) lfirst(lc);
+			Path	   *path;
+
+			Assert(subpath->param_info == NULL);
+			path = apply_projection_to_path(root, current_rel,
+											subpath, sub_target);
+			current_rel->partial_pathlist =
+								lappend(current_rel->partial_pathlist, path);
+		}
+
 		/*
 		 * If we have grouping and/or aggregation, consider ways to implement
 		 * that.  We build a new upperrel representing the output of this
@@ -3119,6 +3132,7 @@ create_grouping_paths(PlannerInfo *root,
 	RelOptInfo *grouped_rel;
 	bool		can_hash;
 	bool		can_sort;
+	bool		can_parallel;
 	ListCell   *lc;
 
 	/* For now, do all work in the (GROUP_AGG, NULL) upperrel */
@@ -3175,6 +3189,45 @@ create_grouping_paths(PlannerInfo *root,
 	}
 
 	/*
+	 * Here we consider performing aggregation in parallel using multiple
+	 * worker processes.  We can permit this when there is at least one
+	 * partial path in input_rel, but not if the query has grouping sets
+	 * (although supporting those likely just requires a bit more thought).
+	 * We also disallow parallel mode when the target list contains any
+	 * volatile functions, as that would create a multiple-evaluation hazard.
+	 *
+	 * Parallel grouping and aggregation is performed in two phases.  In the
+	 * first phase, which runs in parallel, each worker builds groups from
+	 * the input tuples of its partial path; each worker's groups are then
+	 * gathered by a Gather node and passed to the master backend process,
+	 * which performs the second and final grouping or aggregation phase.
+	 * This is supported for both hash aggregation and group aggregation,
+	 * although currently we only consider paths which use the same strategy
+	 * for both phases; we never mix hash aggregation in one phase with group
+	 * aggregation in the other.  Perhaps that would be a worthwhile future
+	 * addition, but for now, let's keep it simple.
+	 */
+	can_parallel = false;
+
+	if ((parse->hasAggs || parse->groupClause != NIL) &&
+		input_rel->partial_pathlist != NIL &&
+		parse->groupingSets == NIL &&
+		!contain_volatile_functions((Node *) tlist))
+	{
+		/*
+		 * Check that all aggregate functions support partial mode; if there
+		 * are no aggregate functions then we can skip this check.
+		 */
+		if (!parse->hasAggs)
+			can_parallel = true;
+		else if (aggregates_allow_partial((Node *) tlist) == PAT_ANY &&
+				 aggregates_allow_partial(root->parse->havingQual) == PAT_ANY)
+			can_parallel = true;
+	}
+
+	/*
 	 * Create the desired Agg and/or Group path(s)
 	 *
 	 * HAVING clause, if any, becomes qual of the Agg or Group node.
@@ -3191,7 +3244,33 @@ create_grouping_paths(PlannerInfo *root,
 								 parse->groupingSets,
 								 (List *) parse->havingQual,
 								 agg_costs,
-								 dNumGroups));
+								 dNumGroups,
+								 0));
+
+		if (can_parallel)
+		{
+			/*
+			 * Consider parallel hash aggregate for each partial path.
+			 * XXX Should we fetch the cheapest of these and just consider that
+			 * one?
+			 */
+			foreach(lc, input_rel->partial_pathlist)
+			{
+				Path *path = (Path *) lfirst(lc);
+
+				add_path(grouped_rel, (Path *)
+						 create_agg_path(root, grouped_rel,
+										 path,
+										 make_pathtarget_from_tlist(root, tlist),
+										 AGG_HASHED,
+										 parse->groupClause,
+										 parse->groupingSets,
+										 (List *) parse->havingQual,
+										 agg_costs,
+										 dNumGroups,
+										 path->parallel_degree));
+			}
+		}
 	}
 
 	if (can_sort)
@@ -3237,6 +3316,47 @@ create_grouping_paths(PlannerInfo *root,
 												dNumGroups));
 				}
 			}
+
+			if (can_parallel)
+			{
+				AggStrategy aggstrategy;
+
+				if (list_length(parse->groupClause) > 0)
+					aggstrategy = AGG_SORTED;
+				else
+					aggstrategy = AGG_PLAIN;
+
+				foreach(lc, input_rel->partial_pathlist)
+				{
+					Path	   *path = (Path *) lfirst(lc);
+					bool		is_sorted;
+					int			parallel_degree = path->parallel_degree;
+
+					/*
+					 * XXX is this wasted effort? Currently no partial paths
+					 * are sorted.
+					 */
+					is_sorted = pathkeys_contained_in(root->group_pathkeys,
+													  path->pathkeys);
+					if (!is_sorted)
+						path = (Path *) create_sort_path(root,
+														grouped_rel,
+														path,
+														root->group_pathkeys,
+														-1.0);
+					add_path(grouped_rel, (Path *)
+								create_agg_path(root, grouped_rel,
+												path,
+												make_pathtarget_from_tlist(root, tlist),
+												aggstrategy,
+												parse->groupClause,
+												parse->groupingSets,
+												(List *) parse->havingQual,
+												agg_costs,
+												dNumGroups,
+												parallel_degree));
+				}
+			}
 		}
 		else if (parse->groupClause)
 		{
@@ -3269,7 +3389,41 @@ create_grouping_paths(PlannerInfo *root,
 																	  tlist),
 											   parse->groupClause,
 											   (List *) parse->havingQual,
-											   dNumGroups));
+											   dNumGroups,
+											   0));
+				}
+			}
+
+			if (can_parallel)
+			{
+				foreach(lc, input_rel->partial_pathlist)
+				{
+					Path	   *path = (Path *) lfirst(lc);
+					bool		is_sorted;
+					int			parallel_degree = path->parallel_degree;
+
+					/*
+					 * XXX is this wasted effort? Currently no partial paths
+					 * are sorted.
+					 */
+					is_sorted = pathkeys_contained_in(root->group_pathkeys,
+													  path->pathkeys);
+					if (!is_sorted)
+						path = (Path *) create_sort_path(root,
+														grouped_rel,
+														path,
+														root->group_pathkeys,
+														-1.0);
+					add_path(grouped_rel, (Path *)
+							 create_group_path(root,
+											   grouped_rel,
+											   path,
+											 make_pathtarget_from_tlist(root,
+																	  tlist),
+											   parse->groupClause,
+											   (List *) parse->havingQual,
+											   dNumGroups,
+											   parallel_degree));
 				}
 			}
 		}
@@ -3624,7 +3778,8 @@ create_distinct_paths(PlannerInfo *root,
 								 NIL,
 								 NIL,
 								 NULL,
-								 numDistinctRows));
+								 numDistinctRows,
+								 0));
 	}
 
 	/* Give a helpful error if we failed to find any implementation */
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index b931a91..b35b677 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -15,7 +15,9 @@
  */
 #include "postgres.h"
 
+#include "access/htup_details.h"
 #include "access/transam.h"
+#include "catalog/pg_aggregate.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -139,6 +141,16 @@ static List *set_returning_clause_references(PlannerInfo *root,
 static bool fix_opfuncids_walker(Node *node, void *context);
 static bool extract_query_dependencies_walker(Node *node,
 								  PlannerInfo *context);
+static void set_combineagg_references(PlannerInfo *root, Plan *plan,
+									  int rtoffset);
+static Node *fix_combine_agg_expr(PlannerInfo *root,
+								  Node *node,
+								  indexed_tlist *subplan_itlist,
+								  Index newvarno,
+								  int rtoffset);
+static Node *fix_combine_agg_expr_mutator(Node *node,
+										  fix_upper_expr_context *context);
+static void set_partialagg_aggref_types(PlannerInfo *root, Plan *plan);
 
 /*****************************************************************************
  *
@@ -667,8 +679,23 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
 			}
 			break;
 		case T_Agg:
-			set_upper_references(root, plan, rtoffset);
-			break;
+			{
+				Agg *aggplan = (Agg *) plan;
+
+				/*
+				 * For partial aggregation we must adjust the return types of
+				 * the Aggrefs
+				 */
+				if (!aggplan->finalizeAggs)
+					set_partialagg_aggref_types(root, plan);
+
+				if (aggplan->combineStates)
+					set_combineagg_references(root, plan, rtoffset);
+				else
+					set_upper_references(root, plan, rtoffset);
+
+				break;
+			}
 		case T_Group:
 			set_upper_references(root, plan, rtoffset);
 			break;
@@ -2477,3 +2504,188 @@ extract_query_dependencies_walker(Node *node, PlannerInfo *context)
 	return expression_tree_walker(node, extract_query_dependencies_walker,
 								  (void *) context);
 }
+
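+/*
+ * set_combineagg_references
+ *	  Modify the target list and quals of a combining Agg node to reference
+ *	  the outputs of its subplan, fixing up the contained Aggrefs so that
+ *	  their arguments point at the partially aggregated results produced
+ *	  below the Gather.
+ */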
+static void
+set_combineagg_references(PlannerInfo *root, Plan *plan, int rtoffset)
+{
+	Plan	   *subplan = plan->lefttree;
+	indexed_tlist *subplan_itlist;
+	List	   *output_targetlist;
+	ListCell   *l;
+
+	Assert(IsA(plan, Agg));
+	Assert(((Agg *) plan)->combineStates);
+
+	subplan_itlist = build_tlist_index(subplan->targetlist);
+
+	output_targetlist = NIL;
+
+	foreach(l, plan->targetlist)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(l);
+		Node	   *newexpr;
+
+		/* If it's a non-Var sort/group item, first try to match by sortref */
+		if (tle->ressortgroupref != 0 && !IsA(tle->expr, Var))
+		{
+			newexpr = (Node *)
+				search_indexed_tlist_for_sortgroupref((Node *) tle->expr,
+														tle->ressortgroupref,
+														subplan_itlist,
+														OUTER_VAR);
+			if (!newexpr)
+				newexpr = fix_combine_agg_expr(root,
+												(Node *) tle->expr,
+												subplan_itlist,
+												OUTER_VAR,
+												rtoffset);
+		}
+		else
+			newexpr = fix_combine_agg_expr(root,
+											(Node *) tle->expr,
+											subplan_itlist,
+											OUTER_VAR,
+											rtoffset);
+		tle = flatCopyTargetEntry(tle);
+		tle->expr = (Expr *) newexpr;
+		output_targetlist = lappend(output_targetlist, tle);
+	}
+
+	plan->targetlist = output_targetlist;
+
+	plan->qual = (List *)
+		fix_upper_expr(root,
+					   (Node *) plan->qual,
+					   subplan_itlist,
+					   OUTER_VAR,
+					   rtoffset);
+
+	pfree(subplan_itlist);
+}
+
+
+/*
+ * fix_combine_agg_expr
+ *	  Adjust each Aggref's args to reference the corresponding Aggref output
+ *	  in the outer subplan's target list.
+ */
+static Node *
+fix_combine_agg_expr(PlannerInfo *root,
+			   Node *node,
+			   indexed_tlist *subplan_itlist,
+			   Index newvarno,
+			   int rtoffset)
+{
+	fix_upper_expr_context context;
+
+	context.root = root;
+	context.subplan_itlist = subplan_itlist;
+	context.newvarno = newvarno;
+	context.rtoffset = rtoffset;
+	return fix_combine_agg_expr_mutator(node, &context);
+}
+
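+/*
+ * fix_combine_agg_expr_mutator
+ *	  Expression tree mutator for fix_combine_agg_expr.  Vars are looked up
+ *	  in the subplan's target list as usual; Aggrefs are matched to the
+ *	  corresponding Aggref in the subplan and have their args replaced with
+ *	  a Var referencing that subplan output.
+ */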
+static Node *
+fix_combine_agg_expr_mutator(Node *node, fix_upper_expr_context *context)
+{
+	Var		   *newvar;
+
+	if (node == NULL)
+		return NULL;
+	if (IsA(node, Var))
+	{
+		Var		   *var = (Var *) node;
+
+		newvar = search_indexed_tlist_for_var(var,
+											  context->subplan_itlist,
+											  context->newvarno,
+											  context->rtoffset);
+		if (!newvar)
+			elog(ERROR, "variable not found in subplan target list");
+		return (Node *) newvar;
+	}
+	if (IsA(node, Aggref))
+	{
+		TargetEntry *tle;
+		Aggref		*aggref = (Aggref*) node;
+
+		tle = tlist_member(node, context->subplan_itlist->tlist);
+		if (tle)
+		{
+			/* Found a matching subplan output expression */
+			Var		   *newvar;
+			TargetEntry *newtle;
+
+			newvar = makeVarFromTargetEntry(context->newvarno, tle);
+			newvar->varnoold = 0;	/* wasn't ever a plain Var */
+			newvar->varoattno = 0;
+
+			/*
+			 * Update the Aggref's args so that the new Var, which points at
+			 * the matching partial-aggregate output in the subplan, becomes
+			 * the Aggref's sole argument.  Being the only input to the
+			 * combine function, its TargetEntry always gets resno 1.
+			 */
+			newtle = makeTargetEntry((Expr *) newvar, 1, NULL, false);
+			aggref->args = list_make1(newtle);
+
+			return (Node *) aggref;
+		}
+		else
+			elog(ERROR, "aggref not found in subplan target list");
+	}
+	if (IsA(node, PlaceHolderVar))
+	{
+		PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+		/* See if the PlaceHolderVar has bubbled up from a lower plan node */
+		if (context->subplan_itlist->has_ph_vars)
+		{
+			newvar = search_indexed_tlist_for_non_var((Node *) phv,
+													  context->subplan_itlist,
+													  context->newvarno);
+			if (newvar)
+				return (Node *) newvar;
+		}
+		/* If not supplied by input plan, evaluate the contained expr */
+		return fix_upper_expr_mutator((Node *) phv->phexpr, context);
+	}
+	if (IsA(node, Param))
+		return fix_param_node(context->root, (Param *) node);
+
+	fix_expr_common(context->root, node);
+	return expression_tree_mutator(node,
+								   fix_combine_agg_expr_mutator,
+								   (void *) context);
+}
+
+/* XXX is this really the best place and way to do this? */
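+/*
+ * set_partialagg_aggref_types
+ *	  For a non-finalizing Agg node, change each Aggref in the target list
+ *	  to return the aggregate's transition type, since the executor will be
+ *	  emitting transition states rather than final aggregate values.
+ */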
+static void
+set_partialagg_aggref_types(PlannerInfo *root, Plan *plan)
+{
+	ListCell *l;
+
+	foreach(l, plan->targetlist)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+		if (IsA(tle->expr, Aggref))
+		{
+			Aggref *aggref = (Aggref *) tle->expr;
+			HeapTuple	aggTuple;
+			Form_pg_aggregate aggform;
+
+			aggTuple = SearchSysCache1(AGGFNOID,
+									   ObjectIdGetDatum(aggref->aggfnoid));
+			if (!HeapTupleIsValid(aggTuple))
+				elog(ERROR, "cache lookup failed for aggregate %u",
+					 aggref->aggfnoid);
+			aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+
+			aggref->aggtype = aggform->aggtranstype;
+
+			ReleaseSysCache(aggTuple);
+		}
+	}
+}
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 10d919c..dfd3b72 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -862,7 +862,8 @@ make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
 										NIL,
 										NIL,
 										NULL,
-										dNumGroups);
+										dNumGroups,
+										0);
 	}
 	else
 	{
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 6ac25dc..ff8ac19 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -52,6 +52,10 @@
 #include "utils/syscache.h"
 #include "utils/typcache.h"
 
+typedef struct
+{
+	PartialAggType allowedtype;
+} partial_agg_context;
 
 typedef struct
 {
@@ -93,6 +97,7 @@ typedef struct
 	bool		allow_restricted;
 } has_parallel_hazard_arg;
 
+static bool partial_aggregate_walker(Node *node, partial_agg_context *context);
 static bool contain_agg_clause_walker(Node *node, void *context);
 static bool count_agg_clauses_walker(Node *node,
 						 count_agg_clauses_context *context);
@@ -400,6 +405,81 @@ make_ands_implicit(Expr *clause)
  *****************************************************************************/
 
 /*
+ * aggregates_allow_partial
+ *		Recursively search for Aggref clauses and determine the maximum
+ *		'degree' of partial aggregation which can be supported. Partial
+ *		aggregation requires that each aggregate does not have a DISTINCT or
+ *		ORDER BY clause, and that it also has a combine function set.
+ */
+PartialAggType
+aggregates_allow_partial(Node *clause)
+{
+	partial_agg_context context;
+
+	/* initially any type is okay, until we find Aggrefs that say otherwise */
+	context.allowedtype = PAT_ANY;
+
+	(void) partial_aggregate_walker(clause, &context);
+	return context.allowedtype;
+}
+
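+/*
+ * partial_aggregate_walker
+ *	  Expression tree walker for aggregates_allow_partial.  Examines each
+ *	  Aggref and downgrades context->allowedtype as required, returning true
+ *	  to abort the search as soon as partial aggregation is found to be
+ *	  disallowed entirely.
+ */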
+static bool
+partial_aggregate_walker(Node *node, partial_agg_context *context)
+{
+	if (node == NULL)
+		return false;
+	if (IsA(node, Aggref))
+	{
+		Aggref	   *aggref = (Aggref *) node;
+		HeapTuple	aggTuple;
+		Form_pg_aggregate aggform;
+
+		Assert(aggref->agglevelsup == 0);
+
+		/*
+		 * We can't perform partial aggregation with Aggrefs containing a
+		 * DISTINCT or ORDER BY clause.
+		 */
+		if (aggref->aggdistinct || aggref->aggorder)
+		{
+			context->allowedtype = PAT_DISABLED;
+			return true;	/* abort search */
+		}
+		aggTuple = SearchSysCache1(AGGFNOID,
+								   ObjectIdGetDatum(aggref->aggfnoid));
+		if (!HeapTupleIsValid(aggTuple))
+			elog(ERROR, "cache lookup failed for aggregate %u",
+				 aggref->aggfnoid);
+		aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+
+		/*
+		 * If there is no combine func, then partial aggregation is not
+		 * possible.
+		 */
+		if (!OidIsValid(aggform->aggcombinefn))
+		{
+			ReleaseSysCache(aggTuple);
+			context->allowedtype = PAT_DISABLED;
+			return true;	/* abort search */
+		}
+
+		/*
+		 * If we find any aggs with an internal transtype then we must ensure
+		 * that pointers to aggregate states are not passed to other
+		 * processes; therefore we limit the allowed degree to
+		 * PAT_INTERNAL_ONLY.
+		 */
+		if (aggform->aggtranstype == INTERNALOID)
+			context->allowedtype = PAT_INTERNAL_ONLY;
+
+		ReleaseSysCache(aggTuple);
+		return false; /* continue searching */
+	}
+	return expression_tree_walker(node, partial_aggregate_walker,
+								  (void *) context);
+}
+
+/*
  * contain_agg_clause
  *	  Recursively search for Aggref/GroupingFunc nodes within a clause.
  *
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 2d6e8aa..16638e7 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1644,7 +1644,7 @@ create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		pathnode->single_copy = true;
 	}
 
-	cost_gather(pathnode, root, rel, pathnode->path.param_info);
+	cost_gather(pathnode, root, rel, pathnode->path.param_info, NULL);
 
 	return pathnode;
 }
@@ -2262,7 +2262,13 @@ create_sort_path(PlannerInfo *root,
  * 'rel' is the parent relation associated with the result
  * 'subpath' is the path representing the source of data
  * 'groupClause' is a list of SortGroupClause's
- * 'qual' is the HAVING quals if any
+ * 'qual' is the HAVING quals if any.
+ *
+ * When parallel_degree is greater than zero we build a two-phase grouping
+ * plan: the first phase is executed in parallel, its results are consumed by
+ * a Gather node, and the final grouping phase, where any HAVING clause is
+ * applied, is performed above the Gather.
+ *
  * XXX more
  */
 GroupPath *
@@ -2272,22 +2278,28 @@ create_group_path(PlannerInfo *root,
 				  PathTarget *target,
 				  List *groupClause,
 				  List *qual,
-				  double numGroups)
+				  double numGroups,
+				  int parallel_degree)
 {
 	GroupPath  *pathnode = makeNode(GroupPath);
+	bool		parallel_grouping = parallel_degree > 0;
 
 	pathnode->path.pathtype = T_Group;
 	pathnode->path.parent = rel;
 	pathnode->path.pathtarget = target;
 	/* For now, assume we are above any joins, so no parameterization */
 	pathnode->path.param_info = NULL;
+	pathnode->path.parallel_aware = false;
+	pathnode->path.parallel_safe = false;		/* XXX */
+	pathnode->path.parallel_degree = parallel_degree;
 	/* Group doesn't change sort ordering */
 	pathnode->path.pathkeys = subpath->pathkeys;
 
 	pathnode->subpath = subpath;
 
 	pathnode->groupClause = groupClause;
-	pathnode->qual = qual;
+	/* Only apply the qual during the final grouping phase */
+	pathnode->qual = parallel_grouping ? NIL : qual;
 
 	cost_group(&pathnode->path, root,
 			   list_length(groupClause),
@@ -2295,6 +2307,73 @@ create_group_path(PlannerInfo *root,
 			   subpath->startup_cost, subpath->total_cost,
 			   subpath->rows);
 
+	/* Add additional paths when in parallel mode */
+	if (parallel_grouping)
+	{
+		GatherPath	   *gatherpath = makeNode(GatherPath);
+		GroupPath	   *finalgrouppath = makeNode(GroupPath);
+		SortPath	   *sortpath;
+		double			numPartialGroups;
+
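+		/*
+		 * Build a Gather path to collect the partially grouped rows from the
+		 * worker processes, sort the gathered rows, and perform the final
+		 * grouping in a second Group node, which is where any HAVING qual is
+		 * applied.
+		 */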
+		gatherpath->path.pathtype = T_Gather;
+		gatherpath->path.parent = rel; /* XXX ? */
+		gatherpath->path.pathtarget = target;
+		gatherpath->path.param_info = NULL;
+		gatherpath->path.parallel_aware = false;
+		gatherpath->path.parallel_safe = false;
+		gatherpath->path.parallel_degree = parallel_degree;
+		gatherpath->path.pathkeys = NIL;	/* output is unordered */
+		gatherpath->subpath = (Path *) pathnode;
+		gatherpath->single_copy = false; /* XXX? */
+
+		/*
+		 * Estimate the total number of groups that the Gather node will
+		 * receive from the worker processes.  We assume that each worker
+		 * produces every possible group; this may be an overestimate, but it
+		 * seems safer to overestimate here than to underestimate.  To keep
+		 * the number sane we cap the per-worker group count at the number of
+		 * rows in the input path, which covers the case where there are
+		 * fewer than parallel_degree input tuples per group on average.
+		 */
+		numPartialGroups = Min(numGroups, subpath->rows) *
+							(parallel_degree + 1);
+
+		cost_gather(gatherpath, root, NULL, NULL, &numPartialGroups);
+
+		sortpath = create_sort_path(root,
+									 rel,
+									 &gatherpath->path,
+									 root->query_pathkeys,
+									 -1.0);
+
+		finalgrouppath->path.pathtype = T_Group;
+		finalgrouppath->path.parent = rel;
+		finalgrouppath->path.pathtarget = target;
+		/* For now, assume we are above any joins, so no parameterization */
+		finalgrouppath->path.param_info = NULL;
+		finalgrouppath->path.parallel_aware = false;
+		finalgrouppath->path.parallel_safe = false;		/* XXX */
+		finalgrouppath->path.parallel_degree = 0;
+		/* Group doesn't change sort ordering */
+		finalgrouppath->path.pathkeys = subpath->pathkeys;
+
+		finalgrouppath->subpath = (Path *) sortpath;
+
+		finalgrouppath->groupClause = groupClause;
+		finalgrouppath->qual = qual;
+
+		cost_group(&finalgrouppath->path, root,
+				   list_length(groupClause),
+				   numGroups,
+				   sortpath->path.startup_cost,
+				   sortpath->path.total_cost,
+				   numPartialGroups);
+
+		/* Overwrite the return value with the final Group node */
+		pathnode = finalgrouppath;
+	}
+
 	/* add tlist eval cost for each output row */
 	pathnode->path.startup_cost += target->cost.startup;
 	pathnode->path.total_cost += target->cost.startup +
@@ -2372,9 +2451,12 @@ create_agg_path(PlannerInfo *root,
 				List *groupingSets,
 				List *qual,
 				const AggClauseCosts *aggcosts,
-				double numGroups)
+				double numGroups,
+				int parallel_degree)
 {
-	AggPath    *pathnode = makeNode(AggPath);
+	AggPath	   *pathnode = makeNode(AggPath);
+	bool		parallel_agg = parallel_degree > 0;
+	Path	   *currentpath;
 
 	pathnode->path.pathtype = T_Agg;
 	pathnode->path.parent = rel;
@@ -2383,7 +2465,7 @@ create_agg_path(PlannerInfo *root,
 	pathnode->path.param_info = NULL;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = false;		/* XXX */
-	pathnode->path.parallel_degree = 0;
+	pathnode->path.parallel_degree = parallel_degree;
 	if (aggstrategy == AGG_SORTED)
 		pathnode->path.pathkeys = subpath->pathkeys;	/* preserves order */
 	else
@@ -2394,7 +2476,10 @@ create_agg_path(PlannerInfo *root,
 	pathnode->numGroups = numGroups;
 	pathnode->groupClause = groupClause;
 	pathnode->groupingSets = groupingSets;
-	pathnode->qual = qual;
+	/* Only apply the HAVING clause during the final aggregation phase */
+	pathnode->qual = parallel_agg ? NIL : qual;
+	pathnode->combineStates = false;
+	pathnode->finalizeAggs = !parallel_agg;
 
 	cost_agg(&pathnode->path, root,
 			 aggstrategy, aggcosts,
@@ -2402,11 +2487,93 @@ create_agg_path(PlannerInfo *root,
 			 subpath->startup_cost, subpath->total_cost,
 			 subpath->rows);
 
+	/* Add additional paths when in parallel mode */
+	if (parallel_agg)
+	{
+		GatherPath	   *gatherpath = makeNode(GatherPath);
+		AggPath		   *finalaggpath = makeNode(AggPath);
+		double			numPartialGroups;
+
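+		/*
+		 * Build a Gather path to collect the partial aggregate states from
+		 * the worker processes, sort them if we are using AGG_SORTED, and
+		 * combine and finalize them in a second Agg node, which is where any
+		 * HAVING qual is applied.
+		 */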
+		gatherpath->path.pathtype = T_Gather;
+		gatherpath->path.parent = rel; /* XXX ? */
+		gatherpath->path.pathtarget = target;
+		gatherpath->path.param_info = NULL;
+		gatherpath->path.parallel_aware = false;
+		gatherpath->path.parallel_safe = false;
+		gatherpath->path.parallel_degree = parallel_degree;
+		gatherpath->path.pathkeys = NIL;	/* output is unordered */
+		gatherpath->subpath = (Path *) pathnode;
+		gatherpath->single_copy = false; /* XXX? */
+
+		/*
+		 * Estimate the total number of groups that the Gather node will
+		 * receive from the aggregate worker processes.  We assume that each
+		 * worker produces every possible group; this may be an overestimate,
+		 * but it seems safer to overestimate here than to underestimate.  To
+		 * keep the number sane we cap the per-worker group count at the
+		 * number of rows in the input path, which covers the case where
+		 * there are fewer than parallel_degree input tuples per group on
+		 * average.
+		 */
+		numPartialGroups = Min(numGroups, subpath->rows) *
+							(parallel_degree + 1);
+
+		cost_gather(gatherpath, root, NULL, NULL, &numPartialGroups);
+
+		currentpath = &gatherpath->path;
+
+		if (aggstrategy == AGG_SORTED)
+		{
+			SortPath *sortpath;
+
+			sortpath = create_sort_path(root,
+										 rel,
+										 &gatherpath->path,
+										 root->query_pathkeys,
+										 -1.0);
+			currentpath = &sortpath->path;
+		}
+
+		finalaggpath->path.pathtype = T_Agg;
+		finalaggpath->path.parent = rel;
+		finalaggpath->path.pathtarget = target;
+		/* For now, assume we are above any joins, so no parameterization */
+		finalaggpath->path.param_info = NULL;
+		finalaggpath->path.parallel_aware = false;
+		finalaggpath->path.parallel_safe = false;		/* XXX */
+		finalaggpath->path.parallel_degree = 0;
+
+		/* if sorted, the output order is preserved */
+		if (aggstrategy == AGG_SORTED)
+			finalaggpath->path.pathkeys = subpath->pathkeys;
+		else
+			finalaggpath->path.pathkeys = NIL;	/* output is unordered */
+
+		finalaggpath->subpath = currentpath;
+
+		finalaggpath->aggstrategy = aggstrategy;
+		finalaggpath->numGroups = numGroups;
+		finalaggpath->groupClause = groupClause;
+		finalaggpath->groupingSets = groupingSets;
+		finalaggpath->qual = qual;
+		finalaggpath->combineStates = true;
+		finalaggpath->finalizeAggs = true;
+
+		cost_agg(&finalaggpath->path, root,
+				 aggstrategy, aggcosts,
+				 list_length(groupClause), numGroups,
+				 currentpath->startup_cost, currentpath->total_cost,
+				 numPartialGroups);
+
+		/* Overwrite the return value with the final aggregate node */
+		pathnode = finalaggpath;
+	}
+
 	/* add tlist eval cost for each output row */
+	/* XXX does this need to happen at each agg level during parallel agg? */
 	pathnode->path.startup_cost += target->cost.startup;
 	pathnode->path.total_cost += target->cost.startup +
 		target->cost.per_tuple * pathnode->path.rows;
-
 	return pathnode;
 }
 
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index cd97ddb..c77b569 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1303,6 +1303,8 @@ typedef struct AggPath
 	List	   *groupClause;	/* a list of SortGroupClause's */
 	List	   *groupingSets;	/* grouping sets to use */
 	List	   *qual;			/* quals (HAVING quals), if any */
+	bool		combineStates;	/* input is partially aggregated agg states */
+	bool		finalizeAggs;	/* should the executor call the finalfn? */
 } AggPath;
 
 /*
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index 3b3fd0f..d381ff0 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -27,6 +27,25 @@ typedef struct
 	List	  **windowFuncs;	/* lists of WindowFuncs for each winref */
 } WindowFuncLists;
 
+/*
+ * PartialAggType
+ *	PartialAggType stores whether partial aggregation is allowed, and in
+ *	which context it is allowed.  Three states are needed because there are
+ *	two different contexts in which partial aggregation is safe.  For
+ *	aggregates whose transition type ('stype') is INTERNAL, it is okay to
+ *	pass a pointer to the aggregate state around within a single backend
+ *	process, since the memory the pointer references belongs to that
+ *	process.  Where the aggregate state must be passed between processes,
+ *	for example during parallel aggregation, passing such a pointer is not
+ *	okay, because the referenced memory is not accessible from another
+ *	process.
+ */
+typedef enum
+{
+	PAT_ANY = 0,		/* Any type of partial aggregation is okay. */
+	PAT_INTERNAL_ONLY,	/* Only okay within a single backend process. */
+	PAT_DISABLED		/* Partial aggregation is not supported at all. */
+} PartialAggType;
 
 extern Expr *make_opclause(Oid opno, Oid opresulttype, bool opretset,
 			  Expr *leftop, Expr *rightop,
@@ -47,6 +66,7 @@ extern Node *make_and_qual(Node *qual1, Node *qual2);
 extern Expr *make_ands_explicit(List *andclauses);
 extern List *make_ands_implicit(Expr *clause);
 
+extern PartialAggType aggregates_allow_partial(Node *clause);
 extern bool contain_agg_clause(Node *clause);
 extern void count_agg_clauses(PlannerInfo *root, Node *clause,
 				  AggClauseCosts *costs);
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 79b2a88..c37c8a8 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -150,7 +150,7 @@ extern void final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 					SpecialJoinInfo *sjinfo,
 					SemiAntiJoinFactors *semifactors);
 extern void cost_gather(GatherPath *path, PlannerInfo *root,
-			RelOptInfo *baserel, ParamPathInfo *param_info);
+			RelOptInfo *baserel, ParamPathInfo *param_info, double *rows);
 extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
 extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
 extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 341cee1..d7c4ac0 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -153,7 +153,8 @@ extern GroupPath *create_group_path(PlannerInfo *root,
 				  PathTarget *target,
 				  List *groupClause,
 				  List *qual,
-				  double numGroups);
+				  double numGroups,
+				  int parallel_degree);
 extern UpperUniquePath *create_upper_unique_path(PlannerInfo *root,
 						 RelOptInfo *rel,
 						 Path *subpath,
@@ -168,7 +169,8 @@ extern AggPath *create_agg_path(PlannerInfo *root,
 				List *groupingSets,
 				List *qual,
 				const AggClauseCosts *aggcosts,
-				double numGroups);
+				double numGroups,
+				int parallel_degree);
 extern RollupPath *create_rollup_path(PlannerInfo *root,
 				   RelOptInfo *rel,
 				   Path *input_path,
