diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index ee13136..47b020c 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -918,6 +918,17 @@ ExplainNode(PlanState *planstate, List *ancestors,
 			break;
 		case T_Group:
 			pname = sname = "Group";
+			{
+				Group *group = (Group *) plan;
+
+				if (!group->finalizeGroups)
+					operation = "Partial";
+				else if (group->combineStates)
+					operation = "Finalize";
+
+				if (operation != NULL)
+					pname = psprintf("%s %s", operation, pname);
+			}
 			break;
 		case T_Agg:
 			sname = "Aggregate";
diff --git a/src/backend/nodes/equalfuncs.c b/src/backend/nodes/equalfuncs.c
index b9c3959..06d01f3 100644
--- a/src/backend/nodes/equalfuncs.c
+++ b/src/backend/nodes/equalfuncs.c
@@ -29,9 +29,12 @@
 
 #include "postgres.h"
 
+#include "access/htup_details.h"
+#include "catalog/pg_aggregate.h"
 #include "nodes/extensible.h"
 #include "nodes/relation.h"
 #include "utils/datum.h"
+#include "utils/syscache.h"
 
 
 /*
@@ -191,7 +194,35 @@ static bool
 _equalAggref(const Aggref *a, const Aggref *b)
 {
 	COMPARE_SCALAR_FIELD(aggfnoid);
-	COMPARE_SCALAR_FIELD(aggtype);
+
+	/*
+	 * XXX Temporary fix, until we find a better one.  Building the
+	 * partial-aggregate target list for a worker changes an Aggref's
+	 * aggtype to the aggregate's transition type.  Tolerate that mismatch
+	 * here so that setting upper references in plans above the partial
+	 * aggregate can still match such Aggrefs.
+	 */
+	if (a->aggtype != b->aggtype)
+	{
+		HeapTuple	aggTuple;
+		Form_pg_aggregate aggform;
+
+		aggTuple = SearchSysCache1(AGGFNOID,
+								   ObjectIdGetDatum(a->aggfnoid));
+		if (!HeapTupleIsValid(aggTuple))
+			elog(ERROR, "cache lookup failed for aggregate %u",
+				 a->aggfnoid);
+		aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+
+		if (aggform->aggtranstype != b->aggtype)
+		{
+			ReleaseSysCache(aggTuple);
+			return false;
+		}
+
+		ReleaseSysCache(aggTuple);
+	}
+
 	COMPARE_SCALAR_FIELD(aggcollid);
 	COMPARE_SCALAR_FIELD(inputcollid);
 	COMPARE_NODE_FIELD(aggdirectargs);
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index a08c248..9f1416c 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -1968,7 +1968,7 @@ generate_gather_paths(PlannerInfo *root, RelOptInfo *rel)
 	 */
 	cheapest_partial_path = linitial(rel->partial_pathlist);
 	simple_gather_path = (Path *)
-		create_gather_path(root, rel, cheapest_partial_path, NULL);
+		create_gather_path(root, rel, cheapest_partial_path, NULL, NULL);
 	add_path(rel, simple_gather_path);
 }
 
diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c
index ffff3c0..cfd0c35 100644
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -350,16 +350,21 @@ cost_samplescan(Path *path, PlannerInfo *root,
  *
  * 'rel' is the relation to be operated upon
  * 'param_info' is the ParamPathInfo if this is a parameterized path, else NULL
+ * 'rows', if non-NULL, points to a row estimate to use; this overrides the
+ * estimates otherwise taken from 'param_info' or 'rel'.
  */
 void
 cost_gather(GatherPath *path, PlannerInfo *root,
-			RelOptInfo *rel, ParamPathInfo *param_info)
+			RelOptInfo *rel, ParamPathInfo *param_info,
+			double *rows)
 {
 	Cost		startup_cost = 0;
 	Cost		run_cost = 0;
 
 	/* Mark the path with the correct row estimate */
-	if (param_info)
+	if (rows)
+		path->path.rows = *rows;
+	else if (param_info)
 		path->path.rows = param_info->ppi_rows;
 	else
 		path->path.rows = rel->rows;
diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c
index 12069ae..62b1861 100644
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -261,7 +261,7 @@ static WindowAgg *make_windowagg(List *tlist, Index winref,
 			   Plan *lefttree);
 static Group *make_group(List *tlist, List *qual, int numGroupCols,
 		   AttrNumber *grpColIdx, Oid *grpOperators,
-		   Plan *lefttree);
+		   bool combineStates, bool finalizeGroups, Plan *lefttree);
 static Unique *make_unique_from_sortclauses(Plan *lefttree, Path *path,
 							 List *distinctList);
 static Unique *make_unique_from_pathkeys(Plan *lefttree,
@@ -1471,6 +1471,8 @@ create_group_plan(PlannerInfo *root, GroupPath *best_path)
 					  extract_grouping_cols(best_path->groupClause,
 											subplan->targetlist),
 					  extract_grouping_ops(best_path->groupClause),
+					  best_path->combineStates,
+					  best_path->finalizeGroups,
 					  subplan);
 
 	copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -1532,8 +1534,8 @@ create_agg_plan(PlannerInfo *root, AggPath *best_path)
 
 	plan = make_agg(tlist, quals,
 					best_path->aggstrategy,
-					false,
-					true,
+					best_path->combineStates,
+					best_path->finalizeAggs,
 					list_length(best_path->groupClause),
 					extract_grouping_cols(best_path->groupClause,
 										  subplan->targetlist),
@@ -5699,6 +5701,8 @@ make_group(List *tlist,
 		   int numGroupCols,
 		   AttrNumber *grpColIdx,
 		   Oid *grpOperators,
+		   bool combineStates,
+		   bool finalizeGroups,
 		   Plan *lefttree)
 {
 	Group	   *node = makeNode(Group);
@@ -5709,6 +5713,8 @@ make_group(List *tlist,
 	node->numCols = numGroupCols;
 	node->grpColIdx = grpColIdx;
 	node->grpOperators = grpOperators;
+	node->combineStates = combineStates;
+	node->finalizeGroups = finalizeGroups;
 
 	plan->qual = qual;
 	plan->targetlist = tlist;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 97cd1f2..fe547c0 100644
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -23,6 +23,7 @@
 #include "access/sysattr.h"
 #include "access/xact.h"
 #include "catalog/pg_constraint_fn.h"
+#include "catalog/pg_aggregate.h"
 #include "executor/executor.h"
 #include "executor/nodeAgg.h"
 #include "foreign/fdwapi.h"
@@ -81,6 +82,13 @@ typedef struct
 	List	   *groupClause;	/* overrides parse->groupClause */
 } standard_qp_extra;
 
+typedef struct
+{
+	AttrNumber	resno;
+	List	   *targetlist;
+}	add_qual_to_tlist_context;
+
+
 /* Local functions */
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
@@ -107,6 +115,19 @@ static RelOptInfo *create_grouping_paths(PlannerInfo *root,
 					  AttrNumber *groupColIdx,
 					  List *rollup_lists,
 					  List *rollup_groupclauses);
+static void create_parallelagg_path(PlannerInfo *root,
+						RelOptInfo *input_rel,
+						RelOptInfo *grouped_rel,
+						List *tlist,
+						PathTarget *target,
+						AggStrategy aggstrategy,
+						double dNumGroups,
+						AggClauseCosts *agg_costs);
+static void create_parallelgroup_path(PlannerInfo *root,
+						  RelOptInfo *input_rel,
+						  RelOptInfo *grouped_rel,
+						  PathTarget *target,
+						  double dNumGroups);
 static RelOptInfo *create_window_paths(PlannerInfo *root,
 					RelOptInfo *input_rel,
 					List *base_tlist,
@@ -134,6 +155,9 @@ static List *make_windowInputTargetList(PlannerInfo *root,
 						   List *tlist, List *activeWindows);
 static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc,
 						 List *tlist);
+static List *make_partial_agg_tlist(List *tlist, List *groupClause);
+static List *add_qual_to_tlist(List *targetlist, Node *qual);
+static bool add_qual_to_tlist_walker(Node *node, add_qual_to_tlist_context * context);
 
 
 /*****************************************************************************
@@ -1380,6 +1404,7 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 	RelOptInfo *current_rel;
 	RelOptInfo *final_rel;
 	ListCell   *lc;
+	ListCell   *prev;
 
 	/* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
 	if (parse->limitCount || parse->limitOffset)
@@ -1687,6 +1712,31 @@ grouping_planner(PlannerInfo *root, bool inheritance_update,
 			}
 		}
 
+		/* Likewise for any partial paths. */
+		prev = NULL;
+		foreach(lc, current_rel->partial_pathlist)
+		{
+			Path	   *subpath = (Path *) lfirst(lc);
+			Path	   *path;
+
+			Assert(subpath->param_info == NULL);
+			path = apply_projection_to_path(root, current_rel,
+											subpath, sub_target);
+			if (path != subpath)
+			{
+				/*
+				 * Delete the partial path that is changed to append the
+				 * projection, and add the newly added projection path.
+				 */
+				current_rel->partial_pathlist =
+					list_delete_cell(current_rel->partial_pathlist, lc, prev);
+				current_rel->partial_pathlist =
+					lappend(current_rel->partial_pathlist, path);
+			}
+
+			prev = lc;
+		}
+
 		/*
 		 * Determine the tlist we need grouping paths to emit.  While we could
 		 * skip this if we're not going to call create_grouping_paths, it's
@@ -3101,7 +3151,9 @@ create_grouping_paths(PlannerInfo *root,
 	AggClauseCosts agg_costs;
 	double		dNumGroups;
 	bool		allow_hash;
+	bool		can_parallel;
 	ListCell   *lc;
+	List	   *tlist = make_tlist_from_pathtarget(target);
 
 	/* For now, do all work in the (GROUP_AGG, NULL) upperrel */
 	grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
@@ -3172,6 +3224,47 @@ create_grouping_paths(PlannerInfo *root,
 	}
 
 	/*
+	 * Here we consider performing aggregation in parallel using multiple
+	 * worker processes. We can permit this when there's at least one
+	 * partial_path in input_rel, but not if the query has grouping sets,
+	 * (although this likely just requires a bit more thought). We also
+	 * disallow parallel mode when the target list contains any volatile
+	 * functions, as this would cause a multiple evaluation hazard.
+	 *
+	 * Parallel grouping and aggregation occurs in two phases. In the first
+	 * phase, which occurs in parallel, groups are created for each input
+	 * tuple of the partial path, each parallel worker's groups are then
+	 * gathered with a Gather node and serialised into the master backend
+	 * process, which performs the 2nd and final grouping or aggregation
+	 * phase. This is supported for both Hash Aggregate and Group Aggregate,
+	 * although currently we only consider paths to generate plans which
+	 * either use hash aggregate for both phases or group aggregate for both
+	 * phases, we never mix the two to try hashing for the 1st phase then
+	 * group agg on the 2nd phase or vice versa. Perhaps this would be a
+	 * worthwhile future addition, but for now, let's keep it simple.
+	 */
+	can_parallel = false;
+
+	if ((parse->hasAggs || parse->groupClause != NIL) &&
+		input_rel->partial_pathlist != NIL &&
+		parse->groupingSets == NIL &&
+		!contain_volatile_functions((Node *) tlist))
+	{
+		/*
+		 * Check that all aggregate functions support partial mode, however if
+		 * there are no aggregate functions then we can skip this check.
+		 */
+		if (!parse->hasAggs)
+			can_parallel = true;
+		else if (aggregates_allow_partial((Node *) tlist) == PAT_ANY &&
+				 aggregates_allow_partial(root->parse->havingQual) == PAT_ANY)
+			can_parallel = true;
+	}
+
+	if (can_parallel)
+		grouped_rel->consider_parallel = input_rel->consider_parallel;
+
+	/*
 	 * Collect statistics about aggregates for estimating costs.  Note: we do
 	 * not detect duplicate aggregates here; a somewhat-overestimated cost is
 	 * okay for our purposes.
@@ -3256,7 +3349,20 @@ create_grouping_paths(PlannerInfo *root,
 											 parse->groupClause,
 											 (List *) parse->havingQual,
 											 &agg_costs,
-											 dNumGroups));
+											 dNumGroups,
+											 false,
+											 true));
+
+					if (can_parallel)
+						create_parallelagg_path(root,
+												input_rel,
+												grouped_rel,
+												tlist,
+												target,
+								 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
+												dNumGroups,
+												&agg_costs);
+
 				}
 				else if (parse->groupClause)
 				{
@@ -3271,7 +3377,16 @@ create_grouping_paths(PlannerInfo *root,
 											   target,
 											   parse->groupClause,
 											   (List *) parse->havingQual,
-											   dNumGroups));
+											   dNumGroups,
+											   false,
+											   true));
+
+					if (can_parallel)
+						create_parallelgroup_path(root,
+												  input_rel,
+												  grouped_rel,
+												  target,
+												  dNumGroups);
 				}
 				else
 				{
@@ -3342,7 +3457,20 @@ create_grouping_paths(PlannerInfo *root,
 								 parse->groupClause,
 								 (List *) parse->havingQual,
 								 &agg_costs,
-								 dNumGroups));
+								 dNumGroups,
+								 false,
+								 true));
+
+		if (can_parallel)
+			create_parallelagg_path(root,
+									input_rel,
+									grouped_rel,
+									tlist,
+									target,
+									AGG_HASHED,
+									dNumGroups,
+									&agg_costs);
+
 	}
 
 	/* Give a helpful error if we failed to find any implementation */
@@ -3359,6 +3487,167 @@ create_grouping_paths(PlannerInfo *root,
 }
 
 /*
+ * create_parallelagg_path
+ *
+ * Build a new upperrel containing Paths for parallel aggregation.
+ *
+ */
+static void
+create_parallelagg_path(PlannerInfo *root,
+						RelOptInfo *input_rel,
+						RelOptInfo *grouped_rel,
+						List *tlist,
+						PathTarget *target,
+						AggStrategy aggstrategy,
+						double dNumGroups,
+						AggClauseCosts *agg_costs)
+{
+	Query	   *parse = root->parse;
+	List	   *partial_agg_tlist;
+	ListCell   *lc;
+
+	/*
+	 * The underlying Agg targetlist should be a flat tlist of all Vars and
+	 * Aggs needed to evaluate the expressions and final values of aggregates
+	 * present in the main target list. The quals also should be included.
+	 */
+	partial_agg_tlist = make_partial_agg_tlist(
+								 add_qual_to_tlist(tlist, parse->havingQual),
+											   parse->groupClause);
+
+
+	foreach(lc, input_rel->partial_pathlist)
+	{
+		Path	   *path = (Path *) lfirst(lc);
+		double		numPartialGroups;
+
+		if (aggstrategy == AGG_SORTED)
+			path = (Path *) create_sort_path(root,
+											 grouped_rel,
+											 path,
+											 root->group_pathkeys,
+											 -1.0);
+
+		path = (Path *) create_agg_path(root, grouped_rel,
+										path,
+							   make_pathtarget_from_tlist(partial_agg_tlist),
+										aggstrategy,
+										parse->groupClause,
+										(List *) parse->havingQual,
+										agg_costs,
+										dNumGroups,
+										false,
+										false);
+
+		/*
+		 * Estimate the total number of groups which the gather will receive
+		 * from the aggregate worker processes. We'll assume that each worker
+		 * will produce every possible group, this might be an overestimate,
+		 * although it seems safer to over estimate here rather than
+		 * underestimate. To keep this number sane we cap the number of groups
+		 * so it's never larger than the number of rows in the input path.
+		 * This covers the case when there are less than an average of
+		 * parallel_degree input tuples per group.
+		 */
+		numPartialGroups = Min(dNumGroups, path->rows) * (path->parallel_degree + 1);
+
+		path = (Path *) create_gather_path(root, grouped_rel, path, NULL,
+										   &numPartialGroups);
+
+		if (aggstrategy == AGG_SORTED)
+			path = (Path *) create_sort_path(root,
+											 grouped_rel,
+											 path,
+											 root->group_pathkeys,
+											 -1.0);
+
+		add_path(grouped_rel, (Path *)
+				 create_agg_path(root,
+								 grouped_rel,
+								 path,
+								 target,
+								 aggstrategy,
+								 parse->groupClause,
+								 (List *) parse->havingQual,
+								 agg_costs,
+								 dNumGroups,
+								 true,
+								 true));
+	}
+}
+
+/*
+ * create_parallelgroup_path
+ *
+ * Build a new upperrel containing Paths for parallel grouping.
+ *
+ */
+static void
+create_parallelgroup_path(PlannerInfo *root,
+						  RelOptInfo *input_rel,
+						  RelOptInfo *grouped_rel,
+						  PathTarget *target,
+						  double dNumGroups)
+{
+	Query	   *parse = root->parse;
+	ListCell   *lc;
+
+	foreach(lc, input_rel->partial_pathlist)
+	{
+		Path	   *path = (Path *) lfirst(lc);
+		double		numPartialGroups;
+
+		path = (Path *) create_sort_path(root,
+										 grouped_rel,
+										 path,
+										 root->group_pathkeys,
+										 -1.0);
+
+		path = (Path *) create_group_path(root, grouped_rel,
+										  path,
+										  target,
+										  parse->groupClause,
+										  (List *) parse->havingQual,
+										  dNumGroups,
+										  false,
+										  false);
+
+		/*
+		 * Estimate the total number of groups which the gather will receive
+		 * from the aggregate worker processes. We'll assume that each worker
+		 * will produce every possible group, this might be an overestimate,
+		 * although it seems safer to over estimate here rather than
+		 * underestimate. To keep this number sane we cap the number of groups
+		 * so it's never larger than the number of rows in the input path.
+		 * This covers the case when there are less than an average of
+		 * parallel_degree input tuples per group.
+		 */
+		numPartialGroups = Min(dNumGroups, path->rows) * (path->parallel_degree + 1);
+
+		path = (Path *) create_gather_path(root, grouped_rel, path, NULL,
+										   &numPartialGroups);
+
+		path = (Path *) create_sort_path(root,
+										 grouped_rel,
+										 path,
+										 root->group_pathkeys,
+										 -1.0);
+
+		add_path(grouped_rel, (Path *)
+				 create_group_path(root,
+								   grouped_rel,
+								   path,
+								   target,
+								   parse->groupClause,
+								   (List *) parse->havingQual,
+								   dNumGroups,
+								   true,
+								   true));
+	}
+}
+
+
+/*
  * create_window_paths
  *
  * Build a new upperrel containing Paths for window-function evaluation.
@@ -3664,7 +3953,9 @@ create_distinct_paths(PlannerInfo *root,
 								 parse->distinctClause,
 								 NIL,
 								 NULL,
-								 numDistinctRows));
+								 numDistinctRows,
+								 false,
+								 true));
 	}
 
 	/* Give a helpful error if we failed to find any implementation */
@@ -4390,3 +4681,185 @@ plan_cluster_use_sort(Oid tableOid, Oid indexOid)
 
 	return (seqScanAndSortPath.total_cost < indexScanPath->path.total_cost);
 }
+
+/*
+ * make_partial_agg_tlist
+ *	  Generate appropriate Agg node target list for input to ParallelAgg nodes.
+ *
+ * The initial target list passed to ParallelAgg node from the parser contains
+ * aggregates and GROUP BY columns. For the underlying agg node, we want to
+ * generate a tlist containing bare aggregate references (Aggref) and GROUP BY
+ * expressions. So we flatten all expressions except GROUP BY items into their
+ * component variables.
+ * For example, given a query like
+ *		SELECT a+b, 2 * SUM(c+d) , AVG(d)+SUM(c+d) FROM table GROUP BY a+b;
+ * we want to pass this targetlist to the Agg plan:
+ *		a+b, SUM(c+d), AVG(d)
+ * where the a+b target will be used by the Sort/Group steps, and the
+ * other targets will be used for computing the final results.
+ * Note that we don't flatten Aggrefs, since those are to be computed
+ * by the underlying Agg node, and they will be referenced like Vars above it.
+ *
+ * 'tlist' is the ParallelAgg's final target list.
+ *
+ * The result is the targetlist to be computed by the Agg node below the
+ * ParallelAgg node.
+ */
+static List *
+make_partial_agg_tlist(List *tlist, List *groupClause)
+{
+	Bitmapset  *sgrefs;
+	List	   *new_tlist;
+	List	   *flattenable_cols;
+	List	   *flattenable_vars;
+	ListCell   *lc;
+
+	/*
+	 * Collect the sortgroupref numbers of GROUP BY clauses into a bitmapset
+	 * for convenient reference below.
+	 */
+	sgrefs = NULL;
+
+	/* Add in sortgroupref numbers of GROUP BY clauses */
+	foreach(lc, groupClause)
+	{
+		SortGroupClause *grpcl = (SortGroupClause *) lfirst(lc);
+
+		sgrefs = bms_add_member(sgrefs, grpcl->tleSortGroupRef);
+	}
+
+	/*
+	 * Construct a tlist containing all the non-flattenable tlist items, and
+	 * save aside the others for a moment.
+	 */
+	new_tlist = NIL;
+	flattenable_cols = NIL;
+
+	foreach(lc, tlist)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+		/* Don't want to deconstruct GROUP BY items. */
+		if (tle->ressortgroupref != 0 &&
+			bms_is_member(tle->ressortgroupref, sgrefs))
+		{
+			/* Don't want to deconstruct this value, so add to new_tlist */
+			TargetEntry *newtle;
+
+			newtle = makeTargetEntry(tle->expr,
+									 list_length(new_tlist) + 1,
+									 NULL,
+									 false);
+			/* Preserve its sortgroupref marking, in case it's volatile */
+			newtle->ressortgroupref = tle->ressortgroupref;
+			new_tlist = lappend(new_tlist, newtle);
+		}
+		else
+		{
+			/*
+			 * Column is to be flattened, so just remember the expression for
+			 * later call to pull_var_clause.  There's no need for
+			 * pull_var_clause to examine the TargetEntry node itself.
+			 */
+			flattenable_cols = lappend(flattenable_cols, tle->expr);
+		}
+	}
+
+	/*
+	 * Pull out all the Vars and Aggrefs mentioned in flattenable columns, and
+	 * add them to the result tlist if not already present.  (Some might be
+	 * there already because they're used directly as group clauses.)
+	 *
+	 * Note: it's essential to use PVC_INCLUDE_AGGREGATES here, so that the
+	 * Aggrefs are placed in the Agg node's tlist and not left to be computed
+	 * at higher levels.
+	 */
+	flattenable_vars = pull_var_clause((Node *) flattenable_cols,
+									   PVC_INCLUDE_AGGREGATES,
+									   PVC_INCLUDE_PLACEHOLDERS);
+	new_tlist = add_to_flat_tlist(new_tlist, flattenable_vars);
+
+	/* clean up cruft */
+	list_free(flattenable_vars);
+	list_free(flattenable_cols);
+
+	/*
+	 * Update the targetlist aggref->aggtype with the transtype. This is
+	 * required to send the aggregate transition data from workers to the
+	 * backend for combining and returning the final result.
+	 */
+	foreach(lc, new_tlist)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(lc);
+
+		if (IsA(tle->expr, Aggref))
+		{
+			Aggref	   *aggref = (Aggref *) tle->expr;
+			HeapTuple	aggTuple;
+			Form_pg_aggregate aggform;
+
+			aggTuple = SearchSysCache1(AGGFNOID,
+									   ObjectIdGetDatum(aggref->aggfnoid));
+			if (!HeapTupleIsValid(aggTuple))
+				elog(ERROR, "cache lookup failed for aggregate %u",
+					 aggref->aggfnoid);
+			aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+
+			aggref->aggtype = aggform->aggtranstype;
+
+			ReleaseSysCache(aggTuple);
+		}
+	}
+
+	return new_tlist;
+}
+
+/*
+ * add_qual_to_tlist
+ *
+ * Add the agg functions in qual into the target list for the use
+ * of partial aggregate target list in parallel aggregate plan
+ */
+static List *
+add_qual_to_tlist(List *targetlist, Node *qual)
+{
+	add_qual_to_tlist_context context;
+
+	if (qual == NULL)
+		return targetlist;
+
+	context.targetlist = copyObject(targetlist);
+	context.resno = list_length(context.targetlist) + 1;
+
+	add_qual_to_tlist_walker(qual, &context);
+
+	return context.targetlist;
+}
+
+/*
+ * add_qual_to_tlist_walker
+ *		Go through the qual list to get the aggref and add it in targetlist
+ */
+static bool
+add_qual_to_tlist_walker(Node *node, add_qual_to_tlist_context * context)
+{
+	if (node == NULL)
+		return false;
+
+	if (IsA(node, Aggref))
+	{
+		TargetEntry *te;
+
+		te = makeTargetEntry((Expr *) node,
+							 context->resno++,
+							 NULL,
+							 false);
+
+		context->targetlist = lappend(context->targetlist,
+									  te);
+	}
+	else
+		return expression_tree_walker(node, add_qual_to_tlist_walker, context);
+
+	return false;
+}
diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c
index d296d09..f935723 100644
--- a/src/backend/optimizer/plan/setrefs.c
+++ b/src/backend/optimizer/plan/setrefs.c
@@ -140,6 +140,16 @@ static bool fix_opfuncids_walker(Node *node, void *context);
 static bool extract_query_dependencies_walker(Node *node,
 								  PlannerInfo *context);
 
+static void set_combineagg_references(PlannerInfo *root, Plan *plan,
+						  int rtoffset);
+static Node *fix_combine_agg_expr(PlannerInfo *root,
+					 Node *node,
+					 indexed_tlist *subplan_itlist,
+					 Index newvarno,
+					 int rtoffset);
+static Node *fix_combine_agg_expr_mutator(Node *node,
+							 fix_upper_expr_context *context);
+
 /*****************************************************************************
  *
  *		SUBPLAN REFERENCES
@@ -667,8 +677,17 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset)
 			}
 			break;
 		case T_Agg:
-			set_upper_references(root, plan, rtoffset);
-			break;
+			{
+				Agg		   *aggplan = (Agg *) plan;
+
+				if (aggplan->combineStates)
+					set_combineagg_references(root, plan, rtoffset);
+				else
+					set_upper_references(root, plan, rtoffset);
+
+				break;
+			}
+
 		case T_Group:
 			set_upper_references(root, plan, rtoffset);
 			break;
@@ -2478,3 +2497,159 @@ extract_query_dependencies_walker(Node *node, PlannerInfo *context)
 	return expression_tree_walker(node, extract_query_dependencies_walker,
 								  (void *) context);
 }
+
+
+static void
+set_combineagg_references(PlannerInfo *root, Plan *plan, int rtoffset)
+{
+	Plan	   *subplan = plan->lefttree;
+	indexed_tlist *subplan_itlist;
+	List	   *output_targetlist;
+	ListCell   *l;
+
+	Assert(IsA(plan, Agg));
+	Assert(((Agg *) plan)->combineStates);
+
+	subplan_itlist = build_tlist_index(subplan->targetlist);
+
+	output_targetlist = NIL;
+
+	foreach(l, plan->targetlist)
+	{
+		TargetEntry *tle = (TargetEntry *) lfirst(l);
+		Node	   *newexpr;
+
+		/* If it's a non-Var sort/group item, first try to match by sortref */
+		if (tle->ressortgroupref != 0 && !IsA(tle->expr, Var))
+		{
+			newexpr = (Node *)
+				search_indexed_tlist_for_sortgroupref((Node *) tle->expr,
+													  tle->ressortgroupref,
+													  subplan_itlist,
+													  OUTER_VAR);
+			if (!newexpr)
+				newexpr = fix_combine_agg_expr(root,
+											   (Node *) tle->expr,
+											   subplan_itlist,
+											   OUTER_VAR,
+											   rtoffset);
+		}
+		else
+			newexpr = fix_combine_agg_expr(root,
+										   (Node *) tle->expr,
+										   subplan_itlist,
+										   OUTER_VAR,
+										   rtoffset);
+		tle = flatCopyTargetEntry(tle);
+		tle->expr = (Expr *) newexpr;
+		output_targetlist = lappend(output_targetlist, tle);
+	}
+
+	plan->targetlist = output_targetlist;
+
+	plan->qual = (List *)
+		fix_upper_expr(root,
+					   (Node *) plan->qual,
+					   subplan_itlist,
+					   OUTER_VAR,
+					   rtoffset);
+
+	pfree(subplan_itlist);
+}
+
+
+/*
+ * Adjust the Aggref's args to reference the correct Aggref target in the outer
+ * subplan.
+ */
+static Node *
+fix_combine_agg_expr(PlannerInfo *root,
+					 Node *node,
+					 indexed_tlist *subplan_itlist,
+					 Index newvarno,
+					 int rtoffset)
+{
+	fix_upper_expr_context context;
+
+	context.root = root;
+	context.subplan_itlist = subplan_itlist;
+	context.newvarno = newvarno;
+	context.rtoffset = rtoffset;
+	return fix_combine_agg_expr_mutator(node, &context);
+}
+
+static Node *
+fix_combine_agg_expr_mutator(Node *node, fix_upper_expr_context *context)
+{
+	Var		   *newvar;
+
+	if (node == NULL)
+		return NULL;
+	if (IsA(node, Var))
+	{
+		Var		   *var = (Var *) node;
+
+		newvar = search_indexed_tlist_for_var(var,
+											  context->subplan_itlist,
+											  context->newvarno,
+											  context->rtoffset);
+		if (!newvar)
+			elog(ERROR, "variable not found in subplan target list");
+		return (Node *) newvar;
+	}
+	if (IsA(node, Aggref))
+	{
+		TargetEntry *tle;
+		Aggref	   *aggref = (Aggref *) node;
+
+		tle = tlist_member(node, context->subplan_itlist->tlist);
+		if (tle)
+		{
+			/* Found a matching subplan output expression */
+			Var		   *newvar;
+			TargetEntry *newtle;
+
+			newvar = makeVarFromTargetEntry(context->newvarno, tle);
+			newvar->varnoold = 0;		/* wasn't ever a plain Var */
+			newvar->varoattno = 0;
+
+			/* update the args in the aggref */
+
+			/* makeTargetEntry: always set resno to one for the finalize agg */
+			newtle = makeTargetEntry((Expr *) newvar, 1, NULL, false);
+
+			/*
+			 * Updated the args, let the newvar refer to the right position of
+			 * the agg function in the subplan
+			 */
+			aggref->args = list_make1(newtle);
+
+			return (Node *) aggref;
+		}
+		else
+			elog(ERROR, "aggref not found in subplan target list");
+	}
+	if (IsA(node, PlaceHolderVar))
+	{
+		PlaceHolderVar *phv = (PlaceHolderVar *) node;
+
+		/* See if the PlaceHolderVar has bubbled up from a lower plan node */
+		if (context->subplan_itlist->has_ph_vars)
+		{
+			newvar = search_indexed_tlist_for_non_var((Node *) phv,
+													  context->subplan_itlist,
+													  context->newvarno);
+			if (newvar)
+				return (Node *) newvar;
+		}
+		/* If not supplied by input plan, evaluate the contained expr */
+		return fix_upper_expr_mutator((Node *) phv->phexpr, context);
+	}
+	if (IsA(node, Param))
+		return fix_param_node(context->root, (Param *) node);
+
+	fix_expr_common(context->root, node);
+	return expression_tree_mutator(node,
+								   fix_combine_agg_expr_mutator,
+								   (void *) context);
+}
diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c
index 6ea3319..fb139af 100644
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -859,7 +859,9 @@ make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
 										groupList,
 										NIL,
 										NULL,
-										dNumGroups);
+										dNumGroups,
+										false,
+										true);
 	}
 	else
 	{
diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c
index 6ac25dc..4681423 100644
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@@ -52,6 +52,10 @@
 #include "utils/syscache.h"
 #include "utils/typcache.h"
 
+typedef struct
+{
+	PartialAggType allowedtype;
+}	partial_agg_context;
 
 typedef struct
 {
@@ -93,6 +97,7 @@ typedef struct
 	bool		allow_restricted;
 } has_parallel_hazard_arg;
 
+static bool partial_aggregate_walker(Node *node, partial_agg_context * context);
 static bool contain_agg_clause_walker(Node *node, void *context);
 static bool count_agg_clauses_walker(Node *node,
 						 count_agg_clauses_context *context);
@@ -400,6 +405,82 @@ make_ands_implicit(Expr *clause)
  *****************************************************************************/
 
 /*
+ * aggregates_allow_partial
+ *		Recursively search for Aggref clauses and determine the maximum
+ *		'degree' of partial aggregation which can be supported. Partial
+ *		aggregation requires that each aggregate does not have a DISTINCT or
+ *		ORDER BY clause, and that it also has a combine function set.
+ */
+PartialAggType
+aggregates_allow_partial(Node *clause)
+{
+	partial_agg_context context;
+
+	/* initially any type is ok, until we find Aggrefs which say otherwise */
+	context.allowedtype = PAT_ANY;
+
+	/* the walker fills in context.allowedtype as a side effect */
+	(void) partial_aggregate_walker(clause, &context);
+	return context.allowedtype;
+}
+
+static bool
+partial_aggregate_walker(Node *node, partial_agg_context * context)
+{
+	if (node == NULL)
+		return false;
+	if (IsA(node, Aggref))
+	{
+		Aggref	   *aggref = (Aggref *) node;
+		HeapTuple	aggTuple;
+		Form_pg_aggregate aggform;
+
+		Assert(aggref->agglevelsup == 0);
+
+		/*
+		 * We can't perform partial aggregation with Aggrefs containing a
+		 * DISTINCT or ORDER BY clause.
+		 */
+		if (aggref->aggdistinct || aggref->aggorder)
+		{
+			context->allowedtype = PAT_DISABLED;
+			return true;		/* abort search */
+		}
+		aggTuple = SearchSysCache1(AGGFNOID,
+								   ObjectIdGetDatum(aggref->aggfnoid));
+		if (!HeapTupleIsValid(aggTuple))
+			elog(ERROR, "cache lookup failed for aggregate %u",
+				 aggref->aggfnoid);
+		aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
+
+		/*
+		 * If there is no combine func, then partial aggregation is not
+		 * possible.
+		 */
+		if (!OidIsValid(aggform->aggcombinefn))
+		{
+			ReleaseSysCache(aggTuple);
+			context->allowedtype = PAT_DISABLED;
+			return true;		/* abort search */
+		}
+
+		/*
+		 * If we find any aggs with an internal transtype then we must ensure
+		 * that pointers to aggregate states are not passed to other
+		 * processes, therefore we set the maximum degree to
+		 * PAT_INTERNAL_ONLY.
+		 */
+		if (aggform->aggtranstype == INTERNALOID)
+			context->allowedtype = PAT_INTERNAL_ONLY;
+
+		ReleaseSysCache(aggTuple);
+		return false;			/* continue searching */
+	}
+	return expression_tree_walker(node, partial_aggregate_walker,
+								  (void *) context);
+}
+
+/*
  * contain_agg_clause
  *	  Recursively search for Aggref/GroupingFunc nodes within a clause.
  *
diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index 272f368..c88011a 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -1648,7 +1648,7 @@ translate_sub_tlist(List *tlist, int relid)
  */
 GatherPath *
 create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
-				   Relids required_outer)
+				   Relids required_outer, double *rows)
 {
 	GatherPath *pathnode = makeNode(GatherPath);
 
@@ -1674,7 +1674,7 @@ create_gather_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 		pathnode->single_copy = true;
 	}
 
-	cost_gather(pathnode, root, rel, pathnode->path.param_info);
+	cost_gather(pathnode, root, rel, pathnode->path.param_info, rows);
 
 	return pathnode;
 }
@@ -2282,7 +2282,9 @@ create_group_path(PlannerInfo *root,
 				  PathTarget *target,
 				  List *groupClause,
 				  List *qual,
-				  double numGroups)
+				  double numGroups,
+				  bool combinestates,
+				  bool finalizeGroups)
 {
 	GroupPath  *pathnode = makeNode(GroupPath);
 
@@ -2301,7 +2303,9 @@ create_group_path(PlannerInfo *root,
 	pathnode->subpath = subpath;
 
 	pathnode->groupClause = groupClause;
-	pathnode->qual = qual;
+	pathnode->qual = finalizeGroups ? qual : NULL;
+	pathnode->combineStates = combinestates;
+	pathnode->finalizeGroups = finalizeGroups;
 
 	cost_group(&pathnode->path, root,
 			   list_length(groupClause),
@@ -2393,7 +2397,9 @@ create_agg_path(PlannerInfo *root,
 				List *groupClause,
 				List *qual,
 				const AggClauseCosts *aggcosts,
-				double numGroups)
+				double numGroups,
+				bool combine_agg,
+				bool finalize_agg)
 {
 	AggPath    *pathnode = makeNode(AggPath);
 
@@ -2415,7 +2421,11 @@ create_agg_path(PlannerInfo *root,
 	pathnode->aggstrategy = aggstrategy;
 	pathnode->numGroups = numGroups;
 	pathnode->groupClause = groupClause;
-	pathnode->qual = qual;
+
+	/* Only apply HAVING clause for final aggregation */
+	pathnode->qual = finalize_agg ? qual : NULL;
+	pathnode->combineStates = combine_agg;
+	pathnode->finalizeAggs = finalize_agg;
 
 	cost_agg(&pathnode->path, root,
 			 aggstrategy, aggcosts,
diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h
index 5961f2c..cfbe56c 100644
--- a/src/include/nodes/plannodes.h
+++ b/src/include/nodes/plannodes.h
@@ -698,6 +698,8 @@ typedef struct Group
 	int			numCols;		/* number of grouping columns */
 	AttrNumber *grpColIdx;		/* their indexes in the target list */
 	Oid		   *grpOperators;	/* equality operators to compare with */
+	bool		combineStates;	/* inputs are partially grouped states */
+	bool		finalizeGroups;	/* should we finalize the grouping? */
 } Group;
 
 /* ---------------
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h
index 3d7d07e..d0c9f11 100644
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@@ -1270,6 +1270,8 @@ typedef struct GroupPath
 	Path	   *subpath;		/* path representing input source */
 	List	   *groupClause;	/* a list of SortGroupClause's */
 	List	   *qual;			/* quals (HAVING quals), if any */
+	bool		combineStates;	/* inputs are partially grouped states */
+	bool		finalizeGroups;	/* should we finalize the grouping? */
 } GroupPath;
 
 /*
@@ -1300,6 +1302,8 @@ typedef struct AggPath
 	double		numGroups;		/* estimated number of groups in input */
 	List	   *groupClause;	/* a list of SortGroupClause's */
 	List	   *qual;			/* quals (HAVING quals), if any */
+	bool		combineStates;	/* inputs are partially aggregated agg states */
+	bool		finalizeAggs;	/* should the executor call the finalfn? */
 } AggPath;
 
 /*
diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h
index 3b3fd0f..d03ccc9 100644
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@@ -27,6 +27,26 @@ typedef struct
 	List	  **windowFuncs;	/* lists of WindowFuncs for each winref */
 } WindowFuncLists;
 
+/*
+ * PartialAggType
+ *	PartialAggType records whether partial aggregation is allowed, and if
+ *	so, in which context. Three states are needed because there are two
+ *	distinct contexts in which partial aggregation is safe. For aggregates
+ *	whose 'stype' is INTERNAL, the transition state is a pointer into
+ *	backend-local memory, so within a single backend process it is okay
+ *	to pass that pointer around: the memory it points to belongs to the
+ *	same process. However, when the aggregate state must be passed
+ *	between different processes, for example during parallel aggregation,
+ *	passing the pointer is not okay, because the memory it references
+ *	is not accessible from the other process.
+ */
+typedef enum
+{
+	PAT_ANY = 0,		/* Any form of partial aggregation is okay. */
+	PAT_INTERNAL_ONLY,	/* Partial aggregation only within one process. */
+	PAT_DISABLED		/* Partial aggregation is not supported at all. */
+} PartialAggType;
+
 
 extern Expr *make_opclause(Oid opno, Oid opresulttype, bool opretset,
 			  Expr *leftop, Expr *rightop,
@@ -47,6 +67,7 @@ extern Node *make_and_qual(Node *qual1, Node *qual2);
 extern Expr *make_ands_explicit(List *andclauses);
 extern List *make_ands_implicit(Expr *clause);
 
+extern PartialAggType aggregates_allow_partial(Node *clause);
 extern bool contain_agg_clause(Node *clause);
 extern void count_agg_clauses(PlannerInfo *root, Node *clause,
 				  AggClauseCosts *costs);
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index fea2bb7..ce61d70 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -150,7 +150,8 @@ extern void final_cost_hashjoin(PlannerInfo *root, HashPath *path,
 					SpecialJoinInfo *sjinfo,
 					SemiAntiJoinFactors *semifactors);
 extern void cost_gather(GatherPath *path, PlannerInfo *root,
-			RelOptInfo *baserel, ParamPathInfo *param_info);
+			RelOptInfo *baserel, ParamPathInfo *param_info,
+			double *rows);
 extern void cost_subplan(PlannerInfo *root, SubPlan *subplan, Plan *plan);
 extern void cost_qual_eval(QualCost *cost, List *quals, PlannerInfo *root);
 extern void cost_qual_eval_node(QualCost *cost, Node *qual, PlannerInfo *root);
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 37744bf..1cc8bc2 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -74,7 +74,8 @@ extern MaterialPath *create_material_path(RelOptInfo *rel, Path *subpath);
 extern UniquePath *create_unique_path(PlannerInfo *root, RelOptInfo *rel,
 				   Path *subpath, SpecialJoinInfo *sjinfo);
 extern GatherPath *create_gather_path(PlannerInfo *root,
-				   RelOptInfo *rel, Path *subpath, Relids required_outer);
+				   RelOptInfo *rel, Path *subpath, Relids required_outer,
+				   double *rows);
 extern SubqueryScanPath *create_subqueryscan_path(PlannerInfo *root,
 						 RelOptInfo *rel, Path *subpath,
 						 List *pathkeys, Relids required_outer);
@@ -153,7 +154,9 @@ extern GroupPath *create_group_path(PlannerInfo *root,
 				  PathTarget *target,
 				  List *groupClause,
 				  List *qual,
-				  double numGroups);
+				  double numGroups,
+				  bool combinestates,
+				  bool finalizeGroups);
 extern UpperUniquePath *create_upper_unique_path(PlannerInfo *root,
 						 RelOptInfo *rel,
 						 Path *subpath,
@@ -167,7 +170,9 @@ extern AggPath *create_agg_path(PlannerInfo *root,
 				List *groupClause,
 				List *qual,
 				const AggClauseCosts *aggcosts,
-				double numGroups);
+				double numGroups,
+				bool combine_agg,
+				bool finalize_agg);
 extern GroupingSetsPath *create_groupingsets_path(PlannerInfo *root,
 						 RelOptInfo *rel,
 						 Path *subpath,
