 Makefile           |   2 +-
 src/cuda_control.c |   2 +-
 src/datastore.c    |   2 +-
 src/gpuscan.c      | 172 +++++++++++++++++++++++++++++++++++++++++++++++------
 src/main.c         |  10 ++--
 5 files changed, 163 insertions(+), 25 deletions(-)

diff --git a/Makefile b/Makefile
index 90dc685..a41322d 100644
--- a/Makefile
+++ b/Makefile
@@ -18,7 +18,7 @@ PG_VERSION_NUM=$(shell $(PG_CONFIG) --version | awk '{print $$NF}'	\
 # Source file of CPU portion
 STROM_OBJS = main.o codegen.o datastore.o aggfuncs.o \
 		cuda_control.o cuda_program.o cuda_mmgr.o \
-		gpuscan.o gpujoin.o gpupreagg.o gpusort.o
+		gpuscan.o #gpujoin.o gpupreagg.o gpusort.o
 
 # Source file of GPU portion
 CUDA_OBJS = cuda_common.o \
diff --git a/src/cuda_control.c b/src/cuda_control.c
index 6b5f74c..a012a89 100644
--- a/src/cuda_control.c
+++ b/src/cuda_control.c
@@ -1599,7 +1599,7 @@ pgstrom_fetch_gputask(GpuTaskState *gts)
 				{
 					gts->scan_done = true;
 					elog(DEBUG1, "scan done (%s)",
-						 gts->css.methods->CustomName);
+						 gts->css.methods->xnode.extnodename);
 					break;
 				}
 				dlist_push_tail(&gts->pending_tasks, &gtask->chain);
diff --git a/src/datastore.c b/src/datastore.c
index 84147cf..d22d846 100644
--- a/src/datastore.c
+++ b/src/datastore.c
@@ -286,7 +286,7 @@ pgstrom_get_bulkload_density(Plan *child_plan)
 	 * bulk-output. So, we need to walk down if child node has bulk-
 	 * input.
 	 */
-	while (pgstrom_plan_is_gpujoin_bulkinput(child_plan))
+	while (false) //pgstrom_plan_is_gpujoin_bulkinput(child_plan))
 	{
 		Plan	   *curr_plan = child_plan;
 
diff --git a/src/gpuscan.c b/src/gpuscan.c
index 0ad6315..318b234 100644
--- a/src/gpuscan.c
+++ b/src/gpuscan.c
@@ -19,6 +19,7 @@
 #include "access/xact.h"
 #include "catalog/pg_namespace.h"
 #include "miscadmin.h"
+#include "nodes/readfuncs.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
@@ -99,6 +100,15 @@ deform_gpuscan_info(Plan *plan)
 	return result;
 }
 
+typedef struct {				/* plan node: CustomScan plus GpuScan-private fields */
+	CustomScan	cscan;			/* base node; callbacks cast Node * to GpuScan *, so it stays first */
+	const char *kern_source;	/* kernel source text; NULL is tolerated by the node callbacks */
+	int32		extra_flags;	/* planner sets context.extra_flags | DEVKERNEL_NEEDS_GPUSCAN */
+	List	   *used_params;	/* taken from the planner's context.used_params */
+	List	   *used_vars;		/* taken from the planner's context.used_vars */
+	List	   *dev_quals;		/* clauses chosen for device execution; host ones go to plan.qual */
+} GpuScan;
+
 typedef struct
 {
 	GpuTask			task;
@@ -502,7 +512,8 @@ create_gpuscan_plan(PlannerInfo *root,
 					List *clauses,
 					List *custom_children)
 {
-	CustomScan	   *cscan;
+//	CustomScan	   *cscan;
+	GpuScan		   *gscan;
 	GpuScanInfo		gs_info;
 	List		   *host_quals = NIL;
 	List		   *dev_quals = NIL;
@@ -515,6 +526,8 @@ create_gpuscan_plan(PlannerInfo *root,
 	Assert(rel->rtekind == RTE_RELATION);
 	Assert(custom_children == NIL);
 
+	elog(INFO, "best_path => %s", nodeToString(best_path));
+
 	/*
 	 * Distribution of clauses into device executable and others.
 	 *
@@ -543,23 +556,28 @@ create_gpuscan_plan(PlannerInfo *root,
 	/*
 	 * Construction of GpuScanPlan node; on top of CustomPlan node
 	 */
-	cscan = makeNode(CustomScan);
-	cscan->scan.plan.targetlist = tlist;
-	cscan->scan.plan.qual = host_quals;
-	cscan->scan.plan.lefttree = NULL;
-	cscan->scan.plan.righttree = NULL;
-	cscan->scan.scanrelid = rel->relid;
+	gscan = (GpuScan *) newNode(sizeof(GpuScan), T_CustomScan);
+	gscan->cscan.scan.plan.targetlist = tlist;
+	gscan->cscan.scan.plan.qual = host_quals;
+	gscan->cscan.scan.plan.lefttree = NULL;
+	gscan->cscan.scan.plan.righttree = NULL;
+	gscan->cscan.scan.scanrelid = rel->relid;
 
 	gs_info.kern_source = kern_source;
 	gs_info.extra_flags = context.extra_flags | DEVKERNEL_NEEDS_GPUSCAN;
 	gs_info.used_params = context.used_params;
 	gs_info.used_vars = context.used_vars;
 	gs_info.dev_quals = dev_quals;
-	form_gpuscan_info(cscan, &gs_info);
-	cscan->flags = best_path->flags;
-	cscan->methods = &gpuscan_plan_methods;
-
-	return &cscan->scan.plan;
+	form_gpuscan_info(&gscan->cscan, &gs_info);
+	gscan->cscan.flags = best_path->flags;
+	gscan->cscan.methods = &gpuscan_plan_methods;
+	gscan->kern_source = gs_info.kern_source;
+	gscan->extra_flags = gs_info.extra_flags;
+	gscan->used_params = gs_info.used_params;
+	gscan->used_vars = gs_info.used_vars;
+	gscan->dev_quals = gs_info.dev_quals;
+
+	return &gscan->cscan.scan.plan;
 }
 
 /*
@@ -616,6 +634,100 @@ pgstrom_gpuscan_setup_bulkslot(PlanState *outer_planstate,
 	*p_bulk_slot = css->ss.ss_ScanTupleSlot;
 }
 
+/* nodeCopy callback: duplicate the GpuScan-private fields of _oldnode into _newnode */
+static void
+gpuscan_node_copy(Node *_newnode, const Node *_oldnode)
+{
+	const GpuScan  *src = (const GpuScan *) _oldnode;
+	GpuScan		   *dst = (GpuScan *) _newnode;
+
+	/* the embedded CustomScan is not touched here; kern_source may be NULL */
+	dst->extra_flags = src->extra_flags;
+	dst->kern_source = (!src->kern_source ? NULL : pstrdup(src->kern_source));
+	dst->used_params = copyObject(src->used_params);
+	dst->used_vars   = copyObject(src->used_vars);
+	dst->dev_quals   = copyObject(src->dev_quals);
+}
+
+/*
+ * Append string s to str as one token that pg_strtok()/nodeRead() can read
+ * back.  NULL and the empty string are both written as "<>".
+ */
+static void
+_outToken(StringInfo str, const char *s)
+{
+	const char *cp;
+	bool		quote_head;
+
+	if (s == NULL || *s == '\0')
+	{
+		appendStringInfoString(str, "<>");
+		return;
+	}
+
+	/* a leading '<', '"', digit, or sign followed by digit/'.' is escaped */
+	quote_head = (*s == '<' || *s == '\"' || isdigit((unsigned char) *s));
+	if (!quote_head && (*s == '+' || *s == '-'))
+		quote_head = (isdigit((unsigned char) s[1]) || s[1] == '.');
+	if (quote_head)
+		appendStringInfoChar(str, '\\');
+
+	/* blanks, parens, braces and backslash are escaped at any position */
+	for (cp = s; *cp != '\0'; cp++)
+	{
+		if (strchr(" \n\t(){}\\", *cp) != NULL)
+			appendStringInfoChar(str, '\\');
+		appendStringInfoChar(str, *cp);
+	}
+}
+
+/* nodeOut callback: append the GpuScan-private fields of node to str */
+static void
+gpuscan_node_out(StringInfo str, const Node *node)
+{
+	const GpuScan  *gscan = (const GpuScan *) node;
+
+	/* field order and labels must stay in sync with gpuscan_node_read() */
+	appendStringInfoString(str, " :kern_source ");
+	_outToken(str, gscan->kern_source);
+	/* extra_flags is int32; cast so the argument really matches %u */
+	appendStringInfo(str, " :extra_flags %u",
+					 (unsigned int) gscan->extra_flags);
+	appendStringInfo(str, " :used_params %s",
+					 nodeToString(gscan->used_params));
+	appendStringInfo(str, " :used_vars %s", nodeToString(gscan->used_vars));
+	appendStringInfo(str, " :dev_quals %s", nodeToString(gscan->dev_quals));
+}
+
+/*
+ * nodeRead callback: fill the GpuScan-private fields of node from the
+ * tokens handed out by pg_strtok().  Every field is a ":label" token,
+ * which is skipped, followed by its value, in gpuscan_node_out() order.
+ */
+static void
+gpuscan_node_read(Node *node)
+{
+	GpuScan	   *gscan = (GpuScan *) node;
+	char	   *value;
+	int			len;
+
+	(void) pg_strtok(&len);				/* skip :kern_source */
+	value = pg_strtok(&len);
+	gscan->kern_source = (len != 0 ? debackslash(value, len) : NULL);
+	(void) pg_strtok(&len);				/* skip :extra_flags */
+	value = pg_strtok(&len);
+	gscan->extra_flags = (unsigned int ) strtoul(value, NULL, 10);
+	(void) pg_strtok(&len);				/* skip :used_params */
+	value = pg_strtok(&len);
+	gscan->used_params = nodeRead(value, len);
+	(void) pg_strtok(&len);				/* skip :used_vars */
+	value = pg_strtok(&len);
+	gscan->used_vars = nodeRead(value, len);
+	(void) pg_strtok(&len);				/* skip :dev_quals */
+	value = pg_strtok(&len);
+	gscan->dev_quals = nodeRead(value, len);
+}
+
 /*
  * gpuscan_create_scan_state
  *
@@ -646,6 +758,15 @@ gpuscan_begin(CustomScanState *node, EState *estate, int eflags)
 	GpuContext	   *gcontext = NULL;
 	GpuScanState   *gss = (GpuScanState *) node;
 	GpuScanInfo	   *gs_info = deform_gpuscan_info(node->ss.ps.plan);
+	char		   *test_str;
+	Node		   *test_node;
+
+	/* test for serialization/deserialization */
+	elog(INFO, "test-1: %s", nodeToString(node->ss.ps.plan));
+	test_str = nodeToString(copyObject(node->ss.ps.plan));
+	elog(INFO, "test-2: %s", test_str);
+	test_node = stringToNode(test_str);
+	elog(INFO, "test-3: %s", nodeToString(test_node));
 
 	/* gpuscan should not have inner/outer plan right now */
 	Assert(outerPlan(node) == NULL);
@@ -962,35 +1083,52 @@ pgstrom_init_gpuscan(void)
 
 	/* setup path methods */
 	memset(&gpuscan_path_methods, 0, sizeof(gpuscan_path_methods));
-	gpuscan_path_methods.CustomName			= "GpuScan";
+	gpuscan_path_methods.xnode.extnodename  = "GpuScanPath";
+	gpuscan_path_methods.xnode.node_size    = sizeof(GpuScanPath);
 	gpuscan_path_methods.PlanCustomPath		= create_gpuscan_plan;
+	RegisterExtensibleNodeMethods(&gpuscan_path_methods.xnode);
 
 	/* setup plan methods */
 	memset(&gpuscan_plan_methods, 0, sizeof(gpuscan_plan_methods));
-	gpuscan_plan_methods.CustomName			= "GpuScan";
+	gpuscan_plan_methods.xnode.extnodename  = "GpuScan";
+	gpuscan_plan_methods.xnode.node_size    = sizeof(GpuScan);
+	gpuscan_plan_methods.xnode.nodeCopy     = gpuscan_node_copy;
+	gpuscan_plan_methods.xnode.nodeOut      = gpuscan_node_out;
+	gpuscan_plan_methods.xnode.nodeRead     = gpuscan_node_read;
 	gpuscan_plan_methods.CreateCustomScanState = gpuscan_create_scan_state;
+	RegisterExtensibleNodeMethods(&gpuscan_plan_methods.xnode);
 
 	memset(&bulkscan_plan_methods, 0, sizeof(bulkscan_plan_methods));
-	bulkscan_plan_methods.CustomName		= "BulkScan";
+	bulkscan_plan_methods.xnode.extnodename = "BulkScan";
+	bulkscan_plan_methods.xnode.node_size   = sizeof(GpuScan);
+	bulkscan_plan_methods.xnode.nodeCopy    = gpuscan_node_copy;
+	bulkscan_plan_methods.xnode.nodeOut     = gpuscan_node_out;
+	bulkscan_plan_methods.xnode.nodeRead    = gpuscan_node_read;
 	bulkscan_plan_methods.CreateCustomScanState = gpuscan_create_scan_state;
+	RegisterExtensibleNodeMethods(&bulkscan_plan_methods.xnode);
 
 	/* setup exec methods */
 	memset(&gpuscan_exec_methods, 0, sizeof(gpuscan_exec_methods));
-	gpuscan_exec_methods.c.CustomName         = "GpuScan";
+	gpuscan_exec_methods.c.xnode.extnodename  = "GpuScanState";
+	gpuscan_exec_methods.c.xnode.node_size    = sizeof(GpuScanState);
 	gpuscan_exec_methods.c.BeginCustomScan    = gpuscan_begin;
 	gpuscan_exec_methods.c.ExecCustomScan     = gpuscan_exec;
 	gpuscan_exec_methods.c.EndCustomScan      = gpuscan_end;
 	gpuscan_exec_methods.c.ReScanCustomScan   = gpuscan_rescan;
 	gpuscan_exec_methods.c.ExplainCustomScan  = gpuscan_explain;
 	gpuscan_exec_methods.ExecCustomBulk       = gpuscan_exec_bulk;
+	RegisterExtensibleNodeMethods(&gpuscan_exec_methods.c.xnode);
 
-	bulkscan_exec_methods.c.CustomName        = "BulkScan";
+	memset(&bulkscan_exec_methods, 0, sizeof(bulkscan_exec_methods));
+	bulkscan_exec_methods.c.xnode.extnodename = "BulkScanState";
+	bulkscan_exec_methods.c.xnode.node_size   = sizeof(GpuScanState);
 	bulkscan_exec_methods.c.BeginCustomScan   = gpuscan_begin;
 	bulkscan_exec_methods.c.ExecCustomScan    = gpuscan_exec;
 	bulkscan_exec_methods.c.EndCustomScan     = gpuscan_end;
 	bulkscan_exec_methods.c.ReScanCustomScan  = gpuscan_rescan;
 	bulkscan_exec_methods.c.ExplainCustomScan = gpuscan_explain;
 	bulkscan_exec_methods.ExecCustomBulk      = gpuscan_exec_bulk;
+	RegisterExtensibleNodeMethods(&bulkscan_exec_methods.c.xnode);
 
 	/* hook registration */
 	set_rel_pathlist_next = set_rel_pathlist_hook;
diff --git a/src/main.c b/src/main.c
index e0c2c96..7e2efe9 100644
--- a/src/main.c
+++ b/src/main.c
@@ -208,7 +208,7 @@ pgstrom_recursive_grafter(PlannedStmt *pstmt, Plan *parent, Plan **p_curr_plan)
 			 * Try to inject GpuPreAgg plan if cost of the aggregate plan
 			 * is enough expensive to justify preprocess by GPU.
 			 */
-			pgstrom_try_insert_gpupreagg(pstmt, (Agg *) plan);
+			//pgstrom_try_insert_gpupreagg(pstmt, (Agg *) plan);
 			break;
 
 		case T_SubqueryScan:
@@ -311,7 +311,7 @@ pgstrom_recursive_grafter(PlannedStmt *pstmt, Plan *parent, Plan **p_curr_plan)
 			 * Try to replace Sort node by GpuSort node if cost of
 			 * the alternative plan is enough reasonable to replace.
 			 */
-			pgstrom_try_insert_gpusort(pstmt, p_curr_plan);
+			//pgstrom_try_insert_gpusort(pstmt, p_curr_plan);
 			break;
 
 		default:
@@ -386,9 +386,9 @@ _PG_init(void)
 
 	/* registration of custom-scan providers */
 	pgstrom_init_gpuscan();
-	pgstrom_init_gpujoin();
-	pgstrom_init_gpupreagg();
-	pgstrom_init_gpusort();
+//	pgstrom_init_gpujoin();
+//	pgstrom_init_gpupreagg();
+//	pgstrom_init_gpusort();
 
 	/* miscellaneous initializations */
 	pgstrom_init_misc_guc();
