Changeset: b2be3bc23d42 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b2be3bc23d42
Modified Files:
        pathfinder/src/sqlhelpers/xmlshred/encoding.c
        pathfinder/src/sqlhelpers/xmlshred/include/encoding.h
        pathfinder/src/sqlhelpers/xmlshred/main.c
Branch: default
Log Message:

Adjustments to pfshred:
- Extended prefix printing (now prefixes are stored in a separate file as well).
- Generated dummy values for prefixes, URIs, and local names for all node kinds.
- Added parent information to be recorded for shredding to X100 (-x).


diffs (truncated from 450 to 300 lines):

diff --git a/pathfinder/src/sqlhelpers/xmlshred/encoding.c b/pathfinder/src/sqlhelpers/xmlshred/encoding.c
--- a/pathfinder/src/sqlhelpers/xmlshred/encoding.c
+++ b/pathfinder/src/sqlhelpers/xmlshred/encoding.c
@@ -50,6 +50,7 @@
 FILE *out;
 FILE *out_attr;
 FILE *out_names;
+FILE *out_prefixes;
 FILE *out_uris;
 FILE *guide_out;
 FILE *err;
@@ -77,12 +78,18 @@
                                   ? (g)->guide             \
                                   : 0)
 
-/* localname and URI hash tables */
+/* localname, prefix, and URI hash tables */
 static hashtable_t localname_hash;
+static hashtable_t prefixes_hash;
 static hashtable_t uris_hash;
+/* localname, prefix, and URI counters */
+static unsigned int global_localname_id;
+static unsigned int global_prefix_id;
+static unsigned int global_uri_id;
 
 #define BAILOUT(...) do { SHoops (SH_FATAL, __VA_ARGS__);   \
                           free_hashtable (localname_hash);  \
+                          free_hashtable (prefixes_hash);   \
                           free_hashtable (uris_hash);       \
                           free (buffer);                    \
                           exit (1);                         \
@@ -197,6 +204,12 @@
 }
     
 void
+print_prefix_id (FILE *f, node_t tuple)
+{
+    fprintf (f, "%i", tuple.prefix_id);
+}
+
+void
 print_prefix_str (FILE *f, node_t tuple)
 {
     fputs ((char *) tuple.prefix, f);
@@ -250,20 +263,23 @@
 static void lambda_P (node_t n) 
 { if (n.parent) lambda_E (*(n.parent)); }
 
+static void lambda_x (node_t n) 
+{ print_prefix_id (out, n); }
+
 static void lambda_X (node_t n) 
-{ if (n.uri_id != -1) print_prefix_str (out, n); }
+{ print_prefix_str (out, n); }
 
 static void lambda_n (node_t n) 
-{ if (n.localname_id != -1) print_localname_id (out, n); }
+{ print_localname_id (out, n); }
 
 static void lambda_N (node_t n) 
-{ if (n.localname_id != -1) print_localname_str (out, n); }
+{ print_localname_str (out, n); }
 
 static void lambda_u (node_t n) 
-{ if (n.uri_id != -1) print_uri_id (out, n); }
+{ print_uri_id (out, n); }
 
 static void lambda_U (node_t n) 
-{ if (n.uri_id != -1) print_uri_str (out, n); }
+{ print_uri_str (out, n); }
 
 static void lambda_d (node_t n) 
 { print_number (out, n); }
@@ -295,8 +311,9 @@
   , ['p'] = lambda_p
   , ['P'] = lambda_P
   , ['n'] = lambda_n
+  , ['N'] = lambda_N
+  , ['x'] = lambda_x
   , ['X'] = lambda_X
-  , ['N'] = lambda_N
   , ['u'] = lambda_u
   , ['U'] = lambda_U
   , ['d'] = lambda_d
@@ -355,52 +372,35 @@
 }
 
 static int
-generate_localname_id (const xmlChar *localname)
+generate_id (const xmlChar *str, unsigned int *global_id,
+             hashtable_t ht, FILE *f)
 {
-    static unsigned int global_localname_id = 0;
-    int localname_id;
+    int id;
    
-    if (!localname)
-        return -1;
+    if (!str)
+        return 0; /* we ensured in initialize() that there is
+                     always an entry 0 */
                      
-    localname_id = hashtable_find (localname_hash, (char *) localname);
+    id = hashtable_find (ht, (char *) str);
 
-    if (NOKEY (localname_id)) {
+    if (NOKEY (id)) {
         /* key not found, create a new name id */
-        localname_id = global_localname_id++;
-        /* add the (localname, localname_id) pair into the hash table */
-        hashtable_insert (localname_hash, (char *) localname, localname_id);
+        id = (*global_id)++;
+        /* add the (str, id) pair into the hash table */
+        hashtable_insert (ht, (char *) str, id);
         /* print the name binding if necessary */
         if (shredstate.names_separate)
-            fprintf (out_names, "%i, \"%s\"\n", localname_id, (char*) localname);
+            fprintf (f, "%i, \"%s\"\n", id, (char*) str);
     }
 
-    return localname_id;
+    return id;
 }
-
-static int
-generate_uri_id (const xmlChar *URI)
-{  
-    static unsigned int global_uri_id = 0;
-    int uri_id;
-    
-    if (!URI)
-        return -1;
-        
-    uri_id = hashtable_find (uris_hash, (char *) URI);
-
-    if (NOKEY (uri_id)) {
-        /* key not found, create a new URI id */
-        uri_id = global_uri_id++;
-        /* add the (URI, uri_id) pair to the hash table */
-        hashtable_insert (uris_hash, (char *) URI, uri_id);
-        /* print the URI binding if necessary */
-        if (shredstate.names_separate)
-            fprintf (out_uris, "%i, \"%s\"\n", uri_id, (char*) URI);
-    }
-
-    return uri_id;
-}
+#define generate_localname_id(str) \
+        generate_id((str),&global_localname_id,localname_hash,out_names)
+#define generate_prefix_id(str) \
+        generate_id((str),&global_prefix_id,prefixes_hash,out_prefixes)
+#define generate_uri_id(str) \
+        generate_id((str),&global_uri_id,uris_hash,out_uris)
 
 static void
 flush_node (kind_t kind, 
@@ -425,6 +425,8 @@
         prefix_copy = strndup ((char *) prefix, text_size);
     } else if (prefix)
         prefix_copy = strdup ((char *) prefix);
+    else
+        prefix_copy = strdup ("");
         
     /* check if tagname is larger than text_size characters */
     if (text_size && localname &&
@@ -433,6 +435,8 @@
         localname_copy = strndup ((char *) localname, text_size);
     } else if (localname)
         localname_copy = strdup ((char *) localname);
+    else
+        localname_copy = strdup ("");
     
     /* check if uri is larger than text_size characters */
     if (text_size && URI && (unsigned int) xmlStrlen (URI) > text_size) {
@@ -440,6 +444,8 @@
         URI_copy = strndup ((char *) URI, text_size);
     } else if (URI)
         URI_copy = strdup ((char *) URI);
+    else
+        URI_copy = strdup ("");
 
     /* check if value is larger than text_size characters */
     if (text_size && value && (unsigned int) xmlStrlen (value) > text_size) {
@@ -461,6 +467,7 @@
       , .level          = level
       , .kind           = kind
       , .prefix         = prefix_copy
+      , .prefix_id      = generate_prefix_id (prefix)
       , .localname      = localname_copy
       , .localname_id   = generate_localname_id (localname)
       , .uri            = URI_copy
@@ -538,10 +545,11 @@
         , .level          = level
         , .kind           = doc
         , .prefix         = NULL
+        , .prefix_id      = 0
         , .localname      = NULL
-        , .localname_id   = -1
+        , .localname_id   = 0
         , .uri            = NULL
-        , .uri_id         = -1
+        , .uri_id         = 0
         , .value          = doc_name
         , .guide          = guide
     };
@@ -649,6 +657,7 @@
       , .level          = level
       , .kind           = elem
       , .prefix         = prefix_copy
+      , .prefix_id      = generate_prefix_id (prefix) 
       , .localname      = localname_copy
       , .localname_id   = generate_localname_id (localname) 
       , .uri            = URI_copy
@@ -676,18 +685,21 @@
                     
                 if (shredstate.names_separate)
                     fprintf (out_attr,
-                             SSZFMT ", " SSZFMT ", %i, %i, \"%.*s\"",
+                             SSZFMT ", " SSZFMT ", %i, %i, %i, \"%.*s\"",
                              att_id++,
                              pre,
+                             generate_prefix_id (atts[1]),
                              generate_uri_id (atts[2]),
                              generate_localname_id (atts[0]),
                              (int) (atts[4] - atts[3]),
                              (char*) atts[3]);
                 else                           
                     fprintf (out_attr,
-                             SSZFMT ", " SSZFMT ", \"%s\", \"%s\", \"%.*s\"",
+                             SSZFMT ", " SSZFMT 
+                             ", \"%s\", \"%s\", \"%s\", \"%.*s\"",
                              att_id++,
                              pre,
+                             (char*) atts[1],
                              (char*) atts[2],
                              (char*) atts[0],
                              (int) (atts[4] - atts[3]),
@@ -896,15 +908,17 @@
 initialize (FILE *shout,
             FILE *attout,
             FILE *namesout,
+            FILE *prefixesout,
             FILE *urisout)
 {
     /* bind the different output files to global variables
        to make them accessible inside the callback functions */
-    out       = shout;
-    out_attr  = attout;
-    out_names = namesout; 
-    out_uris  = urisout;
-    err       = stderr;
+    out          = shout;
+    out_attr     = attout;
+    out_names    = namesout; 
+    out_prefixes = prefixesout; 
+    out_uris     = urisout;
+    err          = stderr;
 
     /* how many characters should be stored in
      * the value column */
@@ -917,11 +931,25 @@
     /* compile the -F format string */
     compile_fmt (shredstate.format);
     
-    /* initialize localname and URI hashes */
+    /* initialize localname, prefix, and URI hashes */
     localname_hash = new_hashtable (); 
+    prefixes_hash = new_hashtable ();
     uris_hash = new_hashtable ();
-    /* pre-insert entry for empty namespace URIs */
+    /* initialize localname, prefix, and URI counters */
+    global_localname_id = 0;
+    global_prefix_id    = 0;
+    global_uri_id       = 0;
+
+    /* pre-insert entries */
+    generate_localname_id ((xmlChar *) "");
+    generate_prefix_id ((xmlChar *) "");
     generate_uri_id ((xmlChar *) "");
+
+    /* ensure that there is at least one id
+       for localname, prefix, and URI */
+    assert (global_localname_id == 1 &&
+            global_prefix_id == 1 &&
+            global_uri_id == 1);
     
     /* Whether to escape the quotes in the textnode content */
     if (shredstate.escape_quotes)
@@ -957,6 +985,7 @@
             FILE *shout,
             FILE *attout,
             FILE *namesout,
+            FILE *prefixesout,
             FILE *urisout,
             FILE *guideout,
             shred_state_t *status)
@@ -966,7 +995,7 @@
     shredstate = *status;
 
     assert (shout);
_______________________________________________
Checkin-list mailing list
Checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to