Changeset: 2f740b0aabd2 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=2f740b0aabd2
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
Branch: rdf
Log Message:

improve layout and data presentation of survey data
- remove quotes and language tags from strings
- indicate multi-valued properties with a star *
- indicate FK properties with a reference ->ReferencedTableName
- use only last part of URI for type property values


diffs (165 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -6190,8 +6190,43 @@ str printSampleData(CSSample *csSample, 
 #endif
 
 #if NO_OUTPUTFILE == 0
+/*
+ * Print a property name to fout, followed by optional markers:
+ *   "*"       if csSampleEx[tblId].lstIsMVCol[propId] is set (multi-valued);
+ *   "->Name"  if the property is flagged as a foreign key (isFKProp == 1),
+ *             where Name is looked up from the first ordered candidate of the
+ *             referenced table, or "->Table<refTblId>" if that candidate is
+ *             BUN_NONE.
+ *
+ * propStr is printed as given and is not freed here. mapi and mbat are only
+ * passed through to getStringName for the name lookup.
+ */
+static
+void printPropertyWithMarkers(FILE *fout, str propStr, CSSampleExtend *csSampleEx, CSPropTypes *csPropTypes, int tblId, int propId, BATiter mapi, BAT *mbat) {
+       // print property string
+       fprintf(fout, "%s", propStr);
+
+       // add star (*) if multi-valued
+       if (csSampleEx[tblId].lstIsMVCol[propId]) {
+               fprintf(fout, "*");
+       }
+
+       // add reference (->) if FK
+       if (csPropTypes[tblId].lstPropTypes[propId].isFKProp == 1) {
+               int refTblId = csPropTypes[tblId].lstPropTypes[propId].refTblId;
+               if (csSampleEx[refTblId].candidatesOrdered[0] != BUN_NONE) { // table name (= best candidate) available
+                       str nameStr;
+#if USE_SHORT_NAMES
+                       str nameStrShort;
+#endif
+                       // Look up the candidate of the *referenced* table, i.e. the same
+                       // entry that was just checked against BUN_NONE, not the candidate
+                       // of the table that owns this property.
+                       getStringName(csSampleEx[refTblId].candidatesOrdered[0], &nameStr, mapi, mbat, 1);
+#if USE_SHORT_NAMES
+                       getPropNameShort(&nameStrShort, nameStr);
+                       fprintf(fout, "->%s", nameStrShort);
+                       GDKfree(nameStrShort);
+#else
+                       fprintf(fout, "->%s", nameStr);
+#endif
+                       GDKfree(nameStr);
+               } else { // no table name
+                       fprintf(fout, "->Table%d", refTblId);
+               }
+       }
+}
+#endif
+
+#if NO_OUTPUTFILE == 0
 static 
-str printFullSampleData(CSSampleExtend *csSampleEx, int num, BAT *mbat, 
PropStat *propStat, CSset *freqCSset){
+str printFullSampleData(CSSampleExtend *csSampleEx, int num, BAT *mbat, 
PropStat *propStat, CSset *freqCSset, CSPropTypes *csPropTypes){
 
        int     i,j, k; 
        FILE    *fout, *foutrand, *foutsol, *fouttb, *foutis; 
@@ -6230,13 +6265,24 @@ str printFullSampleData(CSSampleExtend *
        int     found = 0;
        CS      freqCS;
 
-
-       mapi = bat_iterator(mbat);
+       oid     *typeAttributesOids;
+       char    *isTypeProp; // 1 if property is in typeAttributes[]
+
        if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) {
                throw(RDF, "rdf.rdfschema",
                                "could not open the tokenizer\n");
        }
        
+       // get oids for typeAttributes[]
+       typeAttributesOids = GDKmalloc(sizeof(oid) * typeAttributesCount);
+       if (!typeAttributesOids){
+               fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+       }
+       for (i = 0; i < typeAttributesCount; ++i) {
+               TKNZRappend(&typeAttributesOids[i], &typeAttributes[i]);
+       }
+
+       mapi = bat_iterator(mbat);
 
        strcpy(filename, "sampleDataFull");
        strcat(filename, ".txt");
@@ -6336,6 +6382,24 @@ str printFullSampleData(CSSampleExtend *
                else 
                        fprintf(fouttb,"CREATE TABLE tbSample%d \n (\n", i);
 
+               // mark type columns, because their sample data is represented 
without <...>
+               isTypeProp = GDKmalloc(sizeof(char) * sample.numProp);
+               if (!isTypeProp){
+                       fprintf(stderr, "ERROR: Couldn't malloc memory!\n");
+               }
+               for (j = 0; j < sample.numProp; ++j) {
+                       isTypeProp[j] = 0;
+               }
+               for (j = 0; j < sample.numProp; ++j) {
+                       for (k = 0; k < typeAttributesCount; ++k) {
+                               if (sample.lstProp[j] == typeAttributesOids[k]) 
{
+                                       // found a type property
+                                       isTypeProp[j] = 1;
+                                       break;
+                               }
+                       }
+               }
+
                // Compute property order (descending by support) and number of 
properties that are printed
                found = 0;
                numPropsInSampleTable = 
(sample.numProp>(1+NUM_PROP_SUPPORT_SAMPLE+NUM_PROP_TFIDF_SAMPLE))?(1+NUM_PROP_SUPPORT_SAMPLE+NUM_PROP_TFIDF_SAMPLE):sample.numProp;
@@ -6424,7 +6488,8 @@ str printFullSampleData(CSSampleExtend *
                        takeOid(sample.lstProp[index], &propStr);       
 #if USE_SHORT_NAMES
                        getPropNameShort(&propStrShort, propStr);
-                       fprintf(fout,"|%s", propStrShort);
+                       fprintf(fout,"|");
+                       printPropertyWithMarkers(fout, propStrShort, 
csSampleEx, csPropTypes, i, index, mapi, mbat);
 
                        pch = strstr (propStrShort,"-");
                        if (pch != NULL) *pch = '\0';   //Remove - characters 
from prop  //WEBCRAWL specific problem
@@ -6468,7 +6533,8 @@ str printFullSampleData(CSSampleExtend *
 
                        GDKfree(propStrShort);
 #else
-                       fprintf(fout,";%s", propStr);
+                       fprintf(fout, "|");
+                       printPropertyWithMarkers(fout, propStr, csSampleEx, 
csPropTypes, i, index, mapi, mbat);
 #endif
                        GDKfree(propStr);
                }
@@ -6503,7 +6569,12 @@ str printFullSampleData(CSSampleExtend *
                                                        takeOid(*objOid, 
&objStr);
                                                        
getPropNameShort(&objStrShort, objStr);
 
-                                                       fprintf(fout,"|<%s>", 
objStrShort);
+                                                       if (isTypeProp[index]) {
+                                                               // type props 
are printed without <...>
+                                                               
fprintf(fout,"|%s", objStrShort);
+                                                       } else {
+                                                               
fprintf(fout,"|<%s>", objStrShort);
+                                                       }
                                                        fprintf(foutis,"|<%s>", 
objStrShort);
                                                        GDKfree(objStrShort);
                                                        GDKfree(objStr);
@@ -6544,8 +6615,11 @@ str printFullSampleData(CSSampleExtend *
                                                fprintf(foutis,"|NULL");
                                        }
                                        else{
-                                               fprintf(fout,"|%s", objStr);
+                                               str objStrShort;
+                                               
getStringBetweenQuotes(&objStrShort, objStr); // remove quotes and language tags
+                                               fprintf(fout,"|%s", 
objStrShort);
                                                fprintf(foutis,"| %s", objStr);
+                                               GDKfree(objStrShort);
                                        }
                                }
 
@@ -6596,6 +6670,8 @@ str printFullSampleData(CSSampleExtend *
                        
        }
 
+       GDKfree(typeAttributesOids);
+
        fclose(fout);
        fclose(foutsol);
        fclose(foutrand);
@@ -7090,7 +7166,7 @@ str getFullSampleData(CStableStat* cstab
        
        initFullSampleData(csSampleEx, mTblIdxFreqIdxMapping, labels, 
cstablestat, csPropTypes, freqCSset, numTables, lmapbatid, rmapbatid);
 
-       printFullSampleData(csSampleEx, numTables, mbat, propStat, freqCSset);
+       printFullSampleData(csSampleEx, numTables, mbat, propStat, freqCSset, 
csPropTypes);
        
        freeSampleExData(csSampleEx, numTables);
        BBPunfix(mbat->batCacheid);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to