Changeset: 654b3064edbb for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=654b3064edbb
Modified Files:
        monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
        monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv
        monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv
        monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv
        monetdb5/extras/rdf/rdflabels.c
Branch: rdf
Log Message:

Change the priority order used when choosing a name.

Ontology-based type value > ontology-based name > type value > FK name

Also add several ontology metadata entries.


diffs (299 lines):

diff --git a/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh 
b/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
--- a/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
+++ b/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh
@@ -116,6 +116,32 @@ sed -i "s:AttFile:${PWD}/ontAttribute.og
 
 mclient < loadtmp.sql
 
+#opengraphschema
+NUMMETADATA=`cat ontMetadata.opengraphschema.csv | wc -l`
+NUMATTRIBUTES=`cat ontAttribute.opengraphschema.csv | wc -l`
+
+cp loadOntologySAMPLE.sql loadtmp.sql
+sed -i "s:NUMMETADATA:$NUMMETADATA:g" loadtmp.sql
+sed -i "s:NUMATTRIBUTES:$NUMATTRIBUTES:g" loadtmp.sql
+sed -i "s:MetaFile:${PWD}/ontMetadata.opengraphschema.csv:g" loadtmp.sql
+sed -i "s:AttFile:${PWD}/ontAttribute.opengraphschema.csv:g" loadtmp.sql
+
+
+mclient < loadtmp.sql
+
+#Dublin core
+NUMMETADATA=`cat ontMetadata.dc.csv | wc -l`
+NUMATTRIBUTES=`cat ontAttribute.dc.csv | wc -l`
+
+cp loadOntologySAMPLE.sql loadtmp.sql
+sed -i "s:NUMMETADATA:$NUMMETADATA:g" loadtmp.sql
+sed -i "s:NUMATTRIBUTES:$NUMATTRIBUTES:g" loadtmp.sql
+sed -i "s:MetaFile:${PWD}/ontMetadata.dc.csv:g" loadtmp.sql
+sed -i "s:AttFile:${PWD}/ontAttribute.dc.csv:g" loadtmp.sql
+
+
+mclient < loadtmp.sql
+
 #List of possible ontologies
 NUMONT=`cat ontList.csv | wc -l`
 
diff --git a/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv 
b/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv
--- a/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv
+++ b/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv
@@ -48,9 +48,10 @@ http://ogp.me/ns#Company|http://ogp.me/n
 http://ogp.me/ns#Company|http://ogp.me/ns/fb#admins
 http://ogp.me/ns#Company|http://ogp.me/ns/fb#app_id
 http://ogp.me/ns#Company|http://ogp.me/ns/fb#profile_id
-http://ogp.me/ns#Game|http://ogp.me/ns/fb#admins
-http://ogp.me/ns#Game|http://ogp.me/ns/fb#app_id
-http://ogp.me/ns#Game|http://ogp.me/ns/fb#profile_id
+http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#admins
+http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#app_id
+http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#profile_id
+http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#page_id
 http://ogp.me/ns#Game|http://ogp.me/ns/Game#points
 http://ogp.me/ns#Game|http://ogp.me/ns/Game#secret
 http://ogp.me/ns#Product|http://ogp.me/ns/Product#age_group
@@ -95,3 +96,9 @@ http://ogp.me/ns#Website|http://ogp.me/n
 http://ogp.me/ns#Website|http://ogp.me/ns/fb#admins
 http://ogp.me/ns#Website|http://ogp.me/ns/fb#app_id
 http://ogp.me/ns#Website|http://ogp.me/ns/fb#profile_id
+http://ogp.mc/ns#Website|http://ogp.mc/ns#description
+http://ogp.mc/ns#Website|http://ogp.mc/ns#image
+http://ogp.mc/ns#Website|http://ogp.mc/ns#site_name
+http://ogp.mc/ns#Website|http://ogp.mc/ns#title
+http://ogp.mc/ns#Website|http://ogp.mc/ns#type
+http://ogp.mc/ns#Website|http://ogp.mc/ns#url
diff --git a/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv 
b/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv
--- a/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv
+++ b/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv
@@ -1,9 +1,13 @@
 
http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#country-name
 
http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#locality
+http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#photo
 
http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#postal-code
 http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#region
 
http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#street-address
 
http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#child
+http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#photo
 
http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#title
 http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#url
 
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#availability
@@ -11,7 +15,9 @@ http://rdf.data-vocabulary.org/#Offer|ht
 http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#currency
 
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#identifier
 
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#itemoffered
+http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#name
 http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#offerurl
+http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#photo
 http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#price
 
http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#pricevaliduntil
 http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#quantity
@@ -21,10 +27,14 @@ http://rdf.data-vocabulary.org/#OfferAgg
 
http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#highprice
 
http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#itemoffered
 
http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#lowprice
+http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#name
 
http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#offercount
 
http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#offerurl
+http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#photo
 
http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#address
 
http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#category
+http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#photo
 
http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#pricerange
 
http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#tel
 
http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#url
@@ -34,7 +44,9 @@ http://rdf.data-vocabulary.org/#Person|h
 
http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#colleague
 http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#contact
 http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#friend
+http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#name
 http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#nickname
+http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#photo
 http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#role
 http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#tel
 http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#title
@@ -44,17 +56,23 @@ http://rdf.data-vocabulary.org/#Product|
 
http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#description
 
http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#identifier
 http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#image
+http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#name
 
http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#offerdetails
+http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#photo
 http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#url
 http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#average
 http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#best
 http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#max
 http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#min
+http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#photo
 http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#value
 http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#worst
 http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#author
 http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#cookTime
 http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#duration
+http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#photo
 http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#prepTime
 
http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#published
 
http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#recipeType
@@ -65,6 +83,8 @@ http://rdf.data-vocabulary.org/#Recipe|h
 
http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#description
 
http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#dtreviewed
 
http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#itemreviewed
+http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#photo
 http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#rating
 http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#reviewer
 http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#summary
@@ -76,12 +96,18 @@ http://rdf.data-vocabulary.org/#Review-a
 
http://rdf.data-vocabulary.org/#Review-aggregate|http://rdf.data-vocabulary.org/#reviewer
 
http://rdf.data-vocabulary.org/#Review-aggregate|http://rdf.data-vocabulary.org/#summary
 
http://rdf.data-vocabulary.org/#ingredient|http://rdf.data-vocabulary.org/#amount
+http://rdf.data-vocabulary.org/#ingredient|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#ingredient|http://rdf.data-vocabulary.org/#photo
 
http://rdf.data-vocabulary.org/#instructions|http://rdf.data-vocabulary.org/#instruction
+http://rdf.data-vocabulary.org/#instructions|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#instructions|http://rdf.data-vocabulary.org/#photo
 
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#calories
 
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#carbohydrates
 
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#cholesterol
 http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#fat
 http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#fiber
+http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#photo
 
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#protein
 
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#saturatedFat
 
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#servingSize
@@ -89,3 +115,5 @@ http://rdf.data-vocabulary.org/#nutritio
 
http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#unsaturatedFat
 http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#max
 http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#min
+http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#name
+http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#photo
diff --git a/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv 
b/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv
--- a/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv
+++ b/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv
@@ -34,8 +34,10 @@ http://ogp.me/ns#Product|product|http://
 http://ogp.me/ns#Song|song|http://ogp.me/ns#Products_and_Entertainment
 http://ogp.me/ns#Movie|movie|http://ogp.me/ns#Products_and_Entertainment
 http://ogp.me/ns#Tv_show|tv_show|http://ogp.me/ns#Products_and_Entertainment
+http://ogp.me/ns#FB_OGPObject|Facebook OGP Object|http://ogp.me/ns#Websites
 http://ogp.me/ns#Blog|blog|http://ogp.me/ns#Websites
 http://ogp.me/ns#Website|website|http://ogp.me/ns#Websites
 http://ogp.me/ns#Article|article|http://ogp.me/ns#Website
 http://ogp.me/ns#Blog|blog|http://ogp.me/ns#Website
 http://ogp.me/ns#Company|company|http://ogp.me/ns#Website
+http://ogp.mc/ns#Website|Website|http://ogp.mc/ns#Websites
diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c
--- a/monetdb5/extras/rdf/rdflabels.c
+++ b/monetdb5/extras/rdf/rdflabels.c
@@ -25,7 +25,7 @@
 #include <math.h>
 
 // list of known ontologies
-int ontologyCount = 73;
+int ontologyCount = 74;
 ontology ontologies[] = {
 {{"<http:", "www.facebook.com", "2008"}, 3},
 {{"<http:", "facebook.com", "2008"}, 3},
@@ -39,6 +39,7 @@ ontology ontologies[] = {
 {{"<http:", "www.purl.org", "stuff"}, 3},
 {{"<http:", "ogp.me", "ns"}, 3},
 {{"<https:", "ogp.me", "ns"}, 3},
+{{"<http:", "ogp.mc", "ns"}, 3},
 {{"<http:", "www.w3.org", "1999", "02", "22-rdf-syntax-ns"}, 5}, // rdf
 {{"<http:", "www.w3.org", "2000", "01", "rdf-schema"}, 5}, // rdfs
 {{"<http:", "www.w3.org", "2004", "02", "skos", "core"}, 6}, // skos (Simple 
Knowledge Organization System)
@@ -1150,7 +1151,7 @@ oid* getOntologyCandidates(oid** ontattr
        int             i, j, k, l;
        oid             *result = NULL;
        
-       if (freqId == 9) printf("listNum = %d\n",listNum);
+       //if (freqId == 161) printf("listNum = %d\n",listNum);
        for (i = 0; i < listNum; ++i) {
                int             filledListsCount = 0;
                oid             **candidates = NULL;
@@ -1244,7 +1245,7 @@ oid* getOntologyCandidates(oid** ontattr
                // remove subclass if superclass is in list
                for (k = 0; k < num; ++k) {
                        int found = 0;
-                       if (freqId == 9) printf("   TFIDF score at %d 
("BUNFMT") is: %f | Number of matched Prop %d \n",k, classStat[k].ontoClass, 
classStat[k].tfidfs,classStat[k].numMatchedProp);
+                       //if (freqId == 161) printf("   TFIDF score at %d 
("BUNFMT") is: %f | Number of matched Prop %d \n",k, classStat[k].ontoClass, 
classStat[k].tfidfs,classStat[k].numMatchedProp);
                        if (classStat[k].tfidfs < ONTOLOGY_FREQ_THRESHOLD) 
break; // values not frequent enough (list is sorted by tfidfs)
                        for (j = 0; j < ontmetadataCount && (found == 0); ++j) {
                                oid muri = ontmetadata[0][j];
@@ -1495,28 +1496,31 @@ void createOntologyLookupResult(oid** re
 
                propOntologies = findOntologies(cs, propOntologiesCount, 
&propOntologiesOids);
 
-               if (i == 9){
+               /*      
+               if (i == 161){
                printf("Prop ontologies count. \n");
                for (j = 0; j < ontologyCount; ++j) {
                        if (propOntologiesCount[j] > 0)
                                printf("    %d props in ontology %d \n ", 
propOntologiesCount[j], j);
                }
                
-               }       
+               }
+               */
 
                // get class names
                resultCount[i] = 0;
                
                result[i] = getOntologyCandidates(ontattributes, 
ontattributesCount, ontmetadata, ontmetadataCount, &(resultCount[i]), 
resultMatchedProp, propOntologiesOids, propOntologiesCount, ontologyCount, 
propStat, i);
 
-               if (i == 9){
+               /*
+               if (i == 161){
                        printf("Ontology candidates \n");
                        for (j = 0; j < resultCount[i]; j++){
                                printf(BUNFMT " (Num prop matched %d \n", 
result[i][j], resultMatchedProp[i][j]);
                        }
                        //exit(-1);
                }       
-               
+               */
 
                for (j = 0; j < ontologyCount; ++j) {
                        free(propOntologies[j]);
@@ -2113,6 +2117,11 @@ void removeDuplicatedCandidates(CSlabel 
 
 #if USE_TABLE_NAME
 /* For one CS: Choose the best table name out of all collected candidates 
(ontology, type, fk). */
+/**
+ * The priority is:
+ * Ontology-based type values >  Ontology-based name > Type value > FK name > 
Non frequent type value
+ * 
+ */
 static
 void getTableName(CSlabel* label, int csIdx,  int typeAttributesCount, 
TypeAttributesFreq*** typeAttributesHistogram, int** 
typeAttributesHistogramCount, TypeStat* typeStat, int typeStatCount, oid** 
result,int** resultMatchedProp, int* resultCount, IncidentFKs* links, oid** 
ontmetadata, int ontmetadataCount, BAT *ontmetaBat, OntClass *ontclassSet) {
        int             i, j;
@@ -2313,10 +2322,15 @@ void getTableName(CSlabel* label, int cs
                }
                label->candidatesCount += resultCount[csIdx];
        }
-
+       
+       // If the name found previously (based on the type values) is not 
+       // an ontology-based value (e.g., simply a string), we will choose the 
ontology name for 
+       // the CS's name. 
+       
        // chose the best ontology candidate based on number of matched props 
as label 
        // TODO: Improve this score a bit, by choosing the higher tfidf score, 
than number of matched prop
-       if (!nameFound && resultCount[csIdx] >= 1){
+       
+       if (choosenOntologyTypeValue == BUN_NONE && resultCount[csIdx] >= 1){
                label->name = result[csIdx][bestOntCandIdx];
                label->hierarchy = getOntoHierarchy(label->name, 
&(label->hierarchyCount), ontmetadata, ontmetadataCount);
                nameFound = 1;
@@ -2326,7 +2340,6 @@ void getTableName(CSlabel* label, int cs
        }
 
 
-
        // --- FK ---
        // add top3 fk values to list of candidates
        if (links[csIdx].num > 0) {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to