Changeset: 654b3064edbb for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=654b3064edbb Modified Files: monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv monetdb5/extras/rdf/rdflabels.c Branch: rdf Log Message:
Change the priority used when choosing a name: ontology-based type value > ontology > type value > fk. Also add several pieces of ontology metadata. diffs (299 lines): diff --git a/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh b/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh --- a/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh +++ b/monetdb5/extras/rdf/ontmetadata/loadOntologyToMonetDB.sh @@ -116,6 +116,32 @@ sed -i "s:AttFile:${PWD}/ontAttribute.og mclient < loadtmp.sql +#opengraphschema +NUMMETADATA=`cat ontMetadata.opengraphschema.csv | wc -l` +NUMATTRIBUTES=`cat ontAttribute.opengraphschema.csv | wc -l` + +cp loadOntologySAMPLE.sql loadtmp.sql +sed -i "s:NUMMETADATA:$NUMMETADATA:g" loadtmp.sql +sed -i "s:NUMATTRIBUTES:$NUMATTRIBUTES:g" loadtmp.sql +sed -i "s:MetaFile:${PWD}/ontMetadata.opengraphschema.csv:g" loadtmp.sql +sed -i "s:AttFile:${PWD}/ontAttribute.opengraphschema.csv:g" loadtmp.sql + + +mclient < loadtmp.sql + +#Dublin core +NUMMETADATA=`cat ontMetadata.dc.csv | wc -l` +NUMATTRIBUTES=`cat ontAttribute.dc.csv | wc -l` + +cp loadOntologySAMPLE.sql loadtmp.sql +sed -i "s:NUMMETADATA:$NUMMETADATA:g" loadtmp.sql +sed -i "s:NUMATTRIBUTES:$NUMATTRIBUTES:g" loadtmp.sql +sed -i "s:MetaFile:${PWD}/ontMetadata.dc.csv:g" loadtmp.sql +sed -i "s:AttFile:${PWD}/ontAttribute.dc.csv:g" loadtmp.sql + + +mclient < loadtmp.sql + #List of possible ontologies NUMONT=`cat ontList.csv | wc -l` diff --git a/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv b/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv --- a/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv +++ b/monetdb5/extras/rdf/ontmetadata/ontAttribute.ogp.csv @@ -48,9 +48,10 @@ http://ogp.me/ns#Company|http://ogp.me/n http://ogp.me/ns#Company|http://ogp.me/ns/fb#admins http://ogp.me/ns#Company|http://ogp.me/ns/fb#app_id http://ogp.me/ns#Company|http://ogp.me/ns/fb#profile_id -http://ogp.me/ns#Game|http://ogp.me/ns/fb#admins -http://ogp.me/ns#Game|http://ogp.me/ns/fb#app_id 
-http://ogp.me/ns#Game|http://ogp.me/ns/fb#profile_id +http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#admins +http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#app_id +http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#profile_id +http://ogp.me/ns#FB_OGPObject|http://ogp.me/ns/fb#page_id http://ogp.me/ns#Game|http://ogp.me/ns/Game#points http://ogp.me/ns#Game|http://ogp.me/ns/Game#secret http://ogp.me/ns#Product|http://ogp.me/ns/Product#age_group @@ -95,3 +96,9 @@ http://ogp.me/ns#Website|http://ogp.me/n http://ogp.me/ns#Website|http://ogp.me/ns/fb#admins http://ogp.me/ns#Website|http://ogp.me/ns/fb#app_id http://ogp.me/ns#Website|http://ogp.me/ns/fb#profile_id +http://ogp.mc/ns#Website|http://ogp.mc/ns#description +http://ogp.mc/ns#Website|http://ogp.mc/ns#image +http://ogp.mc/ns#Website|http://ogp.mc/ns#site_name +http://ogp.mc/ns#Website|http://ogp.mc/ns#title +http://ogp.mc/ns#Website|http://ogp.mc/ns#type +http://ogp.mc/ns#Website|http://ogp.mc/ns#url diff --git a/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv b/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv --- a/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv +++ b/monetdb5/extras/rdf/ontmetadata/ontAttribute.rdfvocabulary.csv @@ -1,9 +1,13 @@ http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#country-name http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#locality +http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#name +http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#postal-code http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#region http://rdf.data-vocabulary.org/#Address|http://rdf.data-vocabulary.org/#street-address http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#child 
+http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#name +http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#title http://rdf.data-vocabulary.org/#Breadcrumb|http://rdf.data-vocabulary.org/#url http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#availability @@ -11,7 +15,9 @@ http://rdf.data-vocabulary.org/#Offer|ht http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#currency http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#identifier http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#itemoffered +http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#name http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#offerurl +http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#price http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#pricevaliduntil http://rdf.data-vocabulary.org/#Offer|http://rdf.data-vocabulary.org/#quantity @@ -21,10 +27,14 @@ http://rdf.data-vocabulary.org/#OfferAgg http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#highprice http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#itemoffered http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#lowprice +http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#name http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#offercount http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#offerurl +http://rdf.data-vocabulary.org/#OfferAggregate|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#address 
http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#category +http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#name +http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#pricerange http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#tel http://rdf.data-vocabulary.org/#Organization|http://rdf.data-vocabulary.org/#url @@ -34,7 +44,9 @@ http://rdf.data-vocabulary.org/#Person|h http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#colleague http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#contact http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#friend +http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#name http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#nickname +http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#role http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#tel http://rdf.data-vocabulary.org/#Person|http://rdf.data-vocabulary.org/#title @@ -44,17 +56,23 @@ http://rdf.data-vocabulary.org/#Product| http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#description http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#identifier http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#image +http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#name http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#offerdetails +http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#Product|http://rdf.data-vocabulary.org/#url http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#average 
http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#best http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#max http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#min +http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#name +http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#value http://rdf.data-vocabulary.org/#Rating|http://rdf.data-vocabulary.org/#worst http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#author http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#cookTime http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#duration +http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#name +http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#prepTime http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#published http://rdf.data-vocabulary.org/#Recipe|http://rdf.data-vocabulary.org/#recipeType @@ -65,6 +83,8 @@ http://rdf.data-vocabulary.org/#Recipe|h http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#description http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#dtreviewed http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#itemreviewed +http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#name +http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#rating http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#reviewer http://rdf.data-vocabulary.org/#Review|http://rdf.data-vocabulary.org/#summary @@ -76,12 +96,18 @@ http://rdf.data-vocabulary.org/#Review-a 
http://rdf.data-vocabulary.org/#Review-aggregate|http://rdf.data-vocabulary.org/#reviewer http://rdf.data-vocabulary.org/#Review-aggregate|http://rdf.data-vocabulary.org/#summary http://rdf.data-vocabulary.org/#ingredient|http://rdf.data-vocabulary.org/#amount +http://rdf.data-vocabulary.org/#ingredient|http://rdf.data-vocabulary.org/#name +http://rdf.data-vocabulary.org/#ingredient|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#instructions|http://rdf.data-vocabulary.org/#instruction +http://rdf.data-vocabulary.org/#instructions|http://rdf.data-vocabulary.org/#name +http://rdf.data-vocabulary.org/#instructions|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#calories http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#carbohydrates http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#cholesterol http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#fat http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#fiber +http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#name +http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#photo http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#protein http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#saturatedFat http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#servingSize @@ -89,3 +115,5 @@ http://rdf.data-vocabulary.org/#nutritio http://rdf.data-vocabulary.org/#nutrition|http://rdf.data-vocabulary.org/#unsaturatedFat http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#max http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#min +http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#name +http://rdf.data-vocabulary.org/#timeRange|http://rdf.data-vocabulary.org/#photo diff --git 
a/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv b/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv --- a/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv +++ b/monetdb5/extras/rdf/ontmetadata/ontMetadata.ogp.csv @@ -34,8 +34,10 @@ http://ogp.me/ns#Product|product|http:// http://ogp.me/ns#Song|song|http://ogp.me/ns#Products_and_Entertainment http://ogp.me/ns#Movie|movie|http://ogp.me/ns#Products_and_Entertainment http://ogp.me/ns#Tv_show|tv_show|http://ogp.me/ns#Products_and_Entertainment +http://ogp.me/ns#FB_OGPObject|Facebook OGP Object|http://ogp.me/ns#Websites http://ogp.me/ns#Blog|blog|http://ogp.me/ns#Websites http://ogp.me/ns#Website|website|http://ogp.me/ns#Websites http://ogp.me/ns#Article|article|http://ogp.me/ns#Website http://ogp.me/ns#Blog|blog|http://ogp.me/ns#Website http://ogp.me/ns#Company|company|http://ogp.me/ns#Website +http://ogp.mc/ns#Website|Website|http://ogp.mc/ns#Websites diff --git a/monetdb5/extras/rdf/rdflabels.c b/monetdb5/extras/rdf/rdflabels.c --- a/monetdb5/extras/rdf/rdflabels.c +++ b/monetdb5/extras/rdf/rdflabels.c @@ -25,7 +25,7 @@ #include <math.h> // list of known ontologies -int ontologyCount = 73; +int ontologyCount = 74; ontology ontologies[] = { {{"<http:", "www.facebook.com", "2008"}, 3}, {{"<http:", "facebook.com", "2008"}, 3}, @@ -39,6 +39,7 @@ ontology ontologies[] = { {{"<http:", "www.purl.org", "stuff"}, 3}, {{"<http:", "ogp.me", "ns"}, 3}, {{"<https:", "ogp.me", "ns"}, 3}, +{{"<http:", "ogp.mc", "ns"}, 3}, {{"<http:", "www.w3.org", "1999", "02", "22-rdf-syntax-ns"}, 5}, // rdf {{"<http:", "www.w3.org", "2000", "01", "rdf-schema"}, 5}, // rdfs {{"<http:", "www.w3.org", "2004", "02", "skos", "core"}, 6}, // skos (Simple Knowledge Organization System) @@ -1150,7 +1151,7 @@ oid* getOntologyCandidates(oid** ontattr int i, j, k, l; oid *result = NULL; - if (freqId == 9) printf("listNum = %d\n",listNum); + //if (freqId == 161) printf("listNum = %d\n",listNum); for (i = 0; i < listNum; ++i) { int filledListsCount 
= 0; oid **candidates = NULL; @@ -1244,7 +1245,7 @@ oid* getOntologyCandidates(oid** ontattr // remove subclass if superclass is in list for (k = 0; k < num; ++k) { int found = 0; - if (freqId == 9) printf(" TFIDF score at %d ("BUNFMT") is: %f | Number of matched Prop %d \n",k, classStat[k].ontoClass, classStat[k].tfidfs,classStat[k].numMatchedProp); + //if (freqId == 161) printf(" TFIDF score at %d ("BUNFMT") is: %f | Number of matched Prop %d \n",k, classStat[k].ontoClass, classStat[k].tfidfs,classStat[k].numMatchedProp); if (classStat[k].tfidfs < ONTOLOGY_FREQ_THRESHOLD) break; // values not frequent enough (list is sorted by tfidfs) for (j = 0; j < ontmetadataCount && (found == 0); ++j) { oid muri = ontmetadata[0][j]; @@ -1495,28 +1496,31 @@ void createOntologyLookupResult(oid** re propOntologies = findOntologies(cs, propOntologiesCount, &propOntologiesOids); - if (i == 9){ + /* + if (i == 161){ printf("Prop ontologies count. \n"); for (j = 0; j < ontologyCount; ++j) { if (propOntologiesCount[j] > 0) printf(" %d props in ontology %d \n ", propOntologiesCount[j], j); } - } + } + */ // get class names resultCount[i] = 0; result[i] = getOntologyCandidates(ontattributes, ontattributesCount, ontmetadata, ontmetadataCount, &(resultCount[i]), resultMatchedProp, propOntologiesOids, propOntologiesCount, ontologyCount, propStat, i); - if (i == 9){ + /* + if (i == 161){ printf("Ontology candidates \n"); for (j = 0; j < resultCount[i]; j++){ printf(BUNFMT " (Num prop matched %d \n", result[i][j], resultMatchedProp[i][j]); } //exit(-1); } - + */ for (j = 0; j < ontologyCount; ++j) { free(propOntologies[j]); @@ -2113,6 +2117,11 @@ void removeDuplicatedCandidates(CSlabel #if USE_TABLE_NAME /* For one CS: Choose the best table name out of all collected candidates (ontology, type, fk). 
*/ +/** + * The priority is: + * Ontology-based type values > Ontology-based name > Type value > FK name > Non frequent type value + * + */ static void getTableName(CSlabel* label, int csIdx, int typeAttributesCount, TypeAttributesFreq*** typeAttributesHistogram, int** typeAttributesHistogramCount, TypeStat* typeStat, int typeStatCount, oid** result,int** resultMatchedProp, int* resultCount, IncidentFKs* links, oid** ontmetadata, int ontmetadataCount, BAT *ontmetaBat, OntClass *ontclassSet) { int i, j; @@ -2313,10 +2322,15 @@ void getTableName(CSlabel* label, int cs } label->candidatesCount += resultCount[csIdx]; } - + + // If the name found previously (based on the type values) is not + // an ontology-based value (e.g., simply a string), we will choose the ontology name for + // the CS's name. + // chose the best ontology candidate based on number of matched props as label // TODO: Improve this score a bit, by choosing the higher tfidf score, than number of matched prop - if (!nameFound && resultCount[csIdx] >= 1){ + + if (choosenOntologyTypeValue == BUN_NONE && resultCount[csIdx] >= 1){ label->name = result[csIdx][bestOntCandIdx]; label->hierarchy = getOntoHierarchy(label->name, &(label->hierarchyCount), ontmetadata, ontmetadataCount); nameFound = 1; @@ -2326,7 +2340,6 @@ void getTableName(CSlabel* label, int cs } - // --- FK --- // add top3 fk values to list of candidates if (links[csIdx].num > 0) { _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list