00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 #include <plang/database.h>
00021 #include <plang/errors.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 #include <wn.h>
00025 
/* Number of buckets in the hash table that caches word lookups. */
#define P_WORD_HASH_SIZE 511

/*
 * One cached word.
 *
 *   next       - next entry chained in the same hash bucket.
 *   word_flags - part-of-speech bits recorded for the word by
 *                word_check(), tested there with bit(pos).
 *   word       - normalized, NUL-terminated word.  Declared with a
 *                single element; word_check() allocates the entry with
 *                extra trailing bytes so the whole word fits.
 */
typedef struct p_word
{
    struct p_word *next;
    unsigned int word_flags;
    char word[1];
} p_word;

/* Bucket heads of the word cache (static storage, so initially all 0). */
static p_word *p_word_hash[P_WORD_HASH_SIZE];

/* Non-zero once plang_module_setup() has called wninit(). */
static int p_initialized = 0;
00240 
00241 
/*
 * Compare a stored word against a raw candidate word.
 *
 * word1 is compared byte-for-byte as given (callers pass an already
 * normalized cache entry).  Each byte of word2 is normalized on the
 * fly before comparison: ASCII 'A'..'Z' becomes lower case and a
 * space becomes '_'.
 *
 * Returns 1 when both strings end at the same position with every
 * byte matching, otherwise 0.
 */
static int p_word_match(const char *word1, const char *word2)
{
    const unsigned char *stored = (const unsigned char *)word1;
    const unsigned char *candidate = (const unsigned char *)word2;

    for (; *stored != '\0' && *candidate != '\0'; ++stored, ++candidate) {
        int expected = *stored;
        int actual = *candidate;

        if (actual == ' ')
            actual = '_';
        else if (actual >= 'A' && actual <= 'Z')
            actual = actual - 'A' + 'a';

        if (expected != actual)
            return 0;
    }

    /* A match requires that neither string has characters left over. */
    return *stored == '\0' && *candidate == '\0';
}
00257 
00258 
/*
 * Copy src into dest in normalized form: ASCII 'A'..'Z' is converted
 * to lower case, each space is replaced by '_', and every other byte
 * is copied unchanged.  dest is always NUL-terminated and must have
 * room for strlen(src) + 1 bytes.
 */
static void p_word_strcpy(char *dest, const char *src)
{
    for (; *src != '\0'; ++src, ++dest) {
        unsigned char ch = (unsigned char)*src;

        if (ch == ' ')
            *dest = '_';
        else if (ch >= 'A' && ch <= 'Z')
            *dest = (char)(ch - 'A' + 'a');
        else
            *dest = (char)ch;
    }
    *dest = '\0';
}
00272 
00273 
00274 
00275 static p_goal_result word_check
00276     (p_context *context, p_term *word, int kind)
00277 {
00278     int type;
00279     const char *name;
00280     const char *temp;
00281     unsigned int hash;
00282     unsigned int ch;
00283     size_t len;
00284     p_word *current;
00285     char *baseform;
00286     char *wordstr;
00287     int pos;
00288 
00289     
00290     word = p_term_deref_member(context, word);
00291     type = p_term_type(word);
00292     if (type != P_TERM_ATOM && type != P_TERM_STRING)
00293         return P_RESULT_FAIL;
00294 
00295     
00296     if (p_term_name_length(word) >= (size_t)(WORDBUF - 1))
00297         return P_RESULT_FAIL;
00298 
00299     
00300     name = p_term_name(word);
00301     temp = name;
00302     hash = 0;
00303     len = 0;
00304     while (*temp != '\0') {
00305         ch = ((unsigned int)(*temp++)) & 0xFF;
00306         if (ch >= 'A' && ch <= 'Z')
00307             ch += 'a' - 'A';
00308         else if (ch == ' ')
00309             ch = '_';
00310         hash = hash * 5 + ch;
00311         ++len;
00312     }
00313     hash %= P_WORD_HASH_SIZE;
00314 
00315     
00316     current = p_word_hash[hash];
00317     while (current) {
00318         if (p_word_match(current->word, name)) {
00319             if (current->word_flags & bit(kind))
00320                 return P_RESULT_TRUE;
00321             else
00322                 return P_RESULT_FAIL;
00323         }
00324         current = current->next;
00325     }
00326 
00327     
00328     current = (p_word *)malloc(sizeof(p_word) + len);
00329     if (!current)
00330         return P_RESULT_FAIL;
00331     current->next = p_word_hash[hash];
00332     p_word_strcpy(current->word, name);
00333     p_word_hash[hash] = current;
00334 
00335     
00336 
00337     current->word_flags = in_wn(current->word, ALL_POS);
00338     for (pos = NOUN; pos <= ADV; ++pos) {
00339         if (current->word_flags & bit(pos))
00340             continue;
00341         wordstr = current->word;
00342         while ((baseform = morphstr(wordstr, pos)) != 0) {
00343             current->word_flags |= in_wn(baseform, pos);
00344             wordstr = 0;
00345         }
00346     }
00347     if (current->word_flags & bit(kind))
00348         return P_RESULT_TRUE;
00349     else
00350         return P_RESULT_FAIL;
00351 }
00352 
00353 static int is_instantiated(const p_term *term)
00354 {
00355     if (!term)
00356         return 0;
00357     else if ((p_term_type(term) & P_TERM_VARIABLE) != 0)
00358         return 0;
00359     else
00360         return 1;
00361 }
00362 
00364 struct p_wn_code
00365 {
00366     const char *name;
00367     int value;
00368 };
00370 static struct p_wn_code const parts_of_speech[] = {
00371     {"noun", NOUN},
00372     {"verb", VERB},
00373     {"adjective", ADJ},
00374     {"adverb", ADV},
00375     {0, -1}
00376 };
00377 #define FETCH_SYNSET    1024    
00378 static struct p_wn_code const queries[] = {
00379     {"antptr", ANTPTR},
00380     {"hyperptr", HYPERPTR},
00381     {"hypoptr", HYPOPTR},
00382     {"entailptr", ENTAILPTR},
00383     {"simptr", SIMPTR},
00384     {"ismemberptr", ISMEMBERPTR},
00385     {"isstuffptr", ISSTUFFPTR},
00386     {"ispartptr", ISPARTPTR},
00387     {"hasmemberptr", HASMEMBERPTR},
00388     {"hasstuffptr", HASSTUFFPTR},
00389     {"haspartptr", HASPARTPTR},
00390     {"meronym", MERONYM},
00391     {"holonym", HOLONYM},
00392     {"causeto", CAUSETO},
00393     {"pplptr", PPLPTR},
00394     {"seealsoptr", SEEALSOPTR},
00395     {"pertptr", PERTPTR},
00396     {"attribute", ATTRIBUTE},
00397     {"verbgroup", VERBGROUP},
00398     {"derivation", DERIVATION},
00399     {"classification", CLASSIFICATION},
00400     {"class", CLASS},
00401     {"syns", SYNS},
00402     {"freq", FREQ},
00403     {"frames", FRAMES},
00404     {"coords", COORDS},
00405     {"relatives", RELATIVES},
00406     {"hmeronym", HMERONYM},
00407     {"hholonym", HHOLONYM},
00408     {"wngrep", WNGREP},
00409     {"overview", OVERVIEW},
00410     {"classif_category", CLASSIF_CATEGORY},
00411     {"classif_usage", CLASSIF_USAGE},
00412     {"classif_regional", CLASSIF_REGIONAL},
00413     {"class_category", CLASS_CATEGORY},
00414     {"class_usage", CLASS_USAGE},
00415     {"class_regional", CLASS_REGIONAL},
00416     {"instance", INSTANCE},
00417     {"instances", INSTANCES},
00418     {"synset", FETCH_SYNSET},
00419     {0, -1}
00420 };
00421 static int lookup_code(const struct p_wn_code *codes, const p_term *code)
00422 {
00423     const char *name;
00424     if (p_term_type(code) != P_TERM_ATOM)
00425         return -1;
00426     name = p_term_name(code);
00427     while (codes->name && strcmp(codes->name, name) != 0)
00428         ++codes;
00429     return codes->value;
00430 }
00431 
00432 
00433 static void collect_db_search
00434     (p_context *context, p_term **head, p_term **tail,
00435      SynsetPtr synset, int recurse)
00436 {
00437     int index;
00438     p_term *list;
00439     while (synset) {
00440         for (index = 0; index < synset->wcount; ++index) {
00441             list = *head;
00442             while (list) {
00443                 if (!strcmp(p_term_name(p_term_head(list)),
00444                             synset->words[index]))
00445                     break;
00446                 list = p_term_tail(list);
00447             }
00448             if (!list) {
00449                 p_term *str = p_term_create_string
00450                     (context, synset->words[index]);
00451                 if (*tail) {
00452                     list = p_term_create_list(context, str, 0);
00453                     p_term_set_tail(*tail, list);
00454                     *tail = list;
00455                 } else {
00456                     *tail = p_term_create_list(context, str, 0);
00457                     *head = *tail;
00458                 }
00459             }
00460         }
00461         if (recurse) {
00462             collect_db_search
00463                 (context, head, tail, synset->ptrlist, recurse);
00464         }
00465         synset = synset->nextss;
00466     }
00467 }
00468 
00469 
00470 #define DB_SEARCH           0
00471 #define DB_DESCRIPTION      1
00472 static p_goal_result words_db_search
00473     (p_context *context, p_term **args, p_term **error, int search_type)
00474 {
00475     p_term *word = p_term_deref_member(context, args[0]);
00476     p_term *part_of_speech = p_term_deref_member(context, args[1]);
00477     p_term *query = p_term_deref_member(context, args[2]);
00478     p_term *sense = p_term_deref_member(context, args[3]);
00479     p_term *result = p_term_deref_member(context, args[4]);
00480     int type, wn_part_of_speech, wn_query, wn_sense, ch;
00481     char *norm_word, *temp;
00482     p_term *str;
00483     p_goal_result goal_result;
00484 
00485     
00486     if (!is_instantiated(word) || !is_instantiated(part_of_speech) ||
00487             !is_instantiated(query) || !is_instantiated(sense)) {
00488         *error = p_create_instantiation_error(context);
00489         return P_RESULT_ERROR;
00490     }
00491     if (is_instantiated(result)) {
00492         *error = p_create_type_error(context, "variable", result);
00493         return P_RESULT_ERROR;
00494     }
00495     type = p_term_type(word);
00496     if (type != P_TERM_ATOM && type != P_TERM_STRING) {
00497         *error = p_create_type_error(context, "atom_or_string", word);
00498         return P_RESULT_ERROR;
00499     }
00500     wn_part_of_speech = lookup_code(parts_of_speech, part_of_speech);
00501     if (wn_part_of_speech < 0) {
00502         *error = p_create_type_error(context, "part_of_speech", part_of_speech);
00503         return P_RESULT_ERROR;
00504     }
00505     wn_query = lookup_code(queries, query);
00506     if (wn_query < 0 || (wn_query == FETCH_SYNSET &&
00507                          search_type != DB_SEARCH)) {
00508         *error = p_create_type_error(context, "word_query", query);
00509         return P_RESULT_ERROR;
00510     }
00511     type = p_term_type(sense);
00512     if (type == P_TERM_INTEGER) {
00513         wn_sense = p_term_integer_value(sense);
00514         if (wn_sense < 1)
00515             wn_sense = -1;
00516     } else if (type == P_TERM_ATOM &&
00517                !strcmp(p_term_name(sense), "allsenses")) {
00518         wn_sense = ALLSENSES;
00519     } else {
00520         wn_sense = -1;
00521     }
00522     if (wn_sense < 0) {
00523         *error = p_create_type_error(context, "word_sense", sense);
00524         return P_RESULT_ERROR;
00525     }
00526 
00527     
00528     if (p_term_name_length(word) >= (size_t)(WORDBUF - 1))
00529         return P_RESULT_FAIL;
00530 
00531     
00532     norm_word = (char *)malloc(p_term_name_length(word) + 1);
00533     if (!norm_word)
00534         return P_RESULT_FAIL;
00535     strcpy(norm_word, p_term_name(word));
00536     temp = norm_word;
00537     while ((ch = *temp) != '\0') {
00538         if (ch >= 'A' && ch <= 'Z')
00539             *temp = (char)(ch + 'a' - 'A');
00540         else if (ch == ' ')
00541             *temp = '_';
00542         ++temp;
00543     }
00544 
00545     
00546     goal_result = P_RESULT_FAIL;
00547     if (search_type == DB_DESCRIPTION) {
00548         
00549         temp = findtheinfo
00550             (norm_word, wn_part_of_speech, wn_query, wn_sense);
00551         if (temp && *temp != '\0') {
00552             str = p_term_create_string(context, temp);
00553             if (p_term_unify(context, result, str, P_BIND_DEFAULT))
00554                 goal_result = P_RESULT_TRUE;
00555         }
00556     } else if (wn_query != FETCH_SYNSET) {
00557         
00558         p_term *head = 0;
00559         p_term *tail = 0;
00560         SynsetPtr synset = findtheinfo_ds
00561             (norm_word, wn_part_of_speech, wn_query, wn_sense);
00562         while (synset) {
00563             collect_db_search
00564                 (context, &head, &tail, synset->ptrlist, 1);
00565             synset = synset->nextss;
00566         }
00567         if (head) {
00568             p_term_set_tail(tail, p_term_nil_atom(context));
00569             if (p_term_unify(context, result, head, P_BIND_DEFAULT))
00570                 goal_result = P_RESULT_TRUE;
00571         }
00572     } else {
00573         
00574 
00575         p_term *head = 0;
00576         p_term *tail = 0;
00577         if (wn_part_of_speech == ADJ)
00578             wn_query = SIMPTR;
00579         else if (wn_part_of_speech == ADV)
00580             wn_query = SYNS;
00581         else
00582             wn_query = HYPERPTR;
00583         SynsetPtr synset = findtheinfo_ds
00584             (norm_word, wn_part_of_speech, wn_query, wn_sense);
00585         while (synset) {
00586             collect_db_search(context, &head, &tail, synset, 0);
00587             synset = synset->nextss;
00588         }
00589         if (head) {
00590             p_term_set_tail(tail, p_term_nil_atom(context));
00591             if (p_term_unify(context, result, head, P_BIND_DEFAULT))
00592                 goal_result = P_RESULT_TRUE;
00593         }
00594     }
00595     free(norm_word);
00596     return goal_result;
00597 }
00598 
00641 static p_goal_result words_adjective
00642     (p_context *context, p_term **args, p_term **error)
00643 {
00644     return word_check(context, args[0], ADJ);
00645 }
00646 
00690 static p_goal_result words_adverb
00691     (p_context *context, p_term **args, p_term **error)
00692 {
00693     return word_check(context, args[0], ADV);
00694 }
00695 
00791 static p_goal_result words_base_forms
00792     (p_context *context, p_term **args, p_term **error)
00793 {
00794     p_term *word = p_term_deref_member(context, args[0]);
00795     p_term *part_of_speech = p_term_deref_member(context, args[1]);
00796     int type, wn_part_of_speech;
00797     char *baseform;
00798     p_term *head;
00799     p_term *tail;
00800     p_term *new_tail;
00801 
00802     
00803     if (!is_instantiated(word) || !is_instantiated(part_of_speech)) {
00804         *error = p_create_instantiation_error(context);
00805         return P_RESULT_ERROR;
00806     }
00807     type = p_term_type(word);
00808     if (type != P_TERM_ATOM && type != P_TERM_STRING) {
00809         *error = p_create_type_error(context, "atom_or_string", word);
00810         return P_RESULT_ERROR;
00811     }
00812     wn_part_of_speech = lookup_code(parts_of_speech, part_of_speech);
00813     if (wn_part_of_speech < 0) {
00814         *error = p_create_type_error(context, "part_of_speech", part_of_speech);
00815         return P_RESULT_ERROR;
00816     }
00817 
00818     
00819     if (p_term_name_length(word) >= (size_t)(WORDBUF - 1))
00820         return P_RESULT_FAIL;
00821 
00822     
00823     baseform = morphstr((char *)p_term_name(word), wn_part_of_speech);
00824     if (!baseform)
00825         return P_RESULT_FAIL;
00826     head = tail = p_term_create_list
00827         (context, p_term_create_string(context, baseform), 0);
00828     while ((baseform = morphstr(0, wn_part_of_speech)) != 0) {
00829         new_tail = p_term_create_list
00830             (context, p_term_create_string(context, baseform), 0);
00831         p_term_set_tail(tail, new_tail);
00832         tail = new_tail;
00833     }
00834     p_term_set_tail(tail, p_term_nil_atom(context));
00835 
00836     
00837     if (p_term_unify(context, args[2], head, P_BIND_DEFAULT))
00838         return P_RESULT_TRUE;
00839     else
00840         return P_RESULT_FAIL;
00841 }
00842 
00912 static p_goal_result words_description
00913     (p_context *context, p_term **args, p_term **error)
00914 {
00915     return words_db_search(context, args, error, DB_DESCRIPTION);
00916 }
00917 
00960 static p_goal_result words_noun
00961     (p_context *context, p_term **args, p_term **error)
00962 {
00963     return word_check(context, args[0], NOUN);
00964 }
00965 
01092 static p_goal_result words_search
01093     (p_context *context, p_term **args, p_term **error)
01094 {
01095     return words_db_search(context, args, error, DB_SEARCH);
01096 }
01097 
01141 static p_goal_result words_verb
01142     (p_context *context, p_term **args, p_term **error)
01143 {
01144     return word_check(context, args[0], VERB);
01145 }
01146 
01147 void plang_module_setup(p_context *context)
01148 {
01149     if (p_initialized) {
01150         re_wninit();
01151     } else {
01152         wninit();
01153         p_initialized = 1;
01154     }
01155     p_db_set_builtin_predicate
01156         (p_term_create_atom(context, "words::adjective"), 1, words_adjective);
01157     p_db_set_builtin_predicate
01158         (p_term_create_atom(context, "words::adverb"), 1, words_adverb);
01159     p_db_set_builtin_predicate
01160         (p_term_create_atom(context, "words::base_forms"), 3, words_base_forms);
01161     p_db_set_builtin_predicate
01162         (p_term_create_atom(context, "words::description"), 5, words_description);
01163     p_db_set_builtin_predicate
01164         (p_term_create_atom(context, "words::noun"), 1, words_noun);
01165     p_db_set_builtin_predicate
01166         (p_term_create_atom(context, "words::search"), 5, words_search);
01167     p_db_set_builtin_predicate
01168         (p_term_create_atom(context, "words::verb"), 1, words_verb);
01169 }
01170 
01171 void plang_module_shutdown(p_context *context)
01172 {
01173     int index;
01174     p_word *current;
01175     p_word *next;
01176     for (index = 0; index < P_WORD_HASH_SIZE; ++index) {
01177         current = p_word_hash[index];
01178         while (current) {
01179             next = current->next;
01180             free(current);
01181             current = next;
01182         }
01183         p_word_hash[index] = 0;
01184     }
01185 }