00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <plang/database.h>
00021 #include <plang/errors.h>
00022 #include <stdlib.h>
00023 #include <string.h>
00024 #include <wn.h>
00025
/* Number of bucket chains in the word cache. */
#define P_WORD_HASH_SIZE 511

/*
 * One cached word.  Entries are chained per bucket through "next".
 * "word_flags" holds the bit mask computed for the word by word_check().
 * "word" is the first byte of the NUL-terminated, normalized word text;
 * word_check() allocates sizeof(p_word) + length bytes so that the text
 * continues past the end of the declared one-element array.
 */
typedef struct p_word
{
    struct p_word *next;
    unsigned int word_flags;
    char word[1];
} p_word;

/* Bucket heads of the word cache; empty (null) until words are looked up. */
static p_word *p_word_hash[P_WORD_HASH_SIZE];

/* Non-zero once plang_module_setup() has called wninit(). */
static int p_initialized;
00240
00241
/*
 * Compares a stored word against a lookup word.
 *
 * word1 is compared byte-for-byte as given.  Each byte of word2 is folded
 * before the comparison: ASCII 'A'..'Z' becomes lower case and a space
 * becomes '_'.  Returns 1 when both strings end at the same position with
 * every byte matching, otherwise 0.
 */
static int p_word_match(const char *word1, const char *word2)
{
    const unsigned char *stored = (const unsigned char *)word1;
    const unsigned char *probe = (const unsigned char *)word2;

    for (; *stored != '\0' && *probe != '\0'; ++stored, ++probe) {
        int folded = *probe;

        if (folded >= 'A' && folded <= 'Z')
            folded += 'a' - 'A';
        else if (folded == ' ')
            folded = '_';
        if (*stored != folded)
            return 0;
    }

    /* A match requires that neither string has characters left over. */
    return *stored == '\0' && *probe == '\0';
}
00257
00258
/*
 * Copies src into dest while normalizing it: ASCII 'A'..'Z' is written as
 * lower case and a space is written as '_'; every other byte is copied
 * unchanged.  dest is always NUL-terminated and must have room for
 * strlen(src) + 1 bytes.
 */
static void p_word_strcpy(char *dest, const char *src)
{
    size_t i;

    for (i = 0; src[i] != '\0'; ++i) {
        unsigned char ch = (unsigned char)src[i];

        if (ch >= 'A' && ch <= 'Z')
            dest[i] = (char)(ch - 'A' + 'a');
        else if (ch == ' ')
            dest[i] = '_';
        else
            dest[i] = (char)ch;
    }
    dest[i] = '\0';
}
00272
00273
00274
00275 static p_goal_result word_check
00276 (p_context *context, p_term *word, int kind)
00277 {
00278 int type;
00279 const char *name;
00280 const char *temp;
00281 unsigned int hash;
00282 unsigned int ch;
00283 size_t len;
00284 p_word *current;
00285 char *baseform;
00286 char *wordstr;
00287 int pos;
00288
00289
00290 word = p_term_deref_member(context, word);
00291 type = p_term_type(word);
00292 if (type != P_TERM_ATOM && type != P_TERM_STRING)
00293 return P_RESULT_FAIL;
00294
00295
00296 if (p_term_name_length(word) >= (size_t)(WORDBUF - 1))
00297 return P_RESULT_FAIL;
00298
00299
00300 name = p_term_name(word);
00301 temp = name;
00302 hash = 0;
00303 len = 0;
00304 while (*temp != '\0') {
00305 ch = ((unsigned int)(*temp++)) & 0xFF;
00306 if (ch >= 'A' && ch <= 'Z')
00307 ch += 'a' - 'A';
00308 else if (ch == ' ')
00309 ch = '_';
00310 hash = hash * 5 + ch;
00311 ++len;
00312 }
00313 hash %= P_WORD_HASH_SIZE;
00314
00315
00316 current = p_word_hash[hash];
00317 while (current) {
00318 if (p_word_match(current->word, name)) {
00319 if (current->word_flags & bit(kind))
00320 return P_RESULT_TRUE;
00321 else
00322 return P_RESULT_FAIL;
00323 }
00324 current = current->next;
00325 }
00326
00327
00328 current = (p_word *)malloc(sizeof(p_word) + len);
00329 if (!current)
00330 return P_RESULT_FAIL;
00331 current->next = p_word_hash[hash];
00332 p_word_strcpy(current->word, name);
00333 p_word_hash[hash] = current;
00334
00335
00336
00337 current->word_flags = in_wn(current->word, ALL_POS);
00338 for (pos = NOUN; pos <= ADV; ++pos) {
00339 if (current->word_flags & bit(pos))
00340 continue;
00341 wordstr = current->word;
00342 while ((baseform = morphstr(wordstr, pos)) != 0) {
00343 current->word_flags |= in_wn(baseform, pos);
00344 wordstr = 0;
00345 }
00346 }
00347 if (current->word_flags & bit(kind))
00348 return P_RESULT_TRUE;
00349 else
00350 return P_RESULT_FAIL;
00351 }
00352
00353 static int is_instantiated(const p_term *term)
00354 {
00355 if (!term)
00356 return 0;
00357 else if ((p_term_type(term) & P_TERM_VARIABLE) != 0)
00358 return 0;
00359 else
00360 return 1;
00361 }
00362
/*
 * One row of a name-to-code lookup table.  Tables of these rows end with a
 * row whose name is null; lookup_code() returns that row's value when no
 * name matches.
 */
struct p_wn_code
{
    const char *name;   /* Atom name to match, or null in the final row */
    int         value;  /* Integer code associated with the name */
};
00370 static struct p_wn_code const parts_of_speech[] = {
00371 {"noun", NOUN},
00372 {"verb", VERB},
00373 {"adjective", ADJ},
00374 {"adverb", ADV},
00375 {0, -1}
00376 };
00377 #define FETCH_SYNSET 1024
00378 static struct p_wn_code const queries[] = {
00379 {"antptr", ANTPTR},
00380 {"hyperptr", HYPERPTR},
00381 {"hypoptr", HYPOPTR},
00382 {"entailptr", ENTAILPTR},
00383 {"simptr", SIMPTR},
00384 {"ismemberptr", ISMEMBERPTR},
00385 {"isstuffptr", ISSTUFFPTR},
00386 {"ispartptr", ISPARTPTR},
00387 {"hasmemberptr", HASMEMBERPTR},
00388 {"hasstuffptr", HASSTUFFPTR},
00389 {"haspartptr", HASPARTPTR},
00390 {"meronym", MERONYM},
00391 {"holonym", HOLONYM},
00392 {"causeto", CAUSETO},
00393 {"pplptr", PPLPTR},
00394 {"seealsoptr", SEEALSOPTR},
00395 {"pertptr", PERTPTR},
00396 {"attribute", ATTRIBUTE},
00397 {"verbgroup", VERBGROUP},
00398 {"derivation", DERIVATION},
00399 {"classification", CLASSIFICATION},
00400 {"class", CLASS},
00401 {"syns", SYNS},
00402 {"freq", FREQ},
00403 {"frames", FRAMES},
00404 {"coords", COORDS},
00405 {"relatives", RELATIVES},
00406 {"hmeronym", HMERONYM},
00407 {"hholonym", HHOLONYM},
00408 {"wngrep", WNGREP},
00409 {"overview", OVERVIEW},
00410 {"classif_category", CLASSIF_CATEGORY},
00411 {"classif_usage", CLASSIF_USAGE},
00412 {"classif_regional", CLASSIF_REGIONAL},
00413 {"class_category", CLASS_CATEGORY},
00414 {"class_usage", CLASS_USAGE},
00415 {"class_regional", CLASS_REGIONAL},
00416 {"instance", INSTANCE},
00417 {"instances", INSTANCES},
00418 {"synset", FETCH_SYNSET},
00419 {0, -1}
00420 };
00421 static int lookup_code(const struct p_wn_code *codes, const p_term *code)
00422 {
00423 const char *name;
00424 if (p_term_type(code) != P_TERM_ATOM)
00425 return -1;
00426 name = p_term_name(code);
00427 while (codes->name && strcmp(codes->name, name) != 0)
00428 ++codes;
00429 return codes->value;
00430 }
00431
00432
00433 static void collect_db_search
00434 (p_context *context, p_term **head, p_term **tail,
00435 SynsetPtr synset, int recurse)
00436 {
00437 int index;
00438 p_term *list;
00439 while (synset) {
00440 for (index = 0; index < synset->wcount; ++index) {
00441 list = *head;
00442 while (list) {
00443 if (!strcmp(p_term_name(p_term_head(list)),
00444 synset->words[index]))
00445 break;
00446 list = p_term_tail(list);
00447 }
00448 if (!list) {
00449 p_term *str = p_term_create_string
00450 (context, synset->words[index]);
00451 if (*tail) {
00452 list = p_term_create_list(context, str, 0);
00453 p_term_set_tail(*tail, list);
00454 *tail = list;
00455 } else {
00456 *tail = p_term_create_list(context, str, 0);
00457 *head = *tail;
00458 }
00459 }
00460 }
00461 if (recurse) {
00462 collect_db_search
00463 (context, head, tail, synset->ptrlist, recurse);
00464 }
00465 synset = synset->nextss;
00466 }
00467 }
00468
00469
00470 #define DB_SEARCH 0
00471 #define DB_DESCRIPTION 1
00472 static p_goal_result words_db_search
00473 (p_context *context, p_term **args, p_term **error, int search_type)
00474 {
00475 p_term *word = p_term_deref_member(context, args[0]);
00476 p_term *part_of_speech = p_term_deref_member(context, args[1]);
00477 p_term *query = p_term_deref_member(context, args[2]);
00478 p_term *sense = p_term_deref_member(context, args[3]);
00479 p_term *result = p_term_deref_member(context, args[4]);
00480 int type, wn_part_of_speech, wn_query, wn_sense, ch;
00481 char *norm_word, *temp;
00482 p_term *str;
00483 p_goal_result goal_result;
00484
00485
00486 if (!is_instantiated(word) || !is_instantiated(part_of_speech) ||
00487 !is_instantiated(query) || !is_instantiated(sense)) {
00488 *error = p_create_instantiation_error(context);
00489 return P_RESULT_ERROR;
00490 }
00491 if (is_instantiated(result)) {
00492 *error = p_create_type_error(context, "variable", result);
00493 return P_RESULT_ERROR;
00494 }
00495 type = p_term_type(word);
00496 if (type != P_TERM_ATOM && type != P_TERM_STRING) {
00497 *error = p_create_type_error(context, "atom_or_string", word);
00498 return P_RESULT_ERROR;
00499 }
00500 wn_part_of_speech = lookup_code(parts_of_speech, part_of_speech);
00501 if (wn_part_of_speech < 0) {
00502 *error = p_create_type_error(context, "part_of_speech", part_of_speech);
00503 return P_RESULT_ERROR;
00504 }
00505 wn_query = lookup_code(queries, query);
00506 if (wn_query < 0 || (wn_query == FETCH_SYNSET &&
00507 search_type != DB_SEARCH)) {
00508 *error = p_create_type_error(context, "word_query", query);
00509 return P_RESULT_ERROR;
00510 }
00511 type = p_term_type(sense);
00512 if (type == P_TERM_INTEGER) {
00513 wn_sense = p_term_integer_value(sense);
00514 if (wn_sense < 1)
00515 wn_sense = -1;
00516 } else if (type == P_TERM_ATOM &&
00517 !strcmp(p_term_name(sense), "allsenses")) {
00518 wn_sense = ALLSENSES;
00519 } else {
00520 wn_sense = -1;
00521 }
00522 if (wn_sense < 0) {
00523 *error = p_create_type_error(context, "word_sense", sense);
00524 return P_RESULT_ERROR;
00525 }
00526
00527
00528 if (p_term_name_length(word) >= (size_t)(WORDBUF - 1))
00529 return P_RESULT_FAIL;
00530
00531
00532 norm_word = (char *)malloc(p_term_name_length(word) + 1);
00533 if (!norm_word)
00534 return P_RESULT_FAIL;
00535 strcpy(norm_word, p_term_name(word));
00536 temp = norm_word;
00537 while ((ch = *temp) != '\0') {
00538 if (ch >= 'A' && ch <= 'Z')
00539 *temp = (char)(ch + 'a' - 'A');
00540 else if (ch == ' ')
00541 *temp = '_';
00542 ++temp;
00543 }
00544
00545
00546 goal_result = P_RESULT_FAIL;
00547 if (search_type == DB_DESCRIPTION) {
00548
00549 temp = findtheinfo
00550 (norm_word, wn_part_of_speech, wn_query, wn_sense);
00551 if (temp && *temp != '\0') {
00552 str = p_term_create_string(context, temp);
00553 if (p_term_unify(context, result, str, P_BIND_DEFAULT))
00554 goal_result = P_RESULT_TRUE;
00555 }
00556 } else if (wn_query != FETCH_SYNSET) {
00557
00558 p_term *head = 0;
00559 p_term *tail = 0;
00560 SynsetPtr synset = findtheinfo_ds
00561 (norm_word, wn_part_of_speech, wn_query, wn_sense);
00562 while (synset) {
00563 collect_db_search
00564 (context, &head, &tail, synset->ptrlist, 1);
00565 synset = synset->nextss;
00566 }
00567 if (head) {
00568 p_term_set_tail(tail, p_term_nil_atom(context));
00569 if (p_term_unify(context, result, head, P_BIND_DEFAULT))
00570 goal_result = P_RESULT_TRUE;
00571 }
00572 } else {
00573
00574
00575 p_term *head = 0;
00576 p_term *tail = 0;
00577 if (wn_part_of_speech == ADJ)
00578 wn_query = SIMPTR;
00579 else if (wn_part_of_speech == ADV)
00580 wn_query = SYNS;
00581 else
00582 wn_query = HYPERPTR;
00583 SynsetPtr synset = findtheinfo_ds
00584 (norm_word, wn_part_of_speech, wn_query, wn_sense);
00585 while (synset) {
00586 collect_db_search(context, &head, &tail, synset, 0);
00587 synset = synset->nextss;
00588 }
00589 if (head) {
00590 p_term_set_tail(tail, p_term_nil_atom(context));
00591 if (p_term_unify(context, result, head, P_BIND_DEFAULT))
00592 goal_result = P_RESULT_TRUE;
00593 }
00594 }
00595 free(norm_word);
00596 return goal_result;
00597 }
00598
00641 static p_goal_result words_adjective
00642 (p_context *context, p_term **args, p_term **error)
00643 {
00644 return word_check(context, args[0], ADJ);
00645 }
00646
00690 static p_goal_result words_adverb
00691 (p_context *context, p_term **args, p_term **error)
00692 {
00693 return word_check(context, args[0], ADV);
00694 }
00695
00791 static p_goal_result words_base_forms
00792 (p_context *context, p_term **args, p_term **error)
00793 {
00794 p_term *word = p_term_deref_member(context, args[0]);
00795 p_term *part_of_speech = p_term_deref_member(context, args[1]);
00796 int type, wn_part_of_speech;
00797 char *baseform;
00798 p_term *head;
00799 p_term *tail;
00800 p_term *new_tail;
00801
00802
00803 if (!is_instantiated(word) || !is_instantiated(part_of_speech)) {
00804 *error = p_create_instantiation_error(context);
00805 return P_RESULT_ERROR;
00806 }
00807 type = p_term_type(word);
00808 if (type != P_TERM_ATOM && type != P_TERM_STRING) {
00809 *error = p_create_type_error(context, "atom_or_string", word);
00810 return P_RESULT_ERROR;
00811 }
00812 wn_part_of_speech = lookup_code(parts_of_speech, part_of_speech);
00813 if (wn_part_of_speech < 0) {
00814 *error = p_create_type_error(context, "part_of_speech", part_of_speech);
00815 return P_RESULT_ERROR;
00816 }
00817
00818
00819 if (p_term_name_length(word) >= (size_t)(WORDBUF - 1))
00820 return P_RESULT_FAIL;
00821
00822
00823 baseform = morphstr((char *)p_term_name(word), wn_part_of_speech);
00824 if (!baseform)
00825 return P_RESULT_FAIL;
00826 head = tail = p_term_create_list
00827 (context, p_term_create_string(context, baseform), 0);
00828 while ((baseform = morphstr(0, wn_part_of_speech)) != 0) {
00829 new_tail = p_term_create_list
00830 (context, p_term_create_string(context, baseform), 0);
00831 p_term_set_tail(tail, new_tail);
00832 tail = new_tail;
00833 }
00834 p_term_set_tail(tail, p_term_nil_atom(context));
00835
00836
00837 if (p_term_unify(context, args[2], head, P_BIND_DEFAULT))
00838 return P_RESULT_TRUE;
00839 else
00840 return P_RESULT_FAIL;
00841 }
00842
00912 static p_goal_result words_description
00913 (p_context *context, p_term **args, p_term **error)
00914 {
00915 return words_db_search(context, args, error, DB_DESCRIPTION);
00916 }
00917
00960 static p_goal_result words_noun
00961 (p_context *context, p_term **args, p_term **error)
00962 {
00963 return word_check(context, args[0], NOUN);
00964 }
00965
01092 static p_goal_result words_search
01093 (p_context *context, p_term **args, p_term **error)
01094 {
01095 return words_db_search(context, args, error, DB_SEARCH);
01096 }
01097
01141 static p_goal_result words_verb
01142 (p_context *context, p_term **args, p_term **error)
01143 {
01144 return word_check(context, args[0], VERB);
01145 }
01146
01147 void plang_module_setup(p_context *context)
01148 {
01149 if (p_initialized) {
01150 re_wninit();
01151 } else {
01152 wninit();
01153 p_initialized = 1;
01154 }
01155 p_db_set_builtin_predicate
01156 (p_term_create_atom(context, "words::adjective"), 1, words_adjective);
01157 p_db_set_builtin_predicate
01158 (p_term_create_atom(context, "words::adverb"), 1, words_adverb);
01159 p_db_set_builtin_predicate
01160 (p_term_create_atom(context, "words::base_forms"), 3, words_base_forms);
01161 p_db_set_builtin_predicate
01162 (p_term_create_atom(context, "words::description"), 5, words_description);
01163 p_db_set_builtin_predicate
01164 (p_term_create_atom(context, "words::noun"), 1, words_noun);
01165 p_db_set_builtin_predicate
01166 (p_term_create_atom(context, "words::search"), 5, words_search);
01167 p_db_set_builtin_predicate
01168 (p_term_create_atom(context, "words::verb"), 1, words_verb);
01169 }
01170
01171 void plang_module_shutdown(p_context *context)
01172 {
01173 int index;
01174 p_word *current;
01175 p_word *next;
01176 for (index = 0; index < P_WORD_HASH_SIZE; ++index) {
01177 current = p_word_hash[index];
01178 while (current) {
01179 next = current->next;
01180 free(current);
01181 current = next;
01182 }
01183 p_word_hash[index] = 0;
01184 }
01185 }